miteru 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miteru/cache.rb +4 -12
- data/lib/miteru/concerns/url_truncatable.rb +24 -0
- data/lib/miteru/crawler.rb +12 -31
- data/lib/miteru/downloader.rb +1 -1
- data/lib/miteru/kit.rb +2 -11
- data/lib/miteru/notifiers/base.rb +1 -1
- data/lib/miteru/notifiers/slack.rb +4 -4
- data/lib/miteru/notifiers/urlscan.rb +3 -3
- data/lib/miteru/orchestrator.rb +35 -39
- data/lib/miteru/service.rb +44 -0
- data/lib/miteru/version.rb +1 -1
- data/lib/miteru/website.rb +2 -11
- data/lib/miteru.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3767b21d7f569a43215e2fdc0f83f9fc4f29fe2a4bb11fc3548a5d1657812774
|
4
|
+
data.tar.gz: 7877fb357548c619210ec63f3bd26c61c3fda68dc6d21910aa22fc0d7144ebf1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '02480c9a5604e5c7959ae4adb9e3a657d4b59f5c668be51b7d1fe120bd46e5548ecc9a26404d88d001729bf19f4f3d1b1545be80820c0717c0e027b7995030cd'
|
7
|
+
data.tar.gz: f2c1adc686393f3979ccf77e87c1bc5dba9cad9023903620ab61bbf6e6a2a4406dbbdd3ffb9ca73f5297c2135436dfd995f9572085fd9fde02fb5b72eac6032b
|
data/lib/miteru/cache.rb
CHANGED
@@ -19,8 +19,8 @@ module Miteru
|
|
19
19
|
# @param [Integer, nil] ex
|
20
20
|
#
|
21
21
|
def set(key, value, ex:)
|
22
|
-
value = redis.set("#{
|
23
|
-
|
22
|
+
value = redis.set("#{cache_prefix}:#{key}", value, ex:)
|
23
|
+
logger.info("Cache:#{key} is set.") if verbose?
|
24
24
|
value
|
25
25
|
end
|
26
26
|
|
@@ -28,21 +28,13 @@ module Miteru
|
|
28
28
|
# @param [String] key
|
29
29
|
#
|
30
30
|
def cached?(key)
|
31
|
-
value = redis.exists?("#{
|
32
|
-
|
31
|
+
value = redis.exists?("#{cache_prefix}:#{key}")
|
32
|
+
logger.info("Cache:#{key} found.") if verbose?
|
33
33
|
value
|
34
34
|
end
|
35
35
|
|
36
36
|
private
|
37
37
|
|
38
|
-
def verbose?
|
39
|
-
Miteru.config.verbose
|
40
|
-
end
|
41
|
-
|
42
|
-
def prefix
|
43
|
-
Miteru.config.cache_prefix
|
44
|
-
end
|
45
|
-
|
46
38
|
#
|
47
39
|
# @return [Redis]
|
48
40
|
#
|
data/lib/miteru/concerns/url_truncatable.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Miteru
|
4
|
+
module Concerns
|
5
|
+
module UrlTruncatable
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
|
8
|
+
def decoded_url
|
9
|
+
@decoded_url ||= URI.decode_www_form_component(url)
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# @return [String]
|
14
|
+
#
|
15
|
+
def truncated_url
|
16
|
+
@truncated_url ||= decoded_url.truncate(64)
|
17
|
+
end
|
18
|
+
|
19
|
+
def defanged_truncated_url
|
20
|
+
@defanged_truncated_url ||= truncated_url.to_s.gsub(".", "[.]")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/miteru/crawler.rb
CHANGED
@@ -11,23 +11,21 @@ module Miteru
|
|
11
11
|
Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
|
12
12
|
info = "Website:#{website.info}."
|
13
13
|
info = info.colorize(:red) if website.kits?
|
14
|
-
|
14
|
+
logger.info(info)
|
15
15
|
|
16
16
|
website.kits.each do |kit|
|
17
17
|
downloader = Downloader.new(kit)
|
18
18
|
result = downloader.result
|
19
|
-
|
20
19
|
unless result.success?
|
21
|
-
|
20
|
+
logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
|
22
21
|
next
|
23
22
|
end
|
24
|
-
|
25
23
|
destination = result.value!
|
26
|
-
|
24
|
+
logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.")
|
27
25
|
# Remove downloaded file if auto_download is not allowed
|
28
26
|
FileUtils.rm(destination, force: true) unless auto_download?
|
29
|
-
# Notify the
|
30
|
-
notify
|
27
|
+
# Notify the kit
|
28
|
+
notify(kit)
|
31
29
|
end
|
32
30
|
|
33
31
|
# Cache the website
|
@@ -37,39 +35,22 @@ module Miteru
|
|
37
35
|
|
38
36
|
private
|
39
37
|
|
40
|
-
def cache?
|
41
|
-
Miteru.cache?
|
42
|
-
end
|
43
|
-
|
44
|
-
def cache
|
45
|
-
Miteru.cache
|
46
|
-
end
|
47
|
-
|
48
|
-
def cache_ex
|
49
|
-
Miteru.config.cache_ex
|
50
|
-
end
|
51
|
-
|
52
|
-
def auto_download?
|
53
|
-
Miteru.config.auto_download
|
54
|
-
end
|
55
|
-
|
56
38
|
#
|
57
|
-
# @param [Miteru::
|
39
|
+
# @param [Miteru::Kit] kit
|
58
40
|
#
|
59
|
-
def notify(
|
41
|
+
def notify(kit)
|
60
42
|
notifiers.each do |notifier|
|
61
|
-
result = notifier.result(
|
43
|
+
result = notifier.result(kit)
|
62
44
|
if result.success?
|
63
|
-
|
45
|
+
logger.info("Notifier:#{notifier.name} succeeded.")
|
64
46
|
else
|
65
|
-
|
47
|
+
logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
|
66
48
|
end
|
67
49
|
end
|
68
50
|
end
|
69
51
|
|
70
|
-
|
71
|
-
|
72
|
-
#
|
52
|
+
private
|
53
|
+
|
73
54
|
def notifiers
|
74
55
|
@notifiers ||= Miteru.notifiers.map(&:new)
|
75
56
|
end
|
data/lib/miteru/downloader.rb
CHANGED
data/lib/miteru/kit.rb
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
module Miteru
|
4
4
|
class Kit < Service
|
5
|
+
include Concerns::UrlTruncatable
|
6
|
+
|
5
7
|
# @return [String]
|
6
8
|
attr_reader :url
|
7
9
|
|
@@ -85,17 +87,6 @@ module Miteru
|
|
85
87
|
@hostname ||= URI(url).hostname
|
86
88
|
end
|
87
89
|
|
88
|
-
def decoded_url
|
89
|
-
@decoded_url ||= URI.decode_www_form_component(url)
|
90
|
-
end
|
91
|
-
|
92
|
-
#
|
93
|
-
# @return [String]
|
94
|
-
#
|
95
|
-
def truncated_url
|
96
|
-
url.truncate(64)
|
97
|
-
end
|
98
|
-
|
99
90
|
private
|
100
91
|
|
101
92
|
def filename_to_download
|
data/lib/miteru/notifiers/base.rb
CHANGED
data/lib/miteru/notifiers/slack.rb
CHANGED
@@ -77,13 +77,13 @@ module Miteru
|
|
77
77
|
#
|
78
78
|
# Notify Slack
|
79
79
|
#
|
80
|
-
# @param [Miteru::
|
80
|
+
# @param [Miteru::Kit] kit
|
81
81
|
#
|
82
|
-
def call(
|
82
|
+
def call(kit)
|
83
83
|
return unless callable?
|
84
84
|
|
85
|
-
attachment = SlackAttachment.new(
|
86
|
-
notifier.post(text:
|
85
|
+
attachment = SlackAttachment.new(kit.url)
|
86
|
+
notifier.post(text: kit.defanged_truncated_url, attachments: attachment.to_a)
|
87
87
|
end
|
88
88
|
|
89
89
|
def callable?
|
data/lib/miteru/notifiers/urlscan.rb
CHANGED
@@ -4,12 +4,12 @@ module Miteru
|
|
4
4
|
module Notifiers
|
5
5
|
class UrlScan < Base
|
6
6
|
#
|
7
|
-
# @param [Miteru::
|
7
|
+
# @param [Miteru::Kit] kit
|
8
8
|
#
|
9
|
-
def call(
|
9
|
+
def call(kit)
|
10
10
|
return unless callable?
|
11
11
|
|
12
|
-
|
12
|
+
submit(kit.decoded_url, source: kit.source)
|
13
13
|
end
|
14
14
|
|
15
15
|
def callable?
|
data/lib/miteru/orchestrator.rb
CHANGED
@@ -3,24 +3,31 @@
|
|
3
3
|
module Miteru
|
4
4
|
class Orchestrator < Service
|
5
5
|
def call
|
6
|
-
|
6
|
+
logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
|
7
7
|
|
8
|
-
if
|
9
|
-
|
10
|
-
Jobs::CrawleJob.perform_async(website.url, website.source)
|
11
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
12
|
-
end
|
8
|
+
if sidekiq?
|
9
|
+
sidekiq_call
|
13
10
|
else
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
parallel_call
|
12
|
+
end
|
13
|
+
end
|
17
14
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
15
|
+
def sidekiq_call
|
16
|
+
non_cached_websites.each do |website|
|
17
|
+
Jobs::CrawleJob.perform_async(website.url, website.source)
|
18
|
+
logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def parallel_call
|
23
|
+
logger.info("Use #{threads} thread(s).") if verbose?
|
24
|
+
Parallel.each(non_cached_websites, in_threads: threads) do |website|
|
25
|
+
logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
|
26
|
+
result = Crawler.result(website)
|
27
|
+
if result.success?
|
28
|
+
logger.info("Crawler:#{website.truncated_url} succeeded.")
|
29
|
+
else
|
30
|
+
logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
@@ -34,44 +41,33 @@ module Miteru
|
|
34
41
|
result = feed.result
|
35
42
|
if result.success?
|
36
43
|
websites = result.value!
|
37
|
-
|
44
|
+
logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
|
38
45
|
out << websites
|
39
46
|
else
|
40
|
-
|
47
|
+
logger.warn("Feed:#{feed.source} failed - #{result.failure}")
|
41
48
|
end
|
42
49
|
end
|
43
50
|
end.flatten.uniq(&:url)
|
44
51
|
end
|
45
52
|
|
53
|
+
#
|
54
|
+
# @return [Array<Miteru::Website>]
|
55
|
+
#
|
46
56
|
def non_cached_websites
|
47
|
-
|
48
|
-
|
49
|
-
|
57
|
+
@non_cached_websites ||= [].tap do |out|
|
58
|
+
out << if cache?
|
59
|
+
websites.reject { |website| cache.cached?(website.url) }
|
60
|
+
else
|
61
|
+
websites
|
62
|
+
end
|
63
|
+
end.flatten.uniq(&:url)
|
50
64
|
end
|
51
65
|
|
52
66
|
#
|
53
67
|
# @return [Array<Miteru::Feeds::Base>]
|
54
68
|
#
|
55
69
|
def feeds
|
56
|
-
Miteru.feeds.map(&:new)
|
57
|
-
end
|
58
|
-
|
59
|
-
private
|
60
|
-
|
61
|
-
def cache?
|
62
|
-
Miteru.cache?
|
63
|
-
end
|
64
|
-
|
65
|
-
def cache
|
66
|
-
Miteru.cache
|
67
|
-
end
|
68
|
-
|
69
|
-
def threads
|
70
|
-
Miteru.config.threads
|
71
|
-
end
|
72
|
-
|
73
|
-
def verbose?
|
74
|
-
Miteru.config.verbose
|
70
|
+
@feeds ||= Miteru.feeds.map(&:new)
|
75
71
|
end
|
76
72
|
end
|
77
73
|
end
|
data/lib/miteru/service.rb
CHANGED
@@ -24,5 +24,49 @@ module Miteru
|
|
24
24
|
new.result(...)
|
25
25
|
end
|
26
26
|
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def auto_download?
|
31
|
+
Miteru.config.auto_download
|
32
|
+
end
|
33
|
+
|
34
|
+
#
|
35
|
+
# @return [SemanticLogger]
|
36
|
+
#
|
37
|
+
def logger
|
38
|
+
Miteru.logger
|
39
|
+
end
|
40
|
+
|
41
|
+
def cache?
|
42
|
+
Miteru.cache?
|
43
|
+
end
|
44
|
+
|
45
|
+
def sidekiq?
|
46
|
+
Miteru.sidekiq?
|
47
|
+
end
|
48
|
+
|
49
|
+
#
|
50
|
+
# @return [Miteru::Cache]
|
51
|
+
#
|
52
|
+
def cache
|
53
|
+
Miteru.cache
|
54
|
+
end
|
55
|
+
|
56
|
+
def threads
|
57
|
+
Miteru.config.threads
|
58
|
+
end
|
59
|
+
|
60
|
+
def verbose?
|
61
|
+
Miteru.config.verbose
|
62
|
+
end
|
63
|
+
|
64
|
+
def cache_prefix
|
65
|
+
Miteru.config.cache_prefix
|
66
|
+
end
|
67
|
+
|
68
|
+
def cache_ex
|
69
|
+
Miteru.config.cache_ex
|
70
|
+
end
|
27
71
|
end
|
28
72
|
end
|
data/lib/miteru/version.rb
CHANGED
data/lib/miteru/website.rb
CHANGED
@@ -4,6 +4,8 @@ require "oga"
|
|
4
4
|
|
5
5
|
module Miteru
|
6
6
|
class Website < Service
|
7
|
+
include Concerns::UrlTruncatable
|
8
|
+
|
7
9
|
# @return [String]
|
8
10
|
attr_reader :url
|
9
11
|
|
@@ -41,17 +43,6 @@ module Miteru
|
|
41
43
|
(href_links + possible_file_links).compact.uniq
|
42
44
|
end
|
43
45
|
|
44
|
-
#
|
45
|
-
# @return [String]
|
46
|
-
#
|
47
|
-
def truncated_url
|
48
|
-
url.truncate(64)
|
49
|
-
end
|
50
|
-
|
51
|
-
def defanged_truncated_url
|
52
|
-
truncated_url.to_s.gsub(".", "[.]")
|
53
|
-
end
|
54
|
-
|
55
46
|
def info
|
56
47
|
"#{defanged_truncated_url} has #{kits.length} kit(s) (Source: #{source})"
|
57
48
|
end
|
data/lib/miteru.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miteru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manabu Niseki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -590,6 +590,7 @@ files:
|
|
590
590
|
- lib/miteru/commands/web.rb
|
591
591
|
- lib/miteru/concerns/database_connectable.rb
|
592
592
|
- lib/miteru/concerns/error_unwrappable.rb
|
593
|
+
- lib/miteru/concerns/url_truncatable.rb
|
593
594
|
- lib/miteru/config.rb
|
594
595
|
- lib/miteru/crawler.rb
|
595
596
|
- lib/miteru/database.rb
|