miteru 2.2.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miteru/cache.rb +4 -12
- data/lib/miteru/concerns/url_truncatable.rb +24 -0
- data/lib/miteru/crawler.rb +12 -31
- data/lib/miteru/downloader.rb +1 -1
- data/lib/miteru/kit.rb +2 -11
- data/lib/miteru/notifiers/base.rb +1 -1
- data/lib/miteru/notifiers/slack.rb +5 -5
- data/lib/miteru/notifiers/urlscan.rb +3 -3
- data/lib/miteru/orchestrator.rb +35 -39
- data/lib/miteru/service.rb +44 -0
- data/lib/miteru/version.rb +1 -1
- data/lib/miteru/website.rb +2 -11
- data/lib/miteru.rb +1 -0
- data/miteru.gemspec +4 -4
- metadata +11 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 62cde56f6eff0777959ebcadb72619505b63d7fc88cde3a362effd9a707f364d
|
4
|
+
data.tar.gz: e3e0011791ab52b700f2fa824ee37285d1306426ec0cc54f5ca48f0e5bde8489
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebbf6663d3b6b07486c19f5dd56b2119e33ccafebe6f57ea5cf4629fa74a7ece9309ff6e8e900881d4bfe30d9f12e3d9cd5315f79ddec6b2f8e221ad824222dd
|
7
|
+
data.tar.gz: 63abffb3e35ab8f631188d2c116ab43c21535fbab9fb4c24d7cf304317f383e0cbc48dca01c17c600f67eeafa1a23cb93915f520eed92ff1997141b16e492be9
|
data/lib/miteru/cache.rb
CHANGED
@@ -19,8 +19,8 @@ module Miteru
|
|
19
19
|
# @param [Integer, nil] ex
|
20
20
|
#
|
21
21
|
def set(key, value, ex:)
|
22
|
-
value = redis.set("#{
|
23
|
-
|
22
|
+
value = redis.set("#{cache_prefix}:#{key}", value, ex:)
|
23
|
+
logger.info("Cache:#{key} is set.") if verbose?
|
24
24
|
value
|
25
25
|
end
|
26
26
|
|
@@ -28,21 +28,13 @@ module Miteru
|
|
28
28
|
# @param [String] key
|
29
29
|
#
|
30
30
|
def cached?(key)
|
31
|
-
value = redis.exists?("#{
|
32
|
-
|
31
|
+
value = redis.exists?("#{cache_prefix}:#{key}")
|
32
|
+
logger.info("Cache:#{key} found.") if verbose?
|
33
33
|
value
|
34
34
|
end
|
35
35
|
|
36
36
|
private
|
37
37
|
|
38
|
-
def verbose?
|
39
|
-
Miteru.config.verbose
|
40
|
-
end
|
41
|
-
|
42
|
-
def prefix
|
43
|
-
Miteru.config.cache_prefix
|
44
|
-
end
|
45
|
-
|
46
38
|
#
|
47
39
|
# @return [Redis]
|
48
40
|
#
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Miteru
|
4
|
+
module Concerns
|
5
|
+
module UrlTruncatable
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
|
8
|
+
def decoded_url
|
9
|
+
@decoded_url ||= URI.decode_www_form_component(url)
|
10
|
+
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# @return [String]
|
14
|
+
#
|
15
|
+
def truncated_url
|
16
|
+
@truncated_url ||= decoded_url.truncate(64)
|
17
|
+
end
|
18
|
+
|
19
|
+
def defanged_truncated_url
|
20
|
+
@defanged_truncated_url ||= truncated_url.to_s.gsub(".", "[.]")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/miteru/crawler.rb
CHANGED
@@ -11,23 +11,21 @@ module Miteru
|
|
11
11
|
Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
|
12
12
|
info = "Website:#{website.info}."
|
13
13
|
info = info.colorize(:red) if website.kits?
|
14
|
-
|
14
|
+
logger.info(info)
|
15
15
|
|
16
16
|
website.kits.each do |kit|
|
17
17
|
downloader = Downloader.new(kit)
|
18
18
|
result = downloader.result
|
19
|
-
|
20
19
|
unless result.success?
|
21
|
-
|
20
|
+
logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
|
22
21
|
next
|
23
22
|
end
|
24
|
-
|
25
23
|
destination = result.value!
|
26
|
-
|
24
|
+
logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.")
|
27
25
|
# Remove downloaded file if auto_download is not allowed
|
28
26
|
FileUtils.rm(destination, force: true) unless auto_download?
|
29
|
-
# Notify the
|
30
|
-
notify
|
27
|
+
# Notify the kit
|
28
|
+
notify(kit)
|
31
29
|
end
|
32
30
|
|
33
31
|
# Cache the website
|
@@ -37,39 +35,22 @@ module Miteru
|
|
37
35
|
|
38
36
|
private
|
39
37
|
|
40
|
-
def cache?
|
41
|
-
Miteru.cache?
|
42
|
-
end
|
43
|
-
|
44
|
-
def cache
|
45
|
-
Miteru.cache
|
46
|
-
end
|
47
|
-
|
48
|
-
def cache_ex
|
49
|
-
Miteru.config.cache_ex
|
50
|
-
end
|
51
|
-
|
52
|
-
def auto_download?
|
53
|
-
Miteru.config.auto_download
|
54
|
-
end
|
55
|
-
|
56
38
|
#
|
57
|
-
# @param [Miteru::
|
39
|
+
# @param [Miteru::Kit] kit
|
58
40
|
#
|
59
|
-
def notify(
|
41
|
+
def notify(kit)
|
60
42
|
notifiers.each do |notifier|
|
61
|
-
result = notifier.result(
|
43
|
+
result = notifier.result(kit)
|
62
44
|
if result.success?
|
63
|
-
|
45
|
+
logger.info("Notifier:#{notifier.name} succeeded.")
|
64
46
|
else
|
65
|
-
|
47
|
+
logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
|
66
48
|
end
|
67
49
|
end
|
68
50
|
end
|
69
51
|
|
70
|
-
|
71
|
-
|
72
|
-
#
|
52
|
+
private
|
53
|
+
|
73
54
|
def notifiers
|
74
55
|
@notifiers ||= Miteru.notifiers.map(&:new)
|
75
56
|
end
|
data/lib/miteru/downloader.rb
CHANGED
data/lib/miteru/kit.rb
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
module Miteru
|
4
4
|
class Kit < Service
|
5
|
+
include Concerns::UrlTruncatable
|
6
|
+
|
5
7
|
# @return [String]
|
6
8
|
attr_reader :url
|
7
9
|
|
@@ -85,17 +87,6 @@ module Miteru
|
|
85
87
|
@hostname ||= URI(url).hostname
|
86
88
|
end
|
87
89
|
|
88
|
-
def decoded_url
|
89
|
-
@decoded_url ||= URI.decode_www_form_component(url)
|
90
|
-
end
|
91
|
-
|
92
|
-
#
|
93
|
-
# @return [String]
|
94
|
-
#
|
95
|
-
def truncated_url
|
96
|
-
url.truncate(64)
|
97
|
-
end
|
98
|
-
|
99
90
|
private
|
100
91
|
|
101
92
|
def filename_to_download
|
@@ -75,15 +75,15 @@ module Miteru
|
|
75
75
|
|
76
76
|
class Slack < Base
|
77
77
|
#
|
78
|
-
#
|
78
|
+
# Notify to Slack
|
79
79
|
#
|
80
|
-
# @param [Miteru::
|
80
|
+
# @param [Miteru::Kit] kit
|
81
81
|
#
|
82
|
-
def call(
|
82
|
+
def call(kit)
|
83
83
|
return unless callable?
|
84
84
|
|
85
|
-
attachment = SlackAttachment.new(
|
86
|
-
notifier.post(text:
|
85
|
+
attachment = SlackAttachment.new(kit.url)
|
86
|
+
notifier.post(text: kit.basename, attachments: attachment.to_a)
|
87
87
|
end
|
88
88
|
|
89
89
|
def callable?
|
@@ -4,12 +4,12 @@ module Miteru
|
|
4
4
|
module Notifiers
|
5
5
|
class UrlScan < Base
|
6
6
|
#
|
7
|
-
# @param [Miteru::
|
7
|
+
# @param [Miteru::Kit] kit
|
8
8
|
#
|
9
|
-
def call(
|
9
|
+
def call(kit)
|
10
10
|
return unless callable?
|
11
11
|
|
12
|
-
|
12
|
+
submit(kit.decoded_url, source: kit.source)
|
13
13
|
end
|
14
14
|
|
15
15
|
def callable?
|
data/lib/miteru/orchestrator.rb
CHANGED
@@ -3,24 +3,31 @@
|
|
3
3
|
module Miteru
|
4
4
|
class Orchestrator < Service
|
5
5
|
def call
|
6
|
-
|
6
|
+
logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
|
7
7
|
|
8
|
-
if
|
9
|
-
|
10
|
-
Jobs::CrawleJob.perform_async(website.url, website.source)
|
11
|
-
Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
12
|
-
end
|
8
|
+
if sidekiq?
|
9
|
+
sidekiq_call
|
13
10
|
else
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
parallel_call
|
12
|
+
end
|
13
|
+
end
|
17
14
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
15
|
+
def sidekiq_call
|
16
|
+
non_cached_websites.each do |website|
|
17
|
+
Jobs::CrawleJob.perform_async(website.url, website.source)
|
18
|
+
logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def parallel_call
|
23
|
+
logger.info("Use #{threads} thread(s).") if verbose?
|
24
|
+
Parallel.each(non_cached_websites, in_threads: threads) do |website|
|
25
|
+
logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
|
26
|
+
result = Crawler.result(website)
|
27
|
+
if result.success?
|
28
|
+
logger.info("Crawler:#{website.truncated_url} succeeded.")
|
29
|
+
else
|
30
|
+
logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
@@ -34,44 +41,33 @@ module Miteru
|
|
34
41
|
result = feed.result
|
35
42
|
if result.success?
|
36
43
|
websites = result.value!
|
37
|
-
|
44
|
+
logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
|
38
45
|
out << websites
|
39
46
|
else
|
40
|
-
|
47
|
+
logger.warn("Feed:#{feed.source} failed - #{result.failure}")
|
41
48
|
end
|
42
49
|
end
|
43
50
|
end.flatten.uniq(&:url)
|
44
51
|
end
|
45
52
|
|
53
|
+
#
|
54
|
+
# @return [Array<Miteru::Website>]
|
55
|
+
#
|
46
56
|
def non_cached_websites
|
47
|
-
|
48
|
-
|
49
|
-
|
57
|
+
@non_cached_websites ||= [].tap do |out|
|
58
|
+
out << if cache?
|
59
|
+
websites.reject { |website| cache.cached?(website.url) }
|
60
|
+
else
|
61
|
+
websites
|
62
|
+
end
|
63
|
+
end.flatten.uniq(&:url)
|
50
64
|
end
|
51
65
|
|
52
66
|
#
|
53
67
|
# @return [Array<Miteru::Feeds::Base>]
|
54
68
|
#
|
55
69
|
def feeds
|
56
|
-
Miteru.feeds.map(&:new)
|
57
|
-
end
|
58
|
-
|
59
|
-
private
|
60
|
-
|
61
|
-
def cache?
|
62
|
-
Miteru.cache?
|
63
|
-
end
|
64
|
-
|
65
|
-
def cache
|
66
|
-
Miteru.cache
|
67
|
-
end
|
68
|
-
|
69
|
-
def threads
|
70
|
-
Miteru.config.threads
|
71
|
-
end
|
72
|
-
|
73
|
-
def verbose?
|
74
|
-
Miteru.config.verbose
|
70
|
+
@feeds ||= Miteru.feeds.map(&:new)
|
75
71
|
end
|
76
72
|
end
|
77
73
|
end
|
data/lib/miteru/service.rb
CHANGED
@@ -24,5 +24,49 @@ module Miteru
|
|
24
24
|
new.result(...)
|
25
25
|
end
|
26
26
|
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def auto_download?
|
31
|
+
Miteru.config.auto_download
|
32
|
+
end
|
33
|
+
|
34
|
+
#
|
35
|
+
# @return [SemanticLogger]
|
36
|
+
#
|
37
|
+
def logger
|
38
|
+
Miteru.logger
|
39
|
+
end
|
40
|
+
|
41
|
+
def cache?
|
42
|
+
Miteru.cache?
|
43
|
+
end
|
44
|
+
|
45
|
+
def sidekiq?
|
46
|
+
Miteru.sidekiq?
|
47
|
+
end
|
48
|
+
|
49
|
+
#
|
50
|
+
# @return [Miteru::Cache]
|
51
|
+
#
|
52
|
+
def cache
|
53
|
+
Miteru.cache
|
54
|
+
end
|
55
|
+
|
56
|
+
def threads
|
57
|
+
Miteru.config.threads
|
58
|
+
end
|
59
|
+
|
60
|
+
def verbose?
|
61
|
+
Miteru.config.verbose
|
62
|
+
end
|
63
|
+
|
64
|
+
def cache_prefix
|
65
|
+
Miteru.config.cache_prefix
|
66
|
+
end
|
67
|
+
|
68
|
+
def cache_ex
|
69
|
+
Miteru.config.cache_ex
|
70
|
+
end
|
27
71
|
end
|
28
72
|
end
|
data/lib/miteru/version.rb
CHANGED
data/lib/miteru/website.rb
CHANGED
@@ -4,6 +4,8 @@ require "oga"
|
|
4
4
|
|
5
5
|
module Miteru
|
6
6
|
class Website < Service
|
7
|
+
include Concerns::UrlTruncatable
|
8
|
+
|
7
9
|
# @return [String]
|
8
10
|
attr_reader :url
|
9
11
|
|
@@ -41,17 +43,6 @@ module Miteru
|
|
41
43
|
(href_links + possible_file_links).compact.uniq
|
42
44
|
end
|
43
45
|
|
44
|
-
#
|
45
|
-
# @return [String]
|
46
|
-
#
|
47
|
-
def truncated_url
|
48
|
-
url.truncate(64)
|
49
|
-
end
|
50
|
-
|
51
|
-
def defanged_truncated_url
|
52
|
-
truncated_url.to_s.gsub(".", "[.]")
|
53
|
-
end
|
54
|
-
|
55
46
|
def info
|
56
47
|
"#{defanged_truncated_url} has #{kits.length} kit(s) (Source: #{source})"
|
57
48
|
end
|
data/lib/miteru.rb
CHANGED
data/miteru.gemspec
CHANGED
@@ -34,16 +34,16 @@ Gem::Specification.new do |spec|
|
|
34
34
|
spec.add_development_dependency "rake", "~> 13.1"
|
35
35
|
spec.add_development_dependency "rspec", "~> 3.13"
|
36
36
|
spec.add_development_dependency "simplecov-lcov", "~> 0.8"
|
37
|
-
spec.add_development_dependency "standard", "~> 1.
|
37
|
+
spec.add_development_dependency "standard", "~> 1.34"
|
38
38
|
spec.add_development_dependency "test-prof", "~> 1.3"
|
39
39
|
spec.add_development_dependency "vcr", "~> 6.2"
|
40
|
-
spec.add_development_dependency "webmock", "~> 3.
|
40
|
+
spec.add_development_dependency "webmock", "~> 3.20"
|
41
41
|
|
42
42
|
spec.add_dependency "activerecord", "7.1.3"
|
43
43
|
spec.add_dependency "addressable", "2.8.6"
|
44
44
|
spec.add_dependency "anyway_config", "2.6.3"
|
45
45
|
spec.add_dependency "colorize", "1.1.0"
|
46
|
-
spec.add_dependency "dotenv", "
|
46
|
+
spec.add_dependency "dotenv", "3.0.0"
|
47
47
|
spec.add_dependency "down", "5.4.1"
|
48
48
|
spec.add_dependency "dry-files", "1.1.0"
|
49
49
|
spec.add_dependency "dry-monads", "1.6.0"
|
@@ -55,7 +55,7 @@ Gem::Specification.new do |spec|
|
|
55
55
|
spec.add_dependency "rack", "3.0.9"
|
56
56
|
spec.add_dependency "rack-session", "2.0.0"
|
57
57
|
spec.add_dependency "rackup", "2.1.0"
|
58
|
-
spec.add_dependency "redis", "5.0
|
58
|
+
spec.add_dependency "redis", "5.1.0"
|
59
59
|
spec.add_dependency "semantic_logger", "4.15.0"
|
60
60
|
spec.add_dependency "sentry-ruby", "5.16.1"
|
61
61
|
spec.add_dependency "sentry-sidekiq", "5.16.1"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miteru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.
|
4
|
+
version: 2.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manabu Niseki
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -142,14 +142,14 @@ dependencies:
|
|
142
142
|
requirements:
|
143
143
|
- - "~>"
|
144
144
|
- !ruby/object:Gem::Version
|
145
|
-
version: '1.
|
145
|
+
version: '1.34'
|
146
146
|
type: :development
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '1.
|
152
|
+
version: '1.34'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
154
|
name: test-prof
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -184,14 +184,14 @@ dependencies:
|
|
184
184
|
requirements:
|
185
185
|
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
|
-
version: '3.
|
187
|
+
version: '3.20'
|
188
188
|
type: :development
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
192
|
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
|
-
version: '3.
|
194
|
+
version: '3.20'
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
196
|
name: activerecord
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -254,14 +254,14 @@ dependencies:
|
|
254
254
|
requirements:
|
255
255
|
- - '='
|
256
256
|
- !ruby/object:Gem::Version
|
257
|
-
version:
|
257
|
+
version: 3.0.0
|
258
258
|
type: :runtime
|
259
259
|
prerelease: false
|
260
260
|
version_requirements: !ruby/object:Gem::Requirement
|
261
261
|
requirements:
|
262
262
|
- - '='
|
263
263
|
- !ruby/object:Gem::Version
|
264
|
-
version:
|
264
|
+
version: 3.0.0
|
265
265
|
- !ruby/object:Gem::Dependency
|
266
266
|
name: down
|
267
267
|
requirement: !ruby/object:Gem::Requirement
|
@@ -422,14 +422,14 @@ dependencies:
|
|
422
422
|
requirements:
|
423
423
|
- - '='
|
424
424
|
- !ruby/object:Gem::Version
|
425
|
-
version: 5.0
|
425
|
+
version: 5.1.0
|
426
426
|
type: :runtime
|
427
427
|
prerelease: false
|
428
428
|
version_requirements: !ruby/object:Gem::Requirement
|
429
429
|
requirements:
|
430
430
|
- - '='
|
431
431
|
- !ruby/object:Gem::Version
|
432
|
-
version: 5.0
|
432
|
+
version: 5.1.0
|
433
433
|
- !ruby/object:Gem::Dependency
|
434
434
|
name: semantic_logger
|
435
435
|
requirement: !ruby/object:Gem::Requirement
|
@@ -590,6 +590,7 @@ files:
|
|
590
590
|
- lib/miteru/commands/web.rb
|
591
591
|
- lib/miteru/concerns/database_connectable.rb
|
592
592
|
- lib/miteru/concerns/error_unwrappable.rb
|
593
|
+
- lib/miteru/concerns/url_truncatable.rb
|
593
594
|
- lib/miteru/config.rb
|
594
595
|
- lib/miteru/crawler.rb
|
595
596
|
- lib/miteru/database.rb
|