miteru 1.2.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/gem.yml +36 -0
- data/.github/workflows/{test.yml → ruby.yml} +4 -13
- data/.gitignore +4 -1
- data/.rspec +1 -1
- data/README.md +7 -17
- data/docker-compose.yml +12 -0
- data/exe/miteru +3 -3
- data/lefthook.yml +9 -0
- data/lib/miteru/cli/application.rb +27 -0
- data/lib/miteru/cli/base.rb +16 -0
- data/lib/miteru/cli/database.rb +11 -0
- data/lib/miteru/commands/database.rb +23 -0
- data/lib/miteru/commands/main.rb +37 -0
- data/lib/miteru/commands/sidekiq.rb +35 -0
- data/lib/miteru/commands/web.rb +37 -0
- data/lib/miteru/concerns/database_connectable.rb +16 -0
- data/lib/miteru/concerns/error_unwrappable.rb +30 -0
- data/lib/miteru/config.rb +98 -0
- data/lib/miteru/crawler.rb +28 -44
- data/lib/miteru/database.rb +50 -38
- data/lib/miteru/downloader.rb +52 -41
- data/lib/miteru/errors.rb +37 -0
- data/lib/miteru/feeds/ayashige.rb +9 -20
- data/lib/miteru/feeds/base.rb +141 -0
- data/lib/miteru/feeds/phishing_database.rb +11 -10
- data/lib/miteru/feeds/urlscan.rb +47 -19
- data/lib/miteru/feeds/urlscan_pro.rb +20 -18
- data/lib/miteru/http.rb +51 -0
- data/lib/miteru/kit.rb +28 -20
- data/lib/miteru/mixin.rb +2 -29
- data/lib/miteru/notifiers/base.rb +10 -3
- data/lib/miteru/notifiers/slack.rb +85 -10
- data/lib/miteru/notifiers/urlscan.rb +29 -14
- data/lib/miteru/orchestrator.rb +51 -0
- data/lib/miteru/record.rb +8 -15
- data/lib/miteru/service.rb +28 -0
- data/lib/miteru/sidekiq/application.rb +13 -0
- data/lib/miteru/sidekiq/jobs.rb +21 -0
- data/lib/miteru/version.rb +1 -1
- data/lib/miteru/web/application.rb +42 -0
- data/lib/miteru/website.rb +48 -48
- data/lib/miteru.rb +130 -22
- data/miteru-sidekiq.service +13 -0
- data/miteru.db-shm +0 -0
- data/miteru.db-wal +0 -0
- data/miteru.gemspec +49 -38
- metadata +265 -97
- data/.overcommit.yml +0 -12
- data/.standard.yml +0 -4
- data/lib/miteru/attachement.rb +0 -74
- data/lib/miteru/cli.rb +0 -41
- data/lib/miteru/configuration.rb +0 -122
- data/lib/miteru/error.rb +0 -7
- data/lib/miteru/feeds/feed.rb +0 -53
- data/lib/miteru/feeds/phishstats.rb +0 -28
- data/lib/miteru/feeds.rb +0 -45
- data/lib/miteru/http_client.rb +0 -85
data/lib/miteru/kit.rb
CHANGED
@@ -1,14 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "cgi"
|
4
|
-
require "uuidtools"
|
5
|
-
require "uri"
|
6
|
-
|
7
3
|
module Miteru
|
8
|
-
class Kit
|
9
|
-
VALID_EXTENSIONS = Miteru.configuration.valid_extensions
|
10
|
-
VALID_MIME_TYPES = Miteru.configuration.valid_mime_types
|
11
|
-
|
4
|
+
class Kit < Service
|
12
5
|
# @return [String]
|
13
6
|
attr_reader :url
|
14
7
|
|
@@ -27,7 +20,11 @@ module Miteru
|
|
27
20
|
# @return [Hash, nil]
|
28
21
|
attr_reader :headers
|
29
22
|
|
30
|
-
|
23
|
+
#
|
24
|
+
# @param [String] url
|
25
|
+
# @param [String] source
|
26
|
+
#
|
27
|
+
def initialize(url, source:)
|
31
28
|
@url = url
|
32
29
|
@source = source
|
33
30
|
|
@@ -91,6 +88,13 @@ module Miteru
|
|
91
88
|
@decoded_url ||= URI.decode_www_form_component(url)
|
92
89
|
end
|
93
90
|
|
91
|
+
#
|
92
|
+
# @return [String]
|
93
|
+
#
|
94
|
+
def truncated_url
|
95
|
+
url.truncate(64)
|
96
|
+
end
|
97
|
+
|
94
98
|
private
|
95
99
|
|
96
100
|
def filename_to_download
|
@@ -98,21 +102,25 @@ module Miteru
|
|
98
102
|
end
|
99
103
|
|
100
104
|
def base_dir
|
101
|
-
@base_dir ||= Miteru.
|
105
|
+
@base_dir ||= Miteru.config.download_to
|
102
106
|
end
|
103
107
|
|
104
108
|
def valid_ext?
|
105
|
-
|
109
|
+
Miteru.config.file_extensions.include? extname
|
110
|
+
end
|
111
|
+
|
112
|
+
def http
|
113
|
+
HTTP::Factory.build
|
106
114
|
end
|
107
115
|
|
108
116
|
def before_validation
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
117
|
+
Try[StandardError] do
|
118
|
+
res = http.head(url)
|
119
|
+
@content_length = res.content_length
|
120
|
+
@mime_type = res.content_type.mime_type.to_s
|
121
|
+
@status = res.status
|
122
|
+
@headers = res.headers.to_h
|
123
|
+
end.recover { nil }.value!
|
116
124
|
end
|
117
125
|
|
118
126
|
def reachable?
|
@@ -120,11 +128,11 @@ module Miteru
|
|
120
128
|
end
|
121
129
|
|
122
130
|
def valid_mime_type?
|
123
|
-
|
131
|
+
Miteru.config.file_mime_types.include? mime_type
|
124
132
|
end
|
125
133
|
|
126
134
|
def valid_content_length?
|
127
|
-
content_length.to_i
|
135
|
+
content_length.to_i.positive?
|
128
136
|
end
|
129
137
|
end
|
130
138
|
end
|
data/lib/miteru/mixin.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Miteru
|
2
4
|
module Mixins
|
3
5
|
module URL
|
@@ -13,35 +15,6 @@ module Miteru
|
|
13
15
|
def invalid_extension?(url)
|
14
16
|
IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
|
15
17
|
end
|
16
|
-
|
17
|
-
#
|
18
|
-
# Breakdown a URL into URLs
|
19
|
-
#
|
20
|
-
# @param [String] url
|
21
|
-
# @param [Boolean] enable_directory_traveling
|
22
|
-
#
|
23
|
-
# @return [Array<String>]
|
24
|
-
#
|
25
|
-
def breakdown(url, enable_directory_traveling)
|
26
|
-
begin
|
27
|
-
uri = URI.parse(url)
|
28
|
-
rescue URI::InvalidURIError => _e
|
29
|
-
return []
|
30
|
-
end
|
31
|
-
|
32
|
-
base = "#{uri.scheme}://#{uri.hostname}"
|
33
|
-
return [base] unless enable_directory_traveling
|
34
|
-
|
35
|
-
segments = uri.path.split("/")
|
36
|
-
return [base] if segments.length.zero?
|
37
|
-
|
38
|
-
urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
|
39
|
-
|
40
|
-
urls.reject do |breakdowned_url|
|
41
|
-
# Reject a url which ends with specific extension names
|
42
|
-
invalid_extension? breakdowned_url
|
43
|
-
end
|
44
|
-
end
|
45
18
|
end
|
46
19
|
end
|
47
20
|
end
|
data/lib/miteru/notifiers/base.rb
CHANGED
@@ -2,14 +2,21 @@
|
|
2
2
|
|
3
3
|
module Miteru
|
4
4
|
module Notifiers
|
5
|
-
class Base
|
6
|
-
def
|
5
|
+
class Base < Service
|
6
|
+
def call(website)
|
7
7
|
raise NotImplementedError
|
8
8
|
end
|
9
9
|
|
10
|
-
def
|
10
|
+
def callable?
|
11
11
|
raise NotImplementedError
|
12
12
|
end
|
13
|
+
|
14
|
+
class << self
|
15
|
+
def inherited(child)
|
16
|
+
super
|
17
|
+
Miteru.notifiers << child
|
18
|
+
end
|
19
|
+
end
|
13
20
|
end
|
14
21
|
end
|
15
22
|
end
|
data/lib/miteru/notifiers/slack.rb
CHANGED
@@ -1,32 +1,107 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "colorize"
|
4
3
|
require "slack-notifier"
|
5
4
|
|
6
5
|
module Miteru
|
7
6
|
module Notifiers
|
7
|
+
class SlackAttachment
|
8
|
+
attr_reader :url
|
9
|
+
|
10
|
+
def initialize(url)
|
11
|
+
@url = url
|
12
|
+
end
|
13
|
+
|
14
|
+
def to_a
|
15
|
+
[
|
16
|
+
{
|
17
|
+
text: defanged_url,
|
18
|
+
fallback: "VT & urlscan.io links",
|
19
|
+
actions:
|
20
|
+
}
|
21
|
+
]
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def actions
|
27
|
+
[vt_link, urlscan_link].compact
|
28
|
+
end
|
29
|
+
|
30
|
+
def vt_link
|
31
|
+
return nil unless _vt_link
|
32
|
+
|
33
|
+
{
|
34
|
+
type: "button",
|
35
|
+
text: "Lookup on VirusTotal",
|
36
|
+
url: _vt_link
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
def urlscan_link
|
41
|
+
return nil unless _urlscan_link
|
42
|
+
|
43
|
+
{
|
44
|
+
type: "button",
|
45
|
+
text: "Lookup on urlscan.io",
|
46
|
+
url: _urlscan_link
|
47
|
+
}
|
48
|
+
end
|
49
|
+
|
50
|
+
def defanged_url
|
51
|
+
@defanged_url ||= url.to_s.gsub(".", "[.]")
|
52
|
+
end
|
53
|
+
|
54
|
+
def domain
|
55
|
+
@domain ||= [].tap do |out|
|
56
|
+
out << URI(url).hostname
|
57
|
+
rescue URI::Error => _e
|
58
|
+
out << nil
|
59
|
+
end.first
|
60
|
+
end
|
61
|
+
|
62
|
+
def _urlscan_link
|
63
|
+
return nil unless domain
|
64
|
+
|
65
|
+
"https://urlscan.io/domain/#{domain}"
|
66
|
+
end
|
67
|
+
|
68
|
+
def _vt_link
|
69
|
+
return nil unless domain
|
70
|
+
|
71
|
+
"https://www.virustotal.com/#/domain/#{domain}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
8
75
|
class Slack < Base
|
9
76
|
#
|
10
77
|
# Notifiy to Slack
|
11
78
|
#
|
12
|
-
# @param [Miteru::Website website
|
79
|
+
# @param [Miteru::Website] website
|
13
80
|
#
|
14
|
-
def
|
15
|
-
|
81
|
+
def call(website)
|
82
|
+
return unless callable?
|
83
|
+
|
84
|
+
attachment = SlackAttachment.new(website.url)
|
16
85
|
kits = website.kits.select(&:downloaded?)
|
86
|
+
notifier.post(text: website.message.capitalize, attachments: attachment.to_a) if kits.any?
|
87
|
+
end
|
88
|
+
|
89
|
+
def callable?
|
90
|
+
!Miteru.config.slack_webhook_url.nil?
|
91
|
+
end
|
17
92
|
|
18
|
-
|
93
|
+
private
|
19
94
|
|
20
|
-
|
21
|
-
Miteru.
|
95
|
+
def webhook_url
|
96
|
+
Miteru.config.slack_webhook_url
|
22
97
|
end
|
23
98
|
|
24
|
-
def
|
25
|
-
Miteru.
|
99
|
+
def channel
|
100
|
+
Miteru.config.slack_channel
|
26
101
|
end
|
27
102
|
|
28
103
|
def notifier
|
29
|
-
Slack::Notifier.new(
|
104
|
+
::Slack::Notifier.new(webhook_url, channel:)
|
30
105
|
end
|
31
106
|
end
|
32
107
|
end
|
data/lib/miteru/notifiers/urlscan.rb
CHANGED
@@ -1,36 +1,51 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "urlscan"
|
4
|
-
|
5
3
|
module Miteru
|
6
4
|
module Notifiers
|
7
5
|
class UrlScan < Base
|
8
6
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# @param [Miteru::Website website
|
7
|
+
# @param [Miteru::Website] website
|
12
8
|
#
|
13
|
-
def
|
9
|
+
def call(website)
|
10
|
+
return unless callable?
|
11
|
+
|
14
12
|
kits = website.kits.select(&:downloaded?)
|
15
|
-
return unless
|
13
|
+
return unless kits.any?
|
16
14
|
|
17
15
|
kits.each { |kit| submit(kit.url) }
|
18
16
|
end
|
19
17
|
|
20
|
-
def
|
21
|
-
Miteru.
|
18
|
+
def callable?
|
19
|
+
!Miteru.config.urlscan_api_key.nil?
|
22
20
|
end
|
23
21
|
|
24
22
|
private
|
25
23
|
|
26
|
-
|
27
|
-
|
24
|
+
#
|
25
|
+
# @return [::HTTP::Client]
|
26
|
+
#
|
27
|
+
def http
|
28
|
+
@http ||= HTTP::Factory.build(headers:, timeout:)
|
29
|
+
end
|
30
|
+
|
31
|
+
def headers
|
32
|
+
{"api-key": Miteru.config.urlscan_api_key}
|
33
|
+
end
|
34
|
+
|
35
|
+
def timeout
|
36
|
+
Miteru.config.timeout
|
37
|
+
end
|
38
|
+
|
39
|
+
def tags
|
40
|
+
%w[miteru phishkit]
|
41
|
+
end
|
42
|
+
|
43
|
+
def visibility
|
44
|
+
Miteru.config.urlscan_submit_visibility
|
28
45
|
end
|
29
46
|
|
30
47
|
def submit(url)
|
31
|
-
|
32
|
-
rescue StandardError
|
33
|
-
# do nothing
|
48
|
+
http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
|
34
49
|
end
|
35
50
|
end
|
36
51
|
end
|
data/lib/miteru/orchestrator.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Miteru
|
4
|
+
class Orchestrator < Service
|
5
|
+
def call
|
6
|
+
Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?
|
7
|
+
|
8
|
+
if Miteru.sidekiq?
|
9
|
+
websites.each { |website| Jobs::CrawleJob.perform_async(website.url, website.source) }
|
10
|
+
else
|
11
|
+
Miteru.logger.info("Use #{threads} thread(s).") if verbose?
|
12
|
+
Parallel.each(websites, in_threads: threads) { |website| crawl(website) }
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
#
|
17
|
+
# @return [Array<Miteru::Websites>]
|
18
|
+
#
|
19
|
+
def websites
|
20
|
+
@websites ||= [].tap do |out|
|
21
|
+
feeds.each do |feed|
|
22
|
+
result = feed.result
|
23
|
+
if result.success?
|
24
|
+
websites = result.value!
|
25
|
+
Miteru.logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
|
26
|
+
out << websites
|
27
|
+
else
|
28
|
+
Miteru.logger.warn("Feed:#{feed.source} failed - #{result.failure}")
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end.flatten
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
|
36
|
+
def threads
|
37
|
+
Miteru.config.threads
|
38
|
+
end
|
39
|
+
|
40
|
+
def verbose?
|
41
|
+
Miteru.config.verbose
|
42
|
+
end
|
43
|
+
|
44
|
+
#
|
45
|
+
# @return [Array<Miteru::Feeds::Base>]
|
46
|
+
#
|
47
|
+
def feeds
|
48
|
+
Miteru.feeds.map(&:new)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
data/lib/miteru/record.rb
CHANGED
@@ -1,35 +1,27 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "active_record"
|
4
|
-
|
5
3
|
module Miteru
|
6
4
|
class Record < ActiveRecord::Base
|
7
5
|
class << self
|
8
6
|
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# @param [String] hash
|
7
|
+
# @param [String] sha256
|
12
8
|
#
|
13
9
|
# @return [Boolean] true if it is unique. Otherwise false.
|
14
10
|
#
|
15
|
-
def
|
16
|
-
|
17
|
-
return true if record.nil?
|
18
|
-
|
19
|
-
false
|
11
|
+
def unique_sha256?(sha256)
|
12
|
+
!where(sha256:).exists?
|
20
13
|
end
|
21
14
|
|
22
15
|
#
|
23
16
|
# Create a new record based on a kit
|
24
17
|
#
|
25
18
|
# @param [Miteru::Kit] kit
|
26
|
-
# @param [String]
|
19
|
+
# @param [String] sha256
|
27
20
|
#
|
28
21
|
# @return [Miteru::Record]
|
29
22
|
#
|
30
|
-
def create_by_kit_and_hash(kit,
|
23
|
+
def create_by_kit_and_hash(kit, sha256:)
|
31
24
|
record = new(
|
32
|
-
hash: hash,
|
33
25
|
source: kit.source,
|
34
26
|
hostname: kit.hostname,
|
35
27
|
url: kit.decoded_url,
|
@@ -37,11 +29,12 @@ module Miteru
|
|
37
29
|
filename: kit.filename,
|
38
30
|
filesize: kit.filesize,
|
39
31
|
mime_type: kit.mime_type,
|
40
|
-
downloaded_as: kit.filepath_to_download
|
32
|
+
downloaded_as: kit.filepath_to_download,
|
33
|
+
sha256:
|
41
34
|
)
|
42
35
|
record.save
|
43
36
|
record
|
44
|
-
rescue TypeError, ActiveRecord::RecordNotUnique
|
37
|
+
rescue TypeError, ActiveRecord::RecordNotUnique
|
45
38
|
nil
|
46
39
|
end
|
47
40
|
end
|
data/lib/miteru/service.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Miteru
|
4
|
+
#
|
5
|
+
# Base class for services
|
6
|
+
#
|
7
|
+
class Service
|
8
|
+
include Dry::Monads[:result, :try]
|
9
|
+
|
10
|
+
def call(*args, **kwargs)
|
11
|
+
raise NotImplementedError, "You must implement #{self.class}##{__method__}"
|
12
|
+
end
|
13
|
+
|
14
|
+
def result(...)
|
15
|
+
Try[StandardError] { call(...) }.to_result
|
16
|
+
end
|
17
|
+
|
18
|
+
class << self
|
19
|
+
def call(...)
|
20
|
+
new.call(...)
|
21
|
+
end
|
22
|
+
|
23
|
+
def result(...)
|
24
|
+
new.result(...)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/lib/miteru/sidekiq/application.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
|
5
|
+
require "miteru/sidekiq/jobs"
|
6
|
+
|
7
|
+
Sidekiq.configure_server do |config|
|
8
|
+
config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
|
9
|
+
end
|
10
|
+
|
11
|
+
Sidekiq.configure_client do |config|
|
12
|
+
config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
|
13
|
+
end
|
data/lib/miteru/sidekiq/jobs.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sidekiq"
|
4
|
+
|
5
|
+
module Miteru
|
6
|
+
module Jobs
|
7
|
+
class CrawleJob
|
8
|
+
include Sidekiq::Job
|
9
|
+
include Concerns::DatabaseConnectable
|
10
|
+
|
11
|
+
#
|
12
|
+
# @param [String] url
|
13
|
+
# @param [String] source
|
14
|
+
#
|
15
|
+
def perform(url, source)
|
16
|
+
website = Miteru::Website.new(url, source:)
|
17
|
+
with_db_connection { Crawler.call(website) }
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/miteru/version.rb
CHANGED
data/lib/miteru/web/application.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Rack
|
4
|
+
require "rack"
|
5
|
+
require "rack/session"
|
6
|
+
require "rackup"
|
7
|
+
|
8
|
+
require "rack/handler/puma"
|
9
|
+
|
10
|
+
# Sidekiq
|
11
|
+
require "sidekiq/web"
|
12
|
+
|
13
|
+
module Miteru
|
14
|
+
module Web
|
15
|
+
class App
|
16
|
+
class << self
|
17
|
+
def instance
|
18
|
+
Rack::Builder.new do
|
19
|
+
use Rack::Session::Cookie, secret: SecureRandom.hex(32), same_site: true, max_age: 86_400
|
20
|
+
|
21
|
+
map "/" do
|
22
|
+
run Sidekiq::Web
|
23
|
+
end
|
24
|
+
|
25
|
+
run App.new
|
26
|
+
end.to_app
|
27
|
+
end
|
28
|
+
|
29
|
+
def run!(port: 9292, host: "localhost", threads: "0:3", verbose: false, worker_timeout: 60, open: true)
|
30
|
+
Rackup::Handler::Puma.run(
|
31
|
+
instance,
|
32
|
+
Port: port,
|
33
|
+
Host: host,
|
34
|
+
Threads: threads,
|
35
|
+
Verbose: verbose,
|
36
|
+
worker_timeout:
|
37
|
+
)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|