miteru 1.2.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/gem.yml +36 -0
  3. data/.github/workflows/{test.yml → ruby.yml} +4 -13
  4. data/.gitignore +7 -2
  5. data/.rspec +1 -1
  6. data/README.md +7 -17
  7. data/docker-compose.yml +12 -0
  8. data/exe/miteru +3 -3
  9. data/lefthook.yml +9 -0
  10. data/lib/miteru/cli/application.rb +27 -0
  11. data/lib/miteru/cli/base.rb +16 -0
  12. data/lib/miteru/cli/database.rb +11 -0
  13. data/lib/miteru/commands/database.rb +23 -0
  14. data/lib/miteru/commands/main.rb +37 -0
  15. data/lib/miteru/commands/sidekiq.rb +35 -0
  16. data/lib/miteru/commands/web.rb +37 -0
  17. data/lib/miteru/concerns/database_connectable.rb +16 -0
  18. data/lib/miteru/concerns/error_unwrappable.rb +30 -0
  19. data/lib/miteru/config.rb +98 -0
  20. data/lib/miteru/crawler.rb +28 -44
  21. data/lib/miteru/database.rb +50 -38
  22. data/lib/miteru/downloader.rb +52 -41
  23. data/lib/miteru/errors.rb +37 -0
  24. data/lib/miteru/feeds/ayashige.rb +9 -20
  25. data/lib/miteru/feeds/base.rb +141 -0
  26. data/lib/miteru/feeds/phishing_database.rb +11 -10
  27. data/lib/miteru/feeds/urlscan.rb +47 -19
  28. data/lib/miteru/feeds/urlscan_pro.rb +20 -18
  29. data/lib/miteru/http.rb +51 -0
  30. data/lib/miteru/kit.rb +28 -20
  31. data/lib/miteru/mixin.rb +2 -29
  32. data/lib/miteru/notifiers/base.rb +10 -3
  33. data/lib/miteru/notifiers/slack.rb +85 -10
  34. data/lib/miteru/notifiers/urlscan.rb +29 -14
  35. data/lib/miteru/orchestrator.rb +58 -0
  36. data/lib/miteru/record.rb +8 -15
  37. data/lib/miteru/service.rb +28 -0
  38. data/lib/miteru/sidekiq/application.rb +13 -0
  39. data/lib/miteru/sidekiq/jobs.rb +21 -0
  40. data/lib/miteru/version.rb +1 -1
  41. data/lib/miteru/web/application.rb +42 -0
  42. data/lib/miteru/website.rb +48 -48
  43. data/lib/miteru.rb +130 -22
  44. data/miteru.gemspec +49 -38
  45. metadata +262 -97
  46. data/.overcommit.yml +0 -12
  47. data/.standard.yml +0 -4
  48. data/lib/miteru/attachement.rb +0 -74
  49. data/lib/miteru/cli.rb +0 -41
  50. data/lib/miteru/configuration.rb +0 -122
  51. data/lib/miteru/error.rb +0 -7
  52. data/lib/miteru/feeds/feed.rb +0 -53
  53. data/lib/miteru/feeds/phishstats.rb +0 -28
  54. data/lib/miteru/feeds.rb +0 -45
  55. data/lib/miteru/http_client.rb +0 -85
data/lib/miteru/kit.rb CHANGED
@@ -1,14 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "cgi"
4
- require "uuidtools"
5
- require "uri"
6
-
7
3
  module Miteru
8
- class Kit
9
- VALID_EXTENSIONS = Miteru.configuration.valid_extensions
10
- VALID_MIME_TYPES = Miteru.configuration.valid_mime_types
11
-
4
+ class Kit < Service
12
5
  # @return [String]
13
6
  attr_reader :url
14
7
 
@@ -27,7 +20,11 @@ module Miteru
27
20
  # @return [Hash, nil]
28
21
  attr_reader :headers
29
22
 
30
- def initialize(url, source)
23
+ #
24
+ # @param [String] url
25
+ # @param [String] source
26
+ #
27
+ def initialize(url, source:)
31
28
  @url = url
32
29
  @source = source
33
30
 
@@ -91,6 +88,13 @@ module Miteru
91
88
  @decoded_url ||= URI.decode_www_form_component(url)
92
89
  end
93
90
 
91
+ #
92
+ # @return [String]
93
+ #
94
+ def truncated_url
95
+ url.truncate(64)
96
+ end
97
+
94
98
  private
95
99
 
96
100
  def filename_to_download
@@ -98,21 +102,25 @@ module Miteru
98
102
  end
99
103
 
100
104
  def base_dir
101
- @base_dir ||= Miteru.configuration.download_to
105
+ @base_dir ||= Miteru.config.download_to
102
106
  end
103
107
 
104
108
  def valid_ext?
105
- VALID_EXTENSIONS.include? extname
109
+ Miteru.config.file_extensions.include? extname
110
+ end
111
+
112
+ def http
113
+ HTTP::Factory.build
106
114
  end
107
115
 
108
116
  def before_validation
109
- res = HTTPClient.head(url)
110
- @content_length = res.content_length
111
- @mime_type = res.content_type.mime_type.to_s
112
- @status = res.status
113
- @headers = res.headers.to_h
114
- rescue StandardError
115
- # do nothing
117
+ Try[StandardError] do
118
+ res = http.head(url)
119
+ @content_length = res.content_length
120
+ @mime_type = res.content_type.mime_type.to_s
121
+ @status = res.status
122
+ @headers = res.headers.to_h
123
+ end.recover { nil }.value!
116
124
  end
117
125
 
118
126
  def reachable?
@@ -120,11 +128,11 @@ module Miteru
120
128
  end
121
129
 
122
130
  def valid_mime_type?
123
- VALID_MIME_TYPES.include? mime_type
131
+ Miteru.config.file_mime_types.include? mime_type
124
132
  end
125
133
 
126
134
  def valid_content_length?
127
- content_length.to_i > 0
135
+ content_length.to_i.positive?
128
136
  end
129
137
  end
130
138
  end
data/lib/miteru/mixin.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Miteru
2
4
  module Mixins
3
5
  module URL
@@ -13,35 +15,6 @@ module Miteru
13
15
  def invalid_extension?(url)
14
16
  IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
15
17
  end
16
-
17
- #
18
- # Breakdown a URL into URLs
19
- #
20
- # @param [String] url
21
- # @param [Boolean] enable_directory_traveling
22
- #
23
- # @return [Array<String>]
24
- #
25
- def breakdown(url, enable_directory_traveling)
26
- begin
27
- uri = URI.parse(url)
28
- rescue URI::InvalidURIError => _e
29
- return []
30
- end
31
-
32
- base = "#{uri.scheme}://#{uri.hostname}"
33
- return [base] unless enable_directory_traveling
34
-
35
- segments = uri.path.split("/")
36
- return [base] if segments.length.zero?
37
-
38
- urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
39
-
40
- urls.reject do |breakdowned_url|
41
- # Reject a url which ends with specific extension names
42
- invalid_extension? breakdowned_url
43
- end
44
- end
45
18
  end
46
19
  end
47
20
  end
data/lib/miteru/notifiers/base.rb CHANGED
@@ -2,14 +2,21 @@
2
2
 
3
3
  module Miteru
4
4
  module Notifiers
5
- class Base
6
- def notify(website)
5
+ class Base < Service
6
+ def call(website)
7
7
  raise NotImplementedError
8
8
  end
9
9
 
10
- def notifiable?
10
+ def callable?
11
11
  raise NotImplementedError
12
12
  end
13
+
14
+ class << self
15
+ def inherited(child)
16
+ super
17
+ Miteru.notifiers << child
18
+ end
19
+ end
13
20
  end
14
21
  end
15
22
  end
data/lib/miteru/notifiers/slack.rb CHANGED
@@ -1,32 +1,107 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "colorize"
4
3
  require "slack-notifier"
5
4
 
6
5
  module Miteru
7
6
  module Notifiers
7
+ class SlackAttachment
8
+ attr_reader :url
9
+
10
+ def initialize(url)
11
+ @url = url
12
+ end
13
+
14
+ def to_a
15
+ [
16
+ {
17
+ text: defanged_url,
18
+ fallback: "VT & urlscan.io links",
19
+ actions:
20
+ }
21
+ ]
22
+ end
23
+
24
+ private
25
+
26
+ def actions
27
+ [vt_link, urlscan_link].compact
28
+ end
29
+
30
+ def vt_link
31
+ return nil unless _vt_link
32
+
33
+ {
34
+ type: "button",
35
+ text: "Lookup on VirusTotal",
36
+ url: _vt_link
37
+ }
38
+ end
39
+
40
+ def urlscan_link
41
+ return nil unless _urlscan_link
42
+
43
+ {
44
+ type: "button",
45
+ text: "Lookup on urlscan.io",
46
+ url: _urlscan_link
47
+ }
48
+ end
49
+
50
+ def defanged_url
51
+ @defanged_url ||= url.to_s.gsub(".", "[.]")
52
+ end
53
+
54
+ def domain
55
+ @domain ||= [].tap do |out|
56
+ out << URI(url).hostname
57
+ rescue URI::Error => _e
58
+ out << nil
59
+ end.first
60
+ end
61
+
62
+ def _urlscan_link
63
+ return nil unless domain
64
+
65
+ "https://urlscan.io/domain/#{domain}"
66
+ end
67
+
68
+ def _vt_link
69
+ return nil unless domain
70
+
71
+ "https://www.virustotal.com/#/domain/#{domain}"
72
+ end
73
+ end
74
+
8
75
  class Slack < Base
9
76
  #
10
77
  # Notifiy to Slack
11
78
  #
12
- # @param [Miteru::Website website
79
+ # @param [Miteru::Website] website
13
80
  #
14
- def notify(website)
15
- attachement = Attachement.new(website.url)
81
+ def call(website)
82
+ return unless callable?
83
+
84
+ attachment = SlackAttachment.new(website.url)
16
85
  kits = website.kits.select(&:downloaded?)
86
+ notifier.post(text: website.message.capitalize, attachments: attachment.to_a) if kits.any?
87
+ end
88
+
89
+ def callable?
90
+ !Miteru.config.slack_webhook_url.nil?
91
+ end
17
92
 
18
- notifier.post(text: website.message.capitalize, attachments: attachement.to_a) if notifiable? && kits.any?
93
+ private
19
94
 
20
- message = kits.any? ? website.message.colorize(:light_red) : website.message
21
- Miteru.logger.info "#{website.url}: #{message}"
95
+ def webhook_url
96
+ Miteru.config.slack_webhook_url
22
97
  end
23
98
 
24
- def notifiable?
25
- Miteru.configuration.slack_webhook_url? && Miteru.configuration.post_to_slack?
99
+ def channel
100
+ Miteru.config.slack_channel
26
101
  end
27
102
 
28
103
  def notifier
29
- ::Slack::Notifier.new(Miteru.configuration.slack_webhook_url, channel: Miteru.configuration.slack_channel)
104
+ ::Slack::Notifier.new(webhook_url, channel:)
30
105
  end
31
106
  end
32
107
  end
data/lib/miteru/notifiers/urlscan.rb CHANGED
@@ -1,36 +1,51 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "urlscan"
4
-
5
3
  module Miteru
6
4
  module Notifiers
7
5
  class UrlScan < Base
8
6
  #
9
- # Notifiy to urlscan.io
10
- #
11
- # @param [Miteru::Website website
7
+ # @param [Miteru::Website] website
12
8
  #
13
- def notify(website)
9
+ def call(website)
10
+ return unless callable?
11
+
14
12
  kits = website.kits.select(&:downloaded?)
15
- return unless notifiable? && kits.any?
13
+ return unless kits.any?
16
14
 
17
15
  kits.each { |kit| submit(kit.url) }
18
16
  end
19
17
 
20
- def notifiable?
21
- Miteru.configuration.urlscan_api_key?
18
+ def callable?
19
+ !Miteru.config.urlscan_api_key.nil?
22
20
  end
23
21
 
24
22
  private
25
23
 
26
- def api
27
- @api ||= ::UrlScan::API.new(Miteru.configuration.urlscan_api_key)
24
+ #
25
+ # @return [::HTTP::Client]
26
+ #
27
+ def http
28
+ @http ||= HTTP::Factory.build(headers:, timeout:)
29
+ end
30
+
31
+ def headers
32
+ {"api-key": Miteru.config.urlscan_api_key}
33
+ end
34
+
35
+ def timeout
36
+ Miteru.config.timeout
37
+ end
38
+
39
+ def tags
40
+ %w[miteru phishkit]
41
+ end
42
+
43
+ def visibility
44
+ Miteru.config.urlscan_submit_visibility
28
45
  end
29
46
 
30
47
  def submit(url)
31
- api.submit(url, tags: ["miteru", "phishkit"], visibility: Miteru.configuration.urlscan_submit_visibility)
32
- rescue StandardError
33
- # do nothing
48
+ http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
34
49
  end
35
50
  end
36
51
  end
data/lib/miteru/orchestrator.rb ADDED
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ class Orchestrator < Service
5
+ def call
6
+ Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?
7
+
8
+ if Miteru.sidekiq?
9
+
10
+ websites.each do |website|
11
+ Jobs::CrawleJob.perform_async(website.url, website.source)
12
+ Miteru.logger.info("Website:#{website.truncated_url} crawler job queued") if verbose?
13
+ end
14
+ else
15
+ Miteru.logger.info("Use #{threads} thread(s).") if verbose?
16
+ Parallel.each(websites, in_threads: threads) do |website|
17
+ Miteru.logger.info("Website:#{website.truncated_url} crawling started") if verbose?
18
+ crawl(website)
19
+ end
20
+ end
21
+ end
22
+
23
+ #
24
+ # @return [Array<Miteru::Websites>]
25
+ #
26
+ def websites
27
+ @websites ||= [].tap do |out|
28
+ feeds.each do |feed|
29
+ result = feed.result
30
+ if result.success?
31
+ websites = result.value!
32
+ Miteru.logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
33
+ out << websites
34
+ else
35
+ Miteru.logger.warn("Feed:#{feed.source} failed - #{result.failure}")
36
+ end
37
+ end
38
+ end.flatten
39
+ end
40
+
41
+ private
42
+
43
+ def threads
44
+ Miteru.config.threads
45
+ end
46
+
47
+ def verbose?
48
+ Miteru.config.verbose
49
+ end
50
+
51
+ #
52
+ # @return [Array<Miteru::Feeds::Base>]
53
+ #
54
+ def feeds
55
+ Miteru.feeds.map(&:new)
56
+ end
57
+ end
58
+ end
data/lib/miteru/record.rb CHANGED
@@ -1,35 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "active_record"
4
-
5
3
  module Miteru
6
4
  class Record < ActiveRecord::Base
7
5
  class << self
8
6
  #
9
- # Check uniqueness of a record by a hash
10
- #
11
- # @param [String] hash
7
+ # @param [String] sha256
12
8
  #
13
9
  # @return [Boolean] true if it is unique. Otherwise false.
14
10
  #
15
- def unique_hash?(hash)
16
- record = find_by(hash: hash)
17
- return true if record.nil?
18
-
19
- false
11
+ def unique_sha256?(sha256)
12
+ !where(sha256:).exists?
20
13
  end
21
14
 
22
15
  #
23
16
  # Create a new record based on a kit
24
17
  #
25
18
  # @param [Miteru::Kit] kit
26
- # @param [String] hash
19
+ # @param [String] sha256
27
20
  #
28
21
  # @return [Miteru::Record]
29
22
  #
30
- def create_by_kit_and_hash(kit, hash)
23
+ def create_by_kit_and_hash(kit, sha256:)
31
24
  record = new(
32
- hash: hash,
33
25
  source: kit.source,
34
26
  hostname: kit.hostname,
35
27
  url: kit.decoded_url,
@@ -37,11 +29,12 @@ module Miteru
37
29
  filename: kit.filename,
38
30
  filesize: kit.filesize,
39
31
  mime_type: kit.mime_type,
40
- downloaded_as: kit.filepath_to_download
32
+ downloaded_as: kit.filepath_to_download,
33
+ sha256:
41
34
  )
42
35
  record.save
43
36
  record
44
- rescue TypeError, ActiveRecord::RecordNotUnique => _e
37
+ rescue TypeError, ActiveRecord::RecordNotUnique
45
38
  nil
46
39
  end
47
40
  end
data/lib/miteru/service.rb ADDED
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ #
5
+ # Base class for services
6
+ #
7
+ class Service
8
+ include Dry::Monads[:result, :try]
9
+
10
+ def call(*args, **kwargs)
11
+ raise NotImplementedError, "You must implement #{self.class}##{__method__}"
12
+ end
13
+
14
+ def result(...)
15
+ Try[StandardError] { call(...) }.to_result
16
+ end
17
+
18
+ class << self
19
+ def call(...)
20
+ new.call(...)
21
+ end
22
+
23
+ def result(...)
24
+ new.result(...)
25
+ end
26
+ end
27
+ end
28
+ end
data/lib/miteru/sidekiq/application.rb ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq"
4
+
5
+ require "miteru/sidekiq/jobs"
6
+
7
+ Sidekiq.configure_server do |config|
8
+ config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
9
+ end
10
+
11
+ Sidekiq.configure_client do |config|
12
+ config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
13
+ end
data/lib/miteru/sidekiq/jobs.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq"
4
+
5
+ module Miteru
6
+ module Jobs
7
+ class CrawleJob
8
+ include Sidekiq::Job
9
+ include Concerns::DatabaseConnectable
10
+
11
+ #
12
+ # @param [String] url
13
+ # @param [String] source
14
+ #
15
+ def perform(url, source)
16
+ website = Miteru::Website.new(url, source:)
17
+ with_db_connection { Crawler.call(website) }
18
+ end
19
+ end
20
+ end
21
+ end
data/lib/miteru/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "1.2.2"
4
+ VERSION = "2.0.1"
5
5
  end
data/lib/miteru/web/application.rb ADDED
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rack
4
+ require "rack"
5
+ require "rack/session"
6
+ require "rackup"
7
+
8
+ require "rack/handler/puma"
9
+
10
+ # Sidekiq
11
+ require "sidekiq/web"
12
+
13
+ module Miteru
14
+ module Web
15
+ class App
16
+ class << self
17
+ def instance
18
+ Rack::Builder.new do
19
+ use Rack::Session::Cookie, secret: SecureRandom.hex(32), same_site: true, max_age: 86_400
20
+
21
+ map "/" do
22
+ run Sidekiq::Web
23
+ end
24
+
25
+ run App.new
26
+ end.to_app
27
+ end
28
+
29
+ def run!(port: 9292, host: "localhost", threads: "0:3", verbose: false, worker_timeout: 60, open: true)
30
+ Rackup::Handler::Puma.run(
31
+ instance,
32
+ Port: port,
33
+ Host: host,
34
+ Threads: threads,
35
+ Verbose: verbose,
36
+ worker_timeout:
37
+ )
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end