miteru 1.2.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/gem.yml +36 -0
  3. data/.github/workflows/{test.yml → ruby.yml} +4 -13
  4. data/.gitignore +4 -1
  5. data/.rspec +1 -1
  6. data/README.md +7 -17
  7. data/docker-compose.yml +12 -0
  8. data/exe/miteru +3 -3
  9. data/lefthook.yml +9 -0
  10. data/lib/miteru/cli/application.rb +27 -0
  11. data/lib/miteru/cli/base.rb +16 -0
  12. data/lib/miteru/cli/database.rb +11 -0
  13. data/lib/miteru/commands/database.rb +23 -0
  14. data/lib/miteru/commands/main.rb +37 -0
  15. data/lib/miteru/commands/sidekiq.rb +35 -0
  16. data/lib/miteru/commands/web.rb +37 -0
  17. data/lib/miteru/concerns/database_connectable.rb +16 -0
  18. data/lib/miteru/concerns/error_unwrappable.rb +30 -0
  19. data/lib/miteru/config.rb +98 -0
  20. data/lib/miteru/crawler.rb +28 -44
  21. data/lib/miteru/database.rb +50 -38
  22. data/lib/miteru/downloader.rb +52 -41
  23. data/lib/miteru/errors.rb +37 -0
  24. data/lib/miteru/feeds/ayashige.rb +9 -20
  25. data/lib/miteru/feeds/base.rb +141 -0
  26. data/lib/miteru/feeds/phishing_database.rb +11 -10
  27. data/lib/miteru/feeds/urlscan.rb +47 -19
  28. data/lib/miteru/feeds/urlscan_pro.rb +20 -18
  29. data/lib/miteru/http.rb +51 -0
  30. data/lib/miteru/kit.rb +28 -20
  31. data/lib/miteru/mixin.rb +2 -29
  32. data/lib/miteru/notifiers/base.rb +10 -3
  33. data/lib/miteru/notifiers/slack.rb +85 -10
  34. data/lib/miteru/notifiers/urlscan.rb +29 -14
  35. data/lib/miteru/orchestrator.rb +51 -0
  36. data/lib/miteru/record.rb +8 -15
  37. data/lib/miteru/service.rb +28 -0
  38. data/lib/miteru/sidekiq/application.rb +13 -0
  39. data/lib/miteru/sidekiq/jobs.rb +21 -0
  40. data/lib/miteru/version.rb +1 -1
  41. data/lib/miteru/web/application.rb +42 -0
  42. data/lib/miteru/website.rb +48 -48
  43. data/lib/miteru.rb +130 -22
  44. data/miteru-sidekiq.service +13 -0
  45. data/miteru.db-shm +0 -0
  46. data/miteru.db-wal +0 -0
  47. data/miteru.gemspec +49 -38
  48. metadata +265 -97
  49. data/.overcommit.yml +0 -12
  50. data/.standard.yml +0 -4
  51. data/lib/miteru/attachement.rb +0 -74
  52. data/lib/miteru/cli.rb +0 -41
  53. data/lib/miteru/configuration.rb +0 -122
  54. data/lib/miteru/error.rb +0 -7
  55. data/lib/miteru/feeds/feed.rb +0 -53
  56. data/lib/miteru/feeds/phishstats.rb +0 -28
  57. data/lib/miteru/feeds.rb +0 -45
  58. data/lib/miteru/http_client.rb +0 -85
data/lib/miteru/kit.rb CHANGED
@@ -1,14 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "cgi"
4
- require "uuidtools"
5
- require "uri"
6
-
7
3
  module Miteru
8
- class Kit
9
- VALID_EXTENSIONS = Miteru.configuration.valid_extensions
10
- VALID_MIME_TYPES = Miteru.configuration.valid_mime_types
11
-
4
+ class Kit < Service
12
5
  # @return [String]
13
6
  attr_reader :url
14
7
 
@@ -27,7 +20,11 @@ module Miteru
27
20
  # @return [Hash, nil]
28
21
  attr_reader :headers
29
22
 
30
- def initialize(url, source)
23
+ #
24
+ # @param [String] url
25
+ # @param [String] source
26
+ #
27
+ def initialize(url, source:)
31
28
  @url = url
32
29
  @source = source
33
30
 
@@ -91,6 +88,13 @@ module Miteru
91
88
  @decoded_url ||= URI.decode_www_form_component(url)
92
89
  end
93
90
 
91
+ #
92
+ # @return [String]
93
+ #
94
+ def truncated_url
95
+ url.truncate(64)
96
+ end
97
+
94
98
  private
95
99
 
96
100
  def filename_to_download
@@ -98,21 +102,25 @@ module Miteru
98
102
  end
99
103
 
100
104
  def base_dir
101
- @base_dir ||= Miteru.configuration.download_to
105
+ @base_dir ||= Miteru.config.download_to
102
106
  end
103
107
 
104
108
  def valid_ext?
105
- VALID_EXTENSIONS.include? extname
109
+ Miteru.config.file_extensions.include? extname
110
+ end
111
+
112
+ def http
113
+ HTTP::Factory.build
106
114
  end
107
115
 
108
116
  def before_validation
109
- res = HTTPClient.head(url)
110
- @content_length = res.content_length
111
- @mime_type = res.content_type.mime_type.to_s
112
- @status = res.status
113
- @headers = res.headers.to_h
114
- rescue StandardError
115
- # do nothing
117
+ Try[StandardError] do
118
+ res = http.head(url)
119
+ @content_length = res.content_length
120
+ @mime_type = res.content_type.mime_type.to_s
121
+ @status = res.status
122
+ @headers = res.headers.to_h
123
+ end.recover { nil }.value!
116
124
  end
117
125
 
118
126
  def reachable?
@@ -120,11 +128,11 @@ module Miteru
120
128
  end
121
129
 
122
130
  def valid_mime_type?
123
- VALID_MIME_TYPES.include? mime_type
131
+ Miteru.config.file_mime_types.include? mime_type
124
132
  end
125
133
 
126
134
  def valid_content_length?
127
- content_length.to_i > 0
135
+ content_length.to_i.positive?
128
136
  end
129
137
  end
130
138
  end
data/lib/miteru/mixin.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Miteru
2
4
  module Mixins
3
5
  module URL
@@ -13,35 +15,6 @@ module Miteru
13
15
  def invalid_extension?(url)
14
16
  IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
15
17
  end
16
-
17
- #
18
- # Breakdown a URL into URLs
19
- #
20
- # @param [String] url
21
- # @param [Boolean] enable_directory_traveling
22
- #
23
- # @return [Array<String>]
24
- #
25
- def breakdown(url, enable_directory_traveling)
26
- begin
27
- uri = URI.parse(url)
28
- rescue URI::InvalidURIError => _e
29
- return []
30
- end
31
-
32
- base = "#{uri.scheme}://#{uri.hostname}"
33
- return [base] unless enable_directory_traveling
34
-
35
- segments = uri.path.split("/")
36
- return [base] if segments.length.zero?
37
-
38
- urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
39
-
40
- urls.reject do |breakdowned_url|
41
- # Reject a url which ends with specific extension names
42
- invalid_extension? breakdowned_url
43
- end
44
- end
45
18
  end
46
19
  end
47
20
  end
@@ -2,14 +2,21 @@
2
2
 
3
3
  module Miteru
4
4
  module Notifiers
5
- class Base
6
- def notify(website)
5
+ class Base < Service
6
+ def call(website)
7
7
  raise NotImplementedError
8
8
  end
9
9
 
10
- def notifiable?
10
+ def callable?
11
11
  raise NotImplementedError
12
12
  end
13
+
14
+ class << self
15
+ def inherited(child)
16
+ super
17
+ Miteru.notifiers << child
18
+ end
19
+ end
13
20
  end
14
21
  end
15
22
  end
@@ -1,32 +1,107 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "colorize"
4
3
  require "slack-notifier"
5
4
 
6
5
  module Miteru
7
6
  module Notifiers
7
+ class SlackAttachment
8
+ attr_reader :url
9
+
10
+ def initialize(url)
11
+ @url = url
12
+ end
13
+
14
+ def to_a
15
+ [
16
+ {
17
+ text: defanged_url,
18
+ fallback: "VT & urlscan.io links",
19
+ actions:
20
+ }
21
+ ]
22
+ end
23
+
24
+ private
25
+
26
+ def actions
27
+ [vt_link, urlscan_link].compact
28
+ end
29
+
30
+ def vt_link
31
+ return nil unless _vt_link
32
+
33
+ {
34
+ type: "button",
35
+ text: "Lookup on VirusTotal",
36
+ url: _vt_link
37
+ }
38
+ end
39
+
40
+ def urlscan_link
41
+ return nil unless _urlscan_link
42
+
43
+ {
44
+ type: "button",
45
+ text: "Lookup on urlscan.io",
46
+ url: _urlscan_link
47
+ }
48
+ end
49
+
50
+ def defanged_url
51
+ @defanged_url ||= url.to_s.gsub(".", "[.]")
52
+ end
53
+
54
+ def domain
55
+ @domain ||= [].tap do |out|
56
+ out << URI(url).hostname
57
+ rescue URI::Error => _e
58
+ out << nil
59
+ end.first
60
+ end
61
+
62
+ def _urlscan_link
63
+ return nil unless domain
64
+
65
+ "https://urlscan.io/domain/#{domain}"
66
+ end
67
+
68
+ def _vt_link
69
+ return nil unless domain
70
+
71
+ "https://www.virustotal.com/#/domain/#{domain}"
72
+ end
73
+ end
74
+
8
75
  class Slack < Base
9
76
  #
10
77
  # Notify Slack
11
78
  #
12
- # @param [Miteru::Website website
79
+ # @param [Miteru::Website] website
13
80
  #
14
- def notify(website)
15
- attachement = Attachement.new(website.url)
81
+ def call(website)
82
+ return unless callable?
83
+
84
+ attachment = SlackAttachment.new(website.url)
16
85
  kits = website.kits.select(&:downloaded?)
86
+ notifier.post(text: website.message.capitalize, attachments: attachment.to_a) if kits.any?
87
+ end
88
+
89
+ def callable?
90
+ !Miteru.config.slack_webhook_url.nil?
91
+ end
17
92
 
18
- notifier.post(text: website.message.capitalize, attachments: attachement.to_a) if notifiable? && kits.any?
93
+ private
19
94
 
20
- message = kits.any? ? website.message.colorize(:light_red) : website.message
21
- Miteru.logger.info "#{website.url}: #{message}"
95
+ def webhook_url
96
+ Miteru.config.slack_webhook_url
22
97
  end
23
98
 
24
- def notifiable?
25
- Miteru.configuration.slack_webhook_url? && Miteru.configuration.post_to_slack?
99
+ def channel
100
+ Miteru.config.slack_channel
26
101
  end
27
102
 
28
103
  def notifier
29
- ::Slack::Notifier.new(Miteru.configuration.slack_webhook_url, channel: Miteru.configuration.slack_channel)
104
+ ::Slack::Notifier.new(webhook_url, channel:)
30
105
  end
31
106
  end
32
107
  end
@@ -1,36 +1,51 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "urlscan"
4
-
5
3
  module Miteru
6
4
  module Notifiers
7
5
  class UrlScan < Base
8
6
  #
9
- # Notifiy to urlscan.io
10
- #
11
- # @param [Miteru::Website website
7
+ # @param [Miteru::Website] website
12
8
  #
13
- def notify(website)
9
+ def call(website)
10
+ return unless callable?
11
+
14
12
  kits = website.kits.select(&:downloaded?)
15
- return unless notifiable? && kits.any?
13
+ return unless kits.any?
16
14
 
17
15
  kits.each { |kit| submit(kit.url) }
18
16
  end
19
17
 
20
- def notifiable?
21
- Miteru.configuration.urlscan_api_key?
18
+ def callable?
19
+ !Miteru.config.urlscan_api_key.nil?
22
20
  end
23
21
 
24
22
  private
25
23
 
26
- def api
27
- @api ||= ::UrlScan::API.new(Miteru.configuration.urlscan_api_key)
24
+ #
25
+ # @return [::HTTP::Client]
26
+ #
27
+ def http
28
+ @http ||= HTTP::Factory.build(headers:, timeout:)
29
+ end
30
+
31
+ def headers
32
+ {"api-key": Miteru.config.urlscan_api_key}
33
+ end
34
+
35
+ def timeout
36
+ Miteru.config.timeout
37
+ end
38
+
39
+ def tags
40
+ %w[miteru phishkit]
41
+ end
42
+
43
+ def visibility
44
+ Miteru.config.urlscan_submit_visibility
28
45
  end
29
46
 
30
47
  def submit(url)
31
- api.submit(url, tags: ["miteru", "phishkit"], visibility: Miteru.configuration.urlscan_submit_visibility)
32
- rescue StandardError
33
- # do nothing
48
+ http.post("/api/v1/scan/", json: {tags:, visibility:, url:})
34
49
  end
35
50
  end
36
51
  end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ class Orchestrator < Service
5
+ def call
6
+ Miteru.logger.info("#{websites.length} websites loaded in total.") if verbose?
7
+
8
+ if Miteru.sidekiq?
9
+ websites.each { |website| Jobs::CrawleJob.perform_async(website.url, website.source) }
10
+ else
11
+ Miteru.logger.info("Use #{threads} thread(s).") if verbose?
12
+ Parallel.each(websites, in_threads: threads) { |website| crawl(website) }
13
+ end
14
+ end
15
+
16
+ #
17
+ # @return [Array<Miteru::Website>]
18
+ #
19
+ def websites
20
+ @websites ||= [].tap do |out|
21
+ feeds.each do |feed|
22
+ result = feed.result
23
+ if result.success?
24
+ websites = result.value!
25
+ Miteru.logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
26
+ out << websites
27
+ else
28
+ Miteru.logger.warn("Feed:#{feed.source} failed - #{result.failure}")
29
+ end
30
+ end
31
+ end.flatten
32
+ end
33
+
34
+ private
35
+
36
+ def threads
37
+ Miteru.config.threads
38
+ end
39
+
40
+ def verbose?
41
+ Miteru.config.verbose
42
+ end
43
+
44
+ #
45
+ # @return [Array<Miteru::Feeds::Base>]
46
+ #
47
+ def feeds
48
+ Miteru.feeds.map(&:new)
49
+ end
50
+ end
51
+ end
data/lib/miteru/record.rb CHANGED
@@ -1,35 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "active_record"
4
-
5
3
  module Miteru
6
4
  class Record < ActiveRecord::Base
7
5
  class << self
8
6
  #
9
- # Check uniqueness of a record by a hash
10
- #
11
- # @param [String] hash
7
+ # @param [String] sha256
12
8
  #
13
9
  # @return [Boolean] true if it is unique. Otherwise false.
14
10
  #
15
- def unique_hash?(hash)
16
- record = find_by(hash: hash)
17
- return true if record.nil?
18
-
19
- false
11
+ def unique_sha256?(sha256)
12
+ !where(sha256:).exists?
20
13
  end
21
14
 
22
15
  #
23
16
  # Create a new record based on a kit
24
17
  #
25
18
  # @param [Miteru::Kit] kit
26
- # @param [String] hash
19
+ # @param [String] sha256
27
20
  #
28
21
  # @return [Miteru::Record]
29
22
  #
30
- def create_by_kit_and_hash(kit, hash)
23
+ def create_by_kit_and_hash(kit, sha256:)
31
24
  record = new(
32
- hash: hash,
33
25
  source: kit.source,
34
26
  hostname: kit.hostname,
35
27
  url: kit.decoded_url,
@@ -37,11 +29,12 @@ module Miteru
37
29
  filename: kit.filename,
38
30
  filesize: kit.filesize,
39
31
  mime_type: kit.mime_type,
40
- downloaded_as: kit.filepath_to_download
32
+ downloaded_as: kit.filepath_to_download,
33
+ sha256:
41
34
  )
42
35
  record.save
43
36
  record
44
- rescue TypeError, ActiveRecord::RecordNotUnique => _e
37
+ rescue TypeError, ActiveRecord::RecordNotUnique
45
38
  nil
46
39
  end
47
40
  end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ #
5
+ # Base class for services
6
+ #
7
+ class Service
8
+ include Dry::Monads[:result, :try]
9
+
10
+ def call(*args, **kwargs)
11
+ raise NotImplementedError, "You must implement #{self.class}##{__method__}"
12
+ end
13
+
14
+ def result(...)
15
+ Try[StandardError] { call(...) }.to_result
16
+ end
17
+
18
+ class << self
19
+ def call(...)
20
+ new.call(...)
21
+ end
22
+
23
+ def result(...)
24
+ new.result(...)
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq"
4
+
5
+ require "miteru/sidekiq/jobs"
6
+
7
+ Sidekiq.configure_server do |config|
8
+ config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
9
+ end
10
+
11
+ Sidekiq.configure_client do |config|
12
+ config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
13
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sidekiq"
4
+
5
+ module Miteru
6
+ module Jobs
7
+ class CrawleJob
8
+ include Sidekiq::Job
9
+ include Concerns::DatabaseConnectable
10
+
11
+ #
12
+ # @param [String] url
13
+ # @param [String] source
14
+ #
15
+ def perform(url, source)
16
+ website = Miteru::Website.new(url, source:)
17
+ with_db_connection { Crawler.call(website) }
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "1.2.2"
4
+ VERSION = "2.0.0"
5
5
  end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Rack
4
+ require "rack"
5
+ require "rack/session"
6
+ require "rackup"
7
+
8
+ require "rack/handler/puma"
9
+
10
+ # Sidekiq
11
+ require "sidekiq/web"
12
+
13
+ module Miteru
14
+ module Web
15
+ class App
16
+ class << self
17
+ def instance
18
+ Rack::Builder.new do
19
+ use Rack::Session::Cookie, secret: SecureRandom.hex(32), same_site: true, max_age: 86_400
20
+
21
+ map "/" do
22
+ run Sidekiq::Web
23
+ end
24
+
25
+ run App.new
26
+ end.to_app
27
+ end
28
+
29
+ def run!(port: 9292, host: "localhost", threads: "0:3", verbose: false, worker_timeout: 60, open: true)
30
+ Rackup::Handler::Puma.run(
31
+ instance,
32
+ Port: port,
33
+ Host: host,
34
+ Threads: threads,
35
+ Verbose: verbose,
36
+ worker_timeout:
37
+ )
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end