miteru 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/gem.yml +36 -0
  3. data/.github/workflows/{test.yml → ruby.yml} +4 -13
  4. data/.gitignore +4 -1
  5. data/.rspec +1 -1
  6. data/README.md +7 -17
  7. data/docker-compose.yml +12 -0
  8. data/exe/miteru +3 -3
  9. data/lefthook.yml +9 -0
  10. data/lib/miteru/cli/application.rb +27 -0
  11. data/lib/miteru/cli/base.rb +16 -0
  12. data/lib/miteru/cli/database.rb +11 -0
  13. data/lib/miteru/commands/database.rb +23 -0
  14. data/lib/miteru/commands/main.rb +37 -0
  15. data/lib/miteru/commands/sidekiq.rb +35 -0
  16. data/lib/miteru/commands/web.rb +37 -0
  17. data/lib/miteru/concerns/database_connectable.rb +16 -0
  18. data/lib/miteru/concerns/error_unwrappable.rb +30 -0
  19. data/lib/miteru/config.rb +98 -0
  20. data/lib/miteru/crawler.rb +28 -44
  21. data/lib/miteru/database.rb +50 -38
  22. data/lib/miteru/downloader.rb +52 -41
  23. data/lib/miteru/errors.rb +37 -0
  24. data/lib/miteru/feeds/ayashige.rb +9 -20
  25. data/lib/miteru/feeds/base.rb +141 -0
  26. data/lib/miteru/feeds/phishing_database.rb +11 -10
  27. data/lib/miteru/feeds/urlscan.rb +47 -19
  28. data/lib/miteru/feeds/urlscan_pro.rb +20 -18
  29. data/lib/miteru/http.rb +51 -0
  30. data/lib/miteru/kit.rb +28 -20
  31. data/lib/miteru/mixin.rb +2 -29
  32. data/lib/miteru/notifiers/base.rb +10 -3
  33. data/lib/miteru/notifiers/slack.rb +85 -10
  34. data/lib/miteru/notifiers/urlscan.rb +29 -14
  35. data/lib/miteru/orchestrator.rb +51 -0
  36. data/lib/miteru/record.rb +8 -15
  37. data/lib/miteru/service.rb +28 -0
  38. data/lib/miteru/sidekiq/application.rb +13 -0
  39. data/lib/miteru/sidekiq/jobs.rb +21 -0
  40. data/lib/miteru/version.rb +1 -1
  41. data/lib/miteru/web/application.rb +42 -0
  42. data/lib/miteru/website.rb +48 -48
  43. data/lib/miteru.rb +130 -22
  44. data/miteru-sidekiq.service +13 -0
  45. data/miteru.db-shm +0 -0
  46. data/miteru.db-wal +0 -0
  47. data/miteru.gemspec +49 -38
  48. metadata +265 -97
  49. data/.overcommit.yml +0 -12
  50. data/.standard.yml +0 -4
  51. data/lib/miteru/attachement.rb +0 -74
  52. data/lib/miteru/cli.rb +0 -41
  53. data/lib/miteru/configuration.rb +0 -122
  54. data/lib/miteru/error.rb +0 -7
  55. data/lib/miteru/feeds/feed.rb +0 -53
  56. data/lib/miteru/feeds/phishstats.rb +0 -28
  57. data/lib/miteru/feeds.rb +0 -45
  58. data/lib/miteru/http_client.rb +0 -85
@@ -1,11 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "active_record"
4
-
5
- class InitialSchema < ActiveRecord::Migration[7.0]
3
+ class V2Schema < ActiveRecord::Migration[7.0]
6
4
  def change
7
5
  create_table :records, if_not_exists: true do |t|
8
- t.string :hash, null: false, index: { unique: true }
6
+ t.string :sha256, null: false, index: {unique: true}
9
7
  t.string :hostname, null: false
10
8
  t.json :headers, null: false
11
9
  t.text :filename, null: false
@@ -13,61 +11,75 @@ class InitialSchema < ActiveRecord::Migration[7.0]
13
11
  t.integer :filesize, null: false
14
12
  t.string :mime_type, null: false
15
13
  t.text :url, null: false
14
+ t.string :source, null: false
16
15
 
17
16
  t.timestamps
18
17
  end
19
18
  end
20
19
  end
21
20
 
22
- class V11Schema < ActiveRecord::Migration[7.0]
23
- def change
24
- add_column :records, :source, :string, if_not_exists: true
25
- end
26
- end
27
-
28
- def adapter
29
- return "postgresql" if Miteru.configuration.database.start_with?("postgresql://", "postgres://")
30
- return "mysql2" if Miteru.configuration.database.start_with?("mysql2://")
31
-
32
- "sqlite3"
21
+ #
22
+ # @return [Array<ActiveRecord::Migration>] schemas
23
+ #
24
+ def schemas
25
+ [V2Schema]
33
26
  end
34
27
 
35
28
  module Miteru
36
29
  class Database
37
30
  class << self
31
+ #
32
+ # DB migration
33
+ #
34
+ # @param [Symbol] direction
35
+ #
36
+ def migrate(direction)
37
+ schemas.each { |schema| schema.migrate direction }
38
+ end
39
+
40
+ #
41
+ # Establish DB connection
42
+ #
38
43
  def connect
39
- case adapter
40
- when "postgresql", "mysql2"
41
- ActiveRecord::Base.establish_connection(Miteru.configuration.database)
42
- else
43
- ActiveRecord::Base.establish_connection(
44
- adapter: adapter,
45
- database: Miteru.configuration.database
46
- )
47
- end
44
+ return if connected?
48
45
 
49
- # ActiveRecord::Base.logger = Logger.new STDOUT
50
- ActiveRecord::Migration.verbose = false
46
+ ActiveRecord::Base.establish_connection Miteru.config.database_url.to_s
47
+ ActiveRecord::Base.logger = Logger.new($stdout) if Miteru.development?
48
+ end
51
49
 
52
- InitialSchema.migrate(:up)
53
- V11Schema.migrate(:up)
54
- rescue StandardError => _e
55
- # Do nothing
50
+ #
51
+ # @return [Boolean]
52
+ #
53
+ def connected?
54
+ ActiveRecord::Base.connected?
56
55
  end
57
56
 
57
+ #
58
+ # Close DB connection(s)
59
+ #
58
60
  def close
59
- ActiveRecord::Base.clear_active_connections!
60
- ActiveRecord::Base.connection.close
61
+ return unless connected?
62
+
63
+ ActiveRecord::Base.connection_handler.clear_active_connections!
61
64
  end
62
65
 
63
- def destroy!
64
- return unless ActiveRecord::Base.connected?
66
+ def with_db_connection
67
+ Miteru::Database.connect unless connected?
68
+ yield
69
+ rescue ActiveRecord::StatementInvalid
70
+ Miteru.logger.error("DB migration is not yet complete. Please run 'miteru db migrate'.")
71
+ ensure
72
+ Miteru::Database.close
73
+ end
65
74
 
66
- InitialSchema.migrate(:down)
67
- V11Schema.migrate(:down)
75
+ private
76
+
77
+ def adapter
78
+ return "postgresql" if %w[postgresql postgres].include?(Miteru.config.database_url.scheme)
79
+ return "mysql2" if Miteru.config.database_url.scheme == "mysql2"
80
+
81
+ "sqlite3"
68
82
  end
69
83
  end
70
84
  end
71
85
  end
72
-
73
- Miteru::Database.connect
@@ -2,65 +2,76 @@
2
2
 
3
3
  require "digest"
4
4
  require "fileutils"
5
- require "uri"
5
+
6
+ require "down/http"
6
7
 
7
8
  module Miteru
8
- class Downloader
9
- attr_reader :base_dir, :memo
9
+ class Downloader < Service
10
+ prepend MemoWise
10
11
 
11
- def initialize(base_dir = "/tmp")
12
- @base_dir = base_dir
13
- @memo = {}
14
- raise ArgumentError, "#{base_dir} doesn't exist." unless Dir.exist?(base_dir)
15
- end
12
+ # @return [String]
13
+ attr_reader :base_dir
16
14
 
17
- def download_kits(kits)
18
- kits.each { |kit| download_kit kit }
19
- end
15
+ # @return [Miteru::Kit]
16
+ attr_reader :kit
20
17
 
21
- private
18
+ #
19
+ # <Description>
20
+ #
21
+ # @param [Miteru::Kit] kit
22
+ # @param [String] base_dir
23
+ #
24
+ def initialize(kit, base_dir: Miteru.config.download_to)
25
+ super()
26
+ @kit = kit
27
+ @base_dir = base_dir
28
+ end
22
29
 
23
- def download_kit(kit)
30
+ #
31
+ # @return [String]
32
+ #
33
+ def call
24
34
  destination = kit.filepath_to_download
25
35
 
26
- begin
27
- downloaded_as = HTTPClient.download(kit.url, destination)
28
- rescue Down::Error => e
29
- Miteru.logger.error "Failed to download: #{kit.url} (#{e})"
30
- return
31
- end
36
+ # downloader.download(kit.url, destination:, max_size:)
37
+ downloader.download(kit.url, destination:, max_size:)
32
38
 
33
- # check filesize
34
- size = File.size downloaded_as
35
- if size > Miteru.configuration.file_maxsize
36
- Miteru.logger.info "#{kit.url}'s filesize exceeds the limit: #{size}"
37
- FileUtils.rm downloaded_as
38
- return
39
+ unless Record.unique_sha256?(sha256(destination))
40
+ FileUtils.rm destination
41
+ raise UniquenessError, "Kit:#{sha256(destination)} is registered already."
39
42
  end
40
43
 
41
- hash = sha256(downloaded_as)
44
+ # Record a kit in DB
45
+ Record.create_by_kit_and_hash(kit, sha256: sha256(destination))
46
+ Miteru.logger.info "Download #{kit.url} as #{destination}"
47
+
48
+ destination
49
+ end
42
50
 
43
- ActiveRecord::Base.connection_pool.with_connection do
44
- # Remove a downloaded file if it is not unique
45
- unless Record.unique_hash?(hash)
46
- Miteru.logger.info "Don't download #{kit.url}. The same hash is already recorded. (SHA256: #{hash})."
47
- FileUtils.rm downloaded_as
48
- return
49
- end
51
+ private
50
52
 
51
- # Record a kit in DB
52
- Record.create_by_kit_and_hash(kit, hash)
53
- Miteru.logger.info "Download #{kit.url} as #{downloaded_as}"
53
+ def timeout
54
+ Miteru.config.download_timeout
55
+ end
56
+
57
+ def downloader
58
+ Down::Http.new(ssl_context:) { |client| client.timeout(timeout) }
59
+ end
60
+
61
+ def ssl_context
62
+ OpenSSL::SSL::SSLContext.new.tap do |ctx|
63
+ ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
54
64
  end
55
65
  end
56
66
 
57
- def sha256(path)
58
- return memo[path] if memo.key?(path)
67
+ def max_size
68
+ Miteru.config.file_max_size
69
+ end
59
70
 
71
+ def sha256(path)
60
72
  digest = Digest::SHA256.file(path)
61
- hash = digest.hexdigest
62
- memo[path] = hash
63
- hash
73
+ digest.hexdigest
64
74
  end
75
+ memo_wise :sha256
65
76
  end
66
77
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "http"
4
+
5
+ module Miteru
6
+ class Error < StandardError; end
7
+
8
+ class FileSizeError < Error; end
9
+
10
+ class DownloadError < Error; end
11
+
12
+ class UniquenessError < Error; end
13
+
14
+ class StatusError < ::HTTP::Error
15
+ # @return [Integer]
16
+ attr_reader :status_code
17
+
18
+ # @return [String, nil]
19
+ attr_reader :body
20
+
21
+ #
22
+ # @param [String] msg
23
+ # @param [Integer] status_code
24
+ # @param [String, nil] body
25
+ #
26
+ def initialize(msg, status_code, body)
27
+ super(msg)
28
+
29
+ @status_code = status_code
30
+ @body = body
31
+ end
32
+
33
+ def detail
34
+ {status_code:, body:}
35
+ end
36
+ end
37
+ end
@@ -1,34 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "json"
4
- require "uri"
5
-
6
3
  module Miteru
7
4
  class Feeds
8
- class Ayashige < Feed
5
+ class Ayashige < Base
9
6
  HOST = "ayashige.herokuapp.com"
10
- URL = "https://#{HOST}"
7
+ URL = "https://#{HOST}".freeze
11
8
 
12
- def urls
13
- url = url_for("/api/v1/domains/")
14
- res = JSON.parse(get(url))
9
+ def initialize(base_url = "https://ayashige.herokuapp.com")
10
+ super(base_url)
11
+ end
15
12
 
16
- domains = res.map { |item| item["fqdn"] }
17
- domains.map do |domain|
18
- [
19
- "https://#{domain}",
20
- "http://#{domain}"
21
- ]
22
- end.flatten
23
- rescue HTTPResponseError, HTTP::Error, JSON::ParserError => e
24
- Miteru.logger.error "Failed to load ayashige feed (#{e})"
25
- []
13
+ def urls
14
+ json.map { |item| item["fqdn"] }.map { |fqdn| "https://#{fqdn}" }
26
15
  end
27
16
 
28
17
  private
29
18
 
30
- def url_for(path)
31
- URI(URL + path)
19
+ def json
20
+ get_json "/api/v1/domains/"
32
21
  end
33
22
  end
34
23
  end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ class Feeds
5
+ class Base < Service
6
+ IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze
7
+
8
+ # @return [String]
9
+ attr_reader :base_url
10
+
11
+ # @return [Hash]
12
+ attr_reader :headers
13
+
14
+ #
15
+ # @param [String] base_url
16
+ #
17
+ def initialize(base_url)
18
+ super()
19
+
20
+ @base_url = base_url
21
+ @headers = {}
22
+ end
23
+
24
+ def source
25
+ @source ||= self.class.to_s.split("::").last
26
+ end
27
+
28
+ #
29
+ # Return URLs
30
+ #
31
+ # @return [Array<String>] URLs
32
+ #
33
+ def urls
34
+ raise NotImplementedError, "You must implement #{self.class}##{__method__}"
35
+ end
36
+
37
+ #
38
+ # Return decomposed URLs
39
+ #
40
+ # @return [Array<String>] Decomposed URLs
41
+ #
42
+ def decomposed_urls
43
+ urls.uniq.select { |url| url.start_with?("http://", "https://") }.map { |url| decompose(url) }.flatten.uniq
44
+ end
45
+
46
+ #
47
+ # @return [Array<Miteru::Website>]
48
+ #
49
+ def call
50
+ decomposed_urls.map { |url| Website.new(url, source:) }
51
+ end
52
+
53
+ class << self
54
+ def inherited(child)
55
+ super
56
+ Miteru.feeds << child
57
+ end
58
+ end
59
+
60
+ private
61
+
62
+ def timeout
63
+ Miteru.config.api_timeout
64
+ end
65
+
66
+ def directory_traveling?
67
+ Miteru.config.directory_traveling
68
+ end
69
+
70
+ #
71
+ # Validate extension of a URL
72
+ #
73
+ # @param [String] url
74
+ #
75
+ # @return [Boolean]
76
+ #
77
+ def invalid_extension?(url)
78
+ IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
79
+ end
80
+
81
+ #
82
+ # Decompose a URL into URLs
83
+ #
84
+ # @param [String] url
85
+ #
86
+ # @return [Array<String>]
87
+ #
88
+ def decompose(url)
89
+ Try[URI::InvalidURIError] do
90
+ parsed = URI.parse(url)
91
+
92
+ base = "#{parsed.scheme}://#{parsed.hostname}"
93
+ return [base] unless directory_traveling?
94
+
95
+ segments = parsed.path.split("/")
96
+ return [base] if segments.empty?
97
+
98
+ urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
99
+ urls.reject { |url| invalid_extension? url }
100
+ end.recover { [] }.value!
101
+ end
102
+
103
+ #
104
+ # @return [::HTTP::Client]
105
+ #
106
+ def http
107
+ @http ||= HTTP::Factory.build(headers:, timeout:)
108
+ end
109
+
110
+ #
111
+ # @param [String] path
112
+ #
113
+ # @return [URI]
114
+ #
115
+ def url_for(path)
116
+ URI.join base_url, path
117
+ end
118
+
119
+ #
120
+ # @param [String] path
121
+ # @param [Hash, nil] params
122
+ #
123
+ # @return [::HTTP::Response]
124
+ #
125
+ def get(path, params: nil)
126
+ http.get(url_for(path), params:)
127
+ end
128
+
129
+ #
130
+ # @param [String] path
131
+ # @param [Hash, nil] params
132
+ #
133
+ # @return [Hash]
134
+ #
135
+ def get_json(path, params: nil)
136
+ res = get(path, params:)
137
+ JSON.parse res.body.to_s
138
+ end
139
+ end
140
+ end
141
+ end
@@ -1,19 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "json"
4
- require "uri"
5
-
6
3
  module Miteru
7
4
  class Feeds
8
- class PhishingDatabase < Feed
9
- URL = "https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE-NOW.txt"
5
+ class PhishingDatabase < Base
6
+ def initialize(base_url = "https://raw.githubusercontent.com")
7
+ super(base_url)
8
+ end
10
9
 
11
10
  def urls
12
- body = get(URL)
13
- body.to_s.lines.map(&:chomp)
14
- rescue HTTPResponseError, HTTP::Error, JSON::ParserError => e
15
- info "Failed to load phishing database feed (#{e})"
16
- []
11
+ text.lines.map(&:chomp)
12
+ end
13
+
14
+ private
15
+
16
+ def text
17
+ get("/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE-NOW.txt").body.to_s
17
18
  end
18
19
  end
19
20
  end
@@ -1,35 +1,63 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "urlscan"
4
-
5
3
  module Miteru
6
4
  class Feeds
7
- class UrlScan < Feed
8
- attr_reader :size
9
-
10
- def initialize(size = 100)
11
- @size = size
12
- raise ArgumentError, "size must be less than 10,000" if size > 10_000
13
- end
5
+ class UrlScan < Base
6
+ #
7
+ # @param [String] base_url
8
+ #
9
+ def initialize(base_url = "https://urlscan.io")
10
+ super(base_url)
14
11
 
15
- def api
16
- @api ||= ::UrlScan::API.new(Miteru.configuration.urlscan_api_key)
12
+ @headers = {"api-key": api_key}
17
13
  end
18
14
 
19
15
  def urls
20
- urls_from_community_feed
21
- rescue ::UrlScan::ResponseError => e
22
- Miteru.logger.error "Failed to load urlscan.io feed (#{e})"
23
- []
16
+ search_with_pagination.flat_map do |json|
17
+ (json["results"] || []).map { |result| result.dig("task", "url") }
18
+ end.uniq
24
19
  end
25
20
 
26
21
  private
27
22
 
28
- def urls_from_community_feed
29
- res = api.search("task.method:automatic", size: size)
23
+ def size
24
+ 10_000
25
+ end
26
+
27
+ # @return [<Type>] <description>
28
+ #
29
+ def api_key
30
+ Miteru.config.urlscan_api_key
31
+ end
32
+
33
+ def q
34
+ "task.method:automatic AND date:#{Miteru.config.urlscan_date_condition}"
35
+ end
36
+
37
+ #
38
+ # @param [String, nil] search_after
39
+ #
40
+ # @return [Hash]
41
+ #
42
+ def search(search_after: nil)
43
+ get_json("/api/v1/search/", params: {q:, size:, search_after:}.compact)
44
+ end
45
+
46
+ def search_with_pagination
47
+ search_after = nil
48
+
49
+ Enumerator.new do |y|
50
+ loop do
51
+ res = search(search_after:)
52
+
53
+ y.yield res
54
+
55
+ has_more = res["has_more"]
56
+ break unless has_more
30
57
 
31
- results = res["results"] || []
32
- results.map { |result| result.dig("task", "url") }
58
+ search_after = res["results"].last["sort"].join(",")
59
+ end
60
+ end
33
61
  end
34
62
  end
35
63
  end
@@ -1,35 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "urlscan"
4
-
5
3
  module Miteru
6
4
  class Feeds
7
- class UrlScanPro < Feed
8
- def api
9
- @api ||= ::UrlScan::API.new(Miteru.configuration.urlscan_api_key)
5
+ class UrlScanPro < Base
6
+ #
7
+ # @param [String] base_url
8
+ #
9
+ def initialize(base_url = "https://urlscan.io")
10
+ super(base_url)
11
+
12
+ @headers = {"api-key": api_key}
10
13
  end
11
14
 
12
15
  def urls
13
- urls_from_pro_feed
14
- rescue ::UrlScan::ResponseError => e
15
- Miteru.logger.error "Failed to load urlscan.io pro feed (#{e})"
16
- []
16
+ (json["results"] || []).map { |result| result["page_url"] }
17
17
  end
18
18
 
19
19
  private
20
20
 
21
- def api_key?
22
- Miteru.configuration.urlscan_api_key?
21
+ def api_key
22
+ Miteru.config.urlscan_api_key
23
23
  end
24
24
 
25
- def urls_from_pro_feed
26
- return [] unless api_key?
25
+ def q
26
+ "date:#{Miteru.config.urlscan_date_condition}"
27
+ end
28
+
29
+ def format
30
+ "json"
31
+ end
27
32
 
28
- res = api.pro.phishfeed
29
- results = res["results"] || []
30
- results.map { |result| result["page_url"] }
31
- rescue ArgumentError => _e
32
- []
33
+ def json
34
+ get_json("/api/v1/pro/phishfeed", params: {q:, format:})
33
35
  end
34
36
  end
35
37
  end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "http"
4
+
5
+ module Miteru
6
+ module HTTP
7
+ #
8
+ # Better error handling feature
9
+ #
10
+ class BetterError < ::HTTP::Feature
11
+ def wrap_response(response)
12
+ return response if response.status.success?
13
+
14
+ raise StatusError.new(
15
+ "Unsuccessful response code returned: #{response.code}",
16
+ response.code,
17
+ response.body.to_s
18
+ )
19
+ end
20
+
21
+ ::HTTP::Options.register_feature(:better_error, self)
22
+ end
23
+
24
+ #
25
+ # HTTP client factory
26
+ #
27
+ class Factory
28
+ class << self
29
+ USER_AGENT = "miteru/#{Miteru::VERSION}".freeze
30
+
31
+ #
32
+ # @param [Integer, nil] timeout
33
+ # @param [Hash] headers
34
+ # @param [Boolean] raise_exception
35
+ #
36
+ # @return [::HTTP::Client]
37
+ #
38
+ # @param [Object] raise_exception
39
+ def build(headers: {}, timeout: nil, raise_exception: true)
40
+ client = raise_exception ? ::HTTP.use(:better_error) : ::HTTP
41
+
42
+ headers["User-Agent"] ||= USER_AGENT
43
+
44
+ client = client.headers(headers)
45
+ client = client.timeout(timeout) unless timeout.nil?
46
+ client
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end