miteru 1.2.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/gem.yml +36 -0
  3. data/.github/workflows/{test.yml → ruby.yml} +4 -13
  4. data/.gitignore +4 -1
  5. data/.rspec +1 -1
  6. data/README.md +7 -17
  7. data/docker-compose.yml +12 -0
  8. data/exe/miteru +3 -3
  9. data/lefthook.yml +9 -0
  10. data/lib/miteru/cli/application.rb +27 -0
  11. data/lib/miteru/cli/base.rb +16 -0
  12. data/lib/miteru/cli/database.rb +11 -0
  13. data/lib/miteru/commands/database.rb +23 -0
  14. data/lib/miteru/commands/main.rb +37 -0
  15. data/lib/miteru/commands/sidekiq.rb +35 -0
  16. data/lib/miteru/commands/web.rb +37 -0
  17. data/lib/miteru/concerns/database_connectable.rb +16 -0
  18. data/lib/miteru/concerns/error_unwrappable.rb +30 -0
  19. data/lib/miteru/config.rb +98 -0
  20. data/lib/miteru/crawler.rb +28 -44
  21. data/lib/miteru/database.rb +50 -38
  22. data/lib/miteru/downloader.rb +52 -41
  23. data/lib/miteru/errors.rb +37 -0
  24. data/lib/miteru/feeds/ayashige.rb +9 -20
  25. data/lib/miteru/feeds/base.rb +141 -0
  26. data/lib/miteru/feeds/phishing_database.rb +11 -10
  27. data/lib/miteru/feeds/urlscan.rb +47 -19
  28. data/lib/miteru/feeds/urlscan_pro.rb +20 -18
  29. data/lib/miteru/http.rb +51 -0
  30. data/lib/miteru/kit.rb +28 -20
  31. data/lib/miteru/mixin.rb +2 -29
  32. data/lib/miteru/notifiers/base.rb +10 -3
  33. data/lib/miteru/notifiers/slack.rb +85 -10
  34. data/lib/miteru/notifiers/urlscan.rb +29 -14
  35. data/lib/miteru/orchestrator.rb +51 -0
  36. data/lib/miteru/record.rb +8 -15
  37. data/lib/miteru/service.rb +28 -0
  38. data/lib/miteru/sidekiq/application.rb +13 -0
  39. data/lib/miteru/sidekiq/jobs.rb +21 -0
  40. data/lib/miteru/version.rb +1 -1
  41. data/lib/miteru/web/application.rb +42 -0
  42. data/lib/miteru/website.rb +48 -48
  43. data/lib/miteru.rb +130 -22
  44. data/miteru-sidekiq.service +13 -0
  45. data/miteru.db-shm +0 -0
  46. data/miteru.db-wal +0 -0
  47. data/miteru.gemspec +49 -38
  48. metadata +265 -97
  49. data/.overcommit.yml +0 -12
  50. data/.standard.yml +0 -4
  51. data/lib/miteru/attachement.rb +0 -74
  52. data/lib/miteru/cli.rb +0 -41
  53. data/lib/miteru/configuration.rb +0 -122
  54. data/lib/miteru/error.rb +0 -7
  55. data/lib/miteru/feeds/feed.rb +0 -53
  56. data/lib/miteru/feeds/phishstats.rb +0 -28
  57. data/lib/miteru/feeds.rb +0 -45
  58. data/lib/miteru/http_client.rb +0 -85
data/lib/miteru/database.rb
@@ -1,11 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "active_record"
4
-
5
- class InitialSchema < ActiveRecord::Migration[7.0]
3
+ class V2Schema < ActiveRecord::Migration[7.0]
6
4
  def change
7
5
  create_table :records, if_not_exists: true do |t|
8
- t.string :hash, null: false, index: { unique: true }
6
+ t.string :sha256, null: false, index: {unique: true}
9
7
  t.string :hostname, null: false
10
8
  t.json :headers, null: false
11
9
  t.text :filename, null: false
@@ -13,61 +11,75 @@ class InitialSchema < ActiveRecord::Migration[7.0]
13
11
  t.integer :filesize, null: false
14
12
  t.string :mime_type, null: false
15
13
  t.text :url, null: false
14
+ t.string :source, null: false
16
15
 
17
16
  t.timestamps
18
17
  end
19
18
  end
20
19
  end
21
20
 
22
- class V11Schema < ActiveRecord::Migration[7.0]
23
- def change
24
- add_column :records, :source, :string, if_not_exists: true
25
- end
26
- end
27
-
28
- def adapter
29
- return "postgresql" if Miteru.configuration.database.start_with?("postgresql://", "postgres://")
30
- return "mysql2" if Miteru.configuration.database.start_with?("mysql2://")
31
-
32
- "sqlite3"
21
+ #
22
+ # @return [Array<ActiveRecord::Migration>] schemas
23
+ #
24
+ def schemas
25
+ [V2Schema]
33
26
  end
34
27
 
35
28
  module Miteru
36
29
  class Database
37
30
  class << self
31
+ #
32
+ # DB migration
33
+ #
34
+ # @param [Symbol] direction
35
+ #
36
+ def migrate(direction)
37
+ schemas.each { |schema| schema.migrate direction }
38
+ end
39
+
40
+ #
41
+ # Establish DB connection
42
+ #
38
43
  def connect
39
- case adapter
40
- when "postgresql", "mysql2"
41
- ActiveRecord::Base.establish_connection(Miteru.configuration.database)
42
- else
43
- ActiveRecord::Base.establish_connection(
44
- adapter: adapter,
45
- database: Miteru.configuration.database
46
- )
47
- end
44
+ return if connected?
48
45
 
49
- # ActiveRecord::Base.logger = Logger.new STDOUT
50
- ActiveRecord::Migration.verbose = false
46
+ ActiveRecord::Base.establish_connection Miteru.config.database_url.to_s
47
+ ActiveRecord::Base.logger = Logger.new($stdout) if Miteru.development?
48
+ end
51
49
 
52
- InitialSchema.migrate(:up)
53
- V11Schema.migrate(:up)
54
- rescue StandardError => _e
55
- # Do nothing
50
+ #
51
+ # @return [Boolean]
52
+ #
53
+ def connected?
54
+ ActiveRecord::Base.connected?
56
55
  end
57
56
 
57
+ #
58
+ # Close DB connection(s)
59
+ #
58
60
  def close
59
- ActiveRecord::Base.clear_active_connections!
60
- ActiveRecord::Base.connection.close
61
+ return unless connected?
62
+
63
+ ActiveRecord::Base.connection_handler.clear_active_connections!
61
64
  end
62
65
 
63
- def destroy!
64
- return unless ActiveRecord::Base.connected?
66
+ def with_db_connection
67
+ Miteru::Database.connect unless connected?
68
+ yield
69
+ rescue ActiveRecord::StatementInvalid
70
+ Miteru.logger.error("DB migration is not yet complete. Please run 'miteru db migrate'.")
71
+ ensure
72
+ Miteru::Database.close
73
+ end
65
74
 
66
- InitialSchema.migrate(:down)
67
- V11Schema.migrate(:down)
75
+ private
76
+
77
+ def adapter
78
+ return "postgresql" if %w[postgresql postgres].include?(Miteru.config.database_url.scheme)
79
+ return "mysql2" if Miteru.config.database_url.scheme == "mysql2"
80
+
81
+ "sqlite3"
68
82
  end
69
83
  end
70
84
  end
71
85
  end
72
-
73
- Miteru::Database.connect
data/lib/miteru/downloader.rb
@@ -2,65 +2,76 @@
2
2
 
3
3
  require "digest"
4
4
  require "fileutils"
5
- require "uri"
5
+
6
+ require "down/http"
6
7
 
7
8
  module Miteru
8
- class Downloader
9
- attr_reader :base_dir, :memo
9
+ class Downloader < Service
10
+ prepend MemoWise
10
11
 
11
- def initialize(base_dir = "/tmp")
12
- @base_dir = base_dir
13
- @memo = {}
14
- raise ArgumentError, "#{base_dir} doesn't exist." unless Dir.exist?(base_dir)
15
- end
12
+ # @return [String]
13
+ attr_reader :base_dir
16
14
 
17
- def download_kits(kits)
18
- kits.each { |kit| download_kit kit }
19
- end
15
+ # @return [Miteru::Kit]
16
+ attr_reader :kit
20
17
 
21
- private
18
+ #
19
+ # <Description>
20
+ #
21
+ # @param [Miteru::Kit] kit
22
+ # @param [String] base_dir
23
+ #
24
+ def initialize(kit, base_dir: Miteru.config.download_to)
25
+ super()
26
+ @kit = kit
27
+ @base_dir = base_dir
28
+ end
22
29
 
23
- def download_kit(kit)
30
+ #
31
+ # @return [String]
32
+ #
33
+ def call
24
34
  destination = kit.filepath_to_download
25
35
 
26
- begin
27
- downloaded_as = HTTPClient.download(kit.url, destination)
28
- rescue Down::Error => e
29
- Miteru.logger.error "Failed to download: #{kit.url} (#{e})"
30
- return
31
- end
36
+ # downloader.download(kit.url, destination:, max_size:)
37
+ downloader.download(kit.url, destination:, max_size:)
32
38
 
33
- # check filesize
34
- size = File.size downloaded_as
35
- if size > Miteru.configuration.file_maxsize
36
- Miteru.logger.info "#{kit.url}'s filesize exceeds the limit: #{size}"
37
- FileUtils.rm downloaded_as
38
- return
39
+ unless Record.unique_sha256?(sha256(destination))
40
+ FileUtils.rm destination
41
+ raise UniquenessError, "Kit:#{sha256(destination)} is registered already."
39
42
  end
40
43
 
41
- hash = sha256(downloaded_as)
44
+ # Record a kit in DB
45
+ Record.create_by_kit_and_hash(kit, sha256: sha256(destination))
46
+ Miteru.logger.info "Download #{kit.url} as #{destination}"
47
+
48
+ destination
49
+ end
42
50
 
43
- ActiveRecord::Base.connection_pool.with_connection do
44
- # Remove a downloaded file if it is not unique
45
- unless Record.unique_hash?(hash)
46
- Miteru.logger.info "Don't download #{kit.url}. The same hash is already recorded. (SHA256: #{hash})."
47
- FileUtils.rm downloaded_as
48
- return
49
- end
51
+ private
50
52
 
51
- # Record a kit in DB
52
- Record.create_by_kit_and_hash(kit, hash)
53
- Miteru.logger.info "Download #{kit.url} as #{downloaded_as}"
53
+ def timeout
54
+ Miteru.config.download_timeout
55
+ end
56
+
57
+ def downloader
58
+ Down::Http.new(ssl_context:) { |client| client.timeout(timeout) }
59
+ end
60
+
61
+ def ssl_context
62
+ OpenSSL::SSL::SSLContext.new.tap do |ctx|
63
+ ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
54
64
  end
55
65
  end
56
66
 
57
- def sha256(path)
58
- return memo[path] if memo.key?(path)
67
+ def max_size
68
+ Miteru.config.file_max_size
69
+ end
59
70
 
71
+ def sha256(path)
60
72
  digest = Digest::SHA256.file(path)
61
- hash = digest.hexdigest
62
- memo[path] = hash
63
- hash
73
+ digest.hexdigest
64
74
  end
75
+ memo_wise :sha256
65
76
  end
66
77
  end
data/lib/miteru/errors.rb
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "http"
4
+
5
+ module Miteru
6
+ class Error < StandardError; end
7
+
8
+ class FileSizeError < Error; end
9
+
10
+ class DownloadError < Error; end
11
+
12
+ class UniquenessError < Error; end
13
+
14
+ class StatusError < ::HTTP::Error
15
+ # @return [Integer]
16
+ attr_reader :status_code
17
+
18
+ # @return [String, nil]
19
+ attr_reader :body
20
+
21
+ #
22
+ # @param [String] msg
23
+ # @param [Integer] status_code
24
+ # @param [String, nil] body
25
+ #
26
+ def initialize(msg, status_code, body)
27
+ super(msg)
28
+
29
+ @status_code = status_code
30
+ @body = body
31
+ end
32
+
33
+ def detail
34
+ {status_code:, body:}
35
+ end
36
+ end
37
+ end
data/lib/miteru/feeds/ayashige.rb
@@ -1,34 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "json"
4
- require "uri"
5
-
6
3
  module Miteru
7
4
  class Feeds
8
- class Ayashige < Feed
5
+ class Ayashige < Base
9
6
  HOST = "ayashige.herokuapp.com"
10
- URL = "https://#{HOST}"
7
+ URL = "https://#{HOST}".freeze
11
8
 
12
- def urls
13
- url = url_for("/api/v1/domains/")
14
- res = JSON.parse(get(url))
9
+ def initialize(base_url = "https://ayashige.herokuapp.com")
10
+ super(base_url)
11
+ end
15
12
 
16
- domains = res.map { |item| item["fqdn"] }
17
- domains.map do |domain|
18
- [
19
- "https://#{domain}",
20
- "http://#{domain}"
21
- ]
22
- end.flatten
23
- rescue HTTPResponseError, HTTP::Error, JSON::ParserError => e
24
- Miteru.logger.error "Failed to load ayashige feed (#{e})"
25
- []
13
+ def urls
14
+ json.map { |item| item["fqdn"] }.map { |fqdn| "https://#{fqdn}" }
26
15
  end
27
16
 
28
17
  private
29
18
 
30
- def url_for(path)
31
- URI(URL + path)
19
+ def json
20
+ get_json "/api/v1/domains/"
32
21
  end
33
22
  end
34
23
  end
data/lib/miteru/feeds/base.rb
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ class Feeds
5
+ class Base < Service
6
+ IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze
7
+
8
+ # @return [String]
9
+ attr_reader :base_url
10
+
11
+ # @return [Hash]
12
+ attr_reader :headers
13
+
14
+ #
15
+ # @param [String] base_url
16
+ #
17
+ def initialize(base_url)
18
+ super()
19
+
20
+ @base_url = base_url
21
+ @headers = {}
22
+ end
23
+
24
+ def source
25
+ @source ||= self.class.to_s.split("::").last
26
+ end
27
+
28
+ #
29
+ # Return URLs
30
+ #
31
+ # @return [Array<String>] URLs
32
+ #
33
+ def urls
34
+ raise NotImplementedError, "You must implement #{self.class}##{__method__}"
35
+ end
36
+
37
+ #
38
+ # Return decomposed URLs
39
+ #
40
+ # @return [Array<String>] Decomposed URLs
41
+ #
42
+ def decomposed_urls
43
+ urls.uniq.select { |url| url.start_with?("http://", "https://") }.map { |url| decompose(url) }.flatten.uniq
44
+ end
45
+
46
+ #
47
+ # @return [Array<Miteru::Website>]
48
+ #
49
+ def call
50
+ decomposed_urls.map { |url| Website.new(url, source:) }
51
+ end
52
+
53
+ class << self
54
+ def inherited(child)
55
+ super
56
+ Miteru.feeds << child
57
+ end
58
+ end
59
+
60
+ private
61
+
62
+ def timeout
63
+ Miteru.config.api_timeout
64
+ end
65
+
66
+ def directory_traveling?
67
+ Miteru.config.directory_traveling
68
+ end
69
+
70
+ #
71
+ # Validate extension of a URL
72
+ #
73
+ # @param [String] url
74
+ #
75
+ # @return [Boolean]
76
+ #
77
+ def invalid_extension?(url)
78
+ IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
79
+ end
80
+
81
+ #
82
+ # Decompose a URL into URLs
83
+ #
84
+ # @param [String] url
85
+ #
86
+ # @return [Array<String>]
87
+ #
88
+ def decompose(url)
89
+ Try[URI::InvalidURIError] do
90
+ parsed = URI.parse(url)
91
+
92
+ base = "#{parsed.scheme}://#{parsed.hostname}"
93
+ return [base] unless directory_traveling?
94
+
95
+ segments = parsed.path.split("/")
96
+ return [base] if segments.empty?
97
+
98
+ urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
99
+ urls.reject { |url| invalid_extension? url }
100
+ end.recover { [] }.value!
101
+ end
102
+
103
+ #
104
+ # @return [::HTTP::Client]
105
+ #
106
+ def http
107
+ @http ||= HTTP::Factory.build(headers:, timeout:)
108
+ end
109
+
110
+ #
111
+ # @param [String] path
112
+ #
113
+ # @return [URI]
114
+ #
115
+ def url_for(path)
116
+ URI.join base_url, path
117
+ end
118
+
119
+ #
120
+ # @param [String] path
121
+ # @param [Hash, nil] params
122
+ #
123
+ # @return [::HTTP::Response]
124
+ #
125
+ def get(path, params: nil)
126
+ http.get(url_for(path), params:)
127
+ end
128
+
129
+ #
130
+ # @param [String] path
131
+ # @param [Hash, nil] params
132
+ #
133
+ # @return [Hash]
134
+ #
135
+ def get_json(path, params: nil)
136
+ res = get(path, params:)
137
+ JSON.parse res.body.to_s
138
+ end
139
+ end
140
+ end
141
+ end
data/lib/miteru/feeds/phishing_database.rb
@@ -1,19 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "json"
4
- require "uri"
5
-
6
3
  module Miteru
7
4
  class Feeds
8
- class PhishingDatabase < Feed
9
- URL = "https://raw.githubusercontent.com/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE-NOW.txt"
5
+ class PhishingDatabase < Base
6
+ def initialize(base_url = "https://raw.githubusercontent.com")
7
+ super(base_url)
8
+ end
10
9
 
11
10
  def urls
12
- body = get(URL)
13
- body.to_s.lines.map(&:chomp)
14
- rescue HTTPResponseError, HTTP::Error, JSON::ParserError => e
15
- info "Failed to load phishing database feed (#{e})"
16
- []
11
+ text.lines.map(&:chomp)
12
+ end
13
+
14
+ private
15
+
16
+ def text
17
+ get("/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE-NOW.txt").body.to_s
17
18
  end
18
19
  end
19
20
  end
data/lib/miteru/feeds/urlscan.rb
@@ -1,35 +1,63 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "urlscan"
4
-
5
3
  module Miteru
6
4
  class Feeds
7
- class UrlScan < Feed
8
- attr_reader :size
9
-
10
- def initialize(size = 100)
11
- @size = size
12
- raise ArgumentError, "size must be less than 10,000" if size > 10_000
13
- end
5
+ class UrlScan < Base
6
+ #
7
+ # @param [String] base_url
8
+ #
9
+ def initialize(base_url = "https://urlscan.io")
10
+ super(base_url)
14
11
 
15
- def api
16
- @api ||= ::UrlScan::API.new(Miteru.configuration.urlscan_api_key)
12
+ @headers = {"api-key": api_key}
17
13
  end
18
14
 
19
15
  def urls
20
- urls_from_community_feed
21
- rescue ::UrlScan::ResponseError => e
22
- Miteru.logger.error "Failed to load urlscan.io feed (#{e})"
23
- []
16
+ search_with_pagination.flat_map do |json|
17
+ (json["results"] || []).map { |result| result.dig("task", "url") }
18
+ end.uniq
24
19
  end
25
20
 
26
21
  private
27
22
 
28
- def urls_from_community_feed
29
- res = api.search("task.method:automatic", size: size)
23
+ def size
24
+ 10_000
25
+ end
26
+
27
+ # @return [<Type>] <description>
28
+ #
29
+ def api_key
30
+ Miteru.config.urlscan_api_key
31
+ end
32
+
33
+ def q
34
+ "task.method:automatic AND date:#{Miteru.config.urlscan_date_condition}"
35
+ end
36
+
37
+ #
38
+ # @param [String, nil] search_after
39
+ #
40
+ # @return [Hash]
41
+ #
42
+ def search(search_after: nil)
43
+ get_json("/api/v1/search/", params: {q:, size:, search_after:}.compact)
44
+ end
45
+
46
+ def search_with_pagination
47
+ search_after = nil
48
+
49
+ Enumerator.new do |y|
50
+ loop do
51
+ res = search(search_after:)
52
+
53
+ y.yield res
54
+
55
+ has_more = res["has_more"]
56
+ break unless has_more
30
57
 
31
- results = res["results"] || []
32
- results.map { |result| result.dig("task", "url") }
58
+ search_after = res["results"].last["sort"].join(",")
59
+ end
60
+ end
33
61
  end
34
62
  end
35
63
  end
data/lib/miteru/feeds/urlscan_pro.rb
@@ -1,35 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "urlscan"
4
-
5
3
  module Miteru
6
4
  class Feeds
7
- class UrlScanPro < Feed
8
- def api
9
- @api ||= ::UrlScan::API.new(Miteru.configuration.urlscan_api_key)
5
+ class UrlScanPro < Base
6
+ #
7
+ # @param [String] base_url
8
+ #
9
+ def initialize(base_url = "https://urlscan.io")
10
+ super(base_url)
11
+
12
+ @headers = {"api-key": api_key}
10
13
  end
11
14
 
12
15
  def urls
13
- urls_from_pro_feed
14
- rescue ::UrlScan::ResponseError => e
15
- Miteru.logger.error "Failed to load urlscan.io pro feed (#{e})"
16
- []
16
+ (json["results"] || []).map { |result| result["page_url"] }
17
17
  end
18
18
 
19
19
  private
20
20
 
21
- def api_key?
22
- Miteru.configuration.urlscan_api_key?
21
+ def api_key
22
+ Miteru.config.urlscan_api_key
23
23
  end
24
24
 
25
- def urls_from_pro_feed
26
- return [] unless api_key?
25
+ def q
26
+ "date:#{Miteru.config.urlscan_date_condition}"
27
+ end
28
+
29
+ def format
30
+ "json"
31
+ end
27
32
 
28
- res = api.pro.phishfeed
29
- results = res["results"] || []
30
- results.map { |result| result["page_url"] }
31
- rescue ArgumentError => _e
32
- []
33
+ def json
34
+ get_json("/api/v1/pro/phishfeed", params: {q:, format:})
33
35
  end
34
36
  end
35
37
  end
data/lib/miteru/http.rb
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "http"
4
+
5
+ module Miteru
6
+ module HTTP
7
+ #
8
+ # Better error handling feature
9
+ #
10
+ class BetterError < ::HTTP::Feature
11
+ def wrap_response(response)
12
+ return response if response.status.success?
13
+
14
+ raise StatusError.new(
15
+ "Unsuccessful response code returned: #{response.code}",
16
+ response.code,
17
+ response.body.to_s
18
+ )
19
+ end
20
+
21
+ ::HTTP::Options.register_feature(:better_error, self)
22
+ end
23
+
24
+ #
25
+ # HTTP client factory
26
+ #
27
+ class Factory
28
+ class << self
29
+ USER_AGENT = "miteru/#{Miteru::VERSION}".freeze
30
+
31
+ #
32
+ # @param [Integer, nil] timeout
33
+ # @param [Hash] headers
34
+ # @param [Boolean] raise_exception
35
+ #
36
+ # @return [::HTTP::Client]
37
+ #
38
+ # @param [Object] raise_exception
39
+ def build(headers: {}, timeout: nil, raise_exception: true)
40
+ client = raise_exception ? ::HTTP.use(:better_error) : ::HTTP
41
+
42
+ headers["User-Agent"] ||= USER_AGENT
43
+
44
+ client = client.headers(headers)
45
+ client = client.timeout(timeout) unless timeout.nil?
46
+ client
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end