miteru 1.2.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/gem.yml +36 -0
  3. data/.github/workflows/{test.yml → ruby.yml} +4 -13
  4. data/.gitignore +7 -2
  5. data/.rspec +1 -1
  6. data/README.md +7 -17
  7. data/docker-compose.yml +12 -0
  8. data/exe/miteru +3 -3
  9. data/lefthook.yml +9 -0
  10. data/lib/miteru/cli/application.rb +27 -0
  11. data/lib/miteru/cli/base.rb +16 -0
  12. data/lib/miteru/cli/database.rb +11 -0
  13. data/lib/miteru/commands/database.rb +23 -0
  14. data/lib/miteru/commands/main.rb +37 -0
  15. data/lib/miteru/commands/sidekiq.rb +35 -0
  16. data/lib/miteru/commands/web.rb +37 -0
  17. data/lib/miteru/concerns/database_connectable.rb +16 -0
  18. data/lib/miteru/concerns/error_unwrappable.rb +30 -0
  19. data/lib/miteru/config.rb +98 -0
  20. data/lib/miteru/crawler.rb +28 -44
  21. data/lib/miteru/database.rb +50 -38
  22. data/lib/miteru/downloader.rb +52 -41
  23. data/lib/miteru/errors.rb +37 -0
  24. data/lib/miteru/feeds/ayashige.rb +9 -20
  25. data/lib/miteru/feeds/base.rb +141 -0
  26. data/lib/miteru/feeds/phishing_database.rb +11 -10
  27. data/lib/miteru/feeds/urlscan.rb +47 -19
  28. data/lib/miteru/feeds/urlscan_pro.rb +20 -18
  29. data/lib/miteru/http.rb +51 -0
  30. data/lib/miteru/kit.rb +28 -20
  31. data/lib/miteru/mixin.rb +2 -29
  32. data/lib/miteru/notifiers/base.rb +10 -3
  33. data/lib/miteru/notifiers/slack.rb +85 -10
  34. data/lib/miteru/notifiers/urlscan.rb +29 -14
  35. data/lib/miteru/orchestrator.rb +58 -0
  36. data/lib/miteru/record.rb +8 -15
  37. data/lib/miteru/service.rb +28 -0
  38. data/lib/miteru/sidekiq/application.rb +13 -0
  39. data/lib/miteru/sidekiq/jobs.rb +21 -0
  40. data/lib/miteru/version.rb +1 -1
  41. data/lib/miteru/web/application.rb +42 -0
  42. data/lib/miteru/website.rb +48 -48
  43. data/lib/miteru.rb +130 -22
  44. data/miteru.gemspec +49 -38
  45. metadata +262 -97
  46. data/.overcommit.yml +0 -12
  47. data/.standard.yml +0 -4
  48. data/lib/miteru/attachement.rb +0 -74
  49. data/lib/miteru/cli.rb +0 -41
  50. data/lib/miteru/configuration.rb +0 -122
  51. data/lib/miteru/error.rb +0 -7
  52. data/lib/miteru/feeds/feed.rb +0 -53
  53. data/lib/miteru/feeds/phishstats.rb +0 -28
  54. data/lib/miteru/feeds.rb +0 -45
  55. data/lib/miteru/http_client.rb +0 -85
@@ -3,16 +3,20 @@
3
3
  require "oga"
4
4
 
5
5
  module Miteru
6
- class Website
7
- VALID_EXTENSIONS = Miteru.configuration.valid_extensions
8
-
6
+ class Website < Service
9
7
  # @return [String]
10
8
  attr_reader :url
11
9
 
12
10
  # @return [String]
13
11
  attr_reader :source
14
12
 
15
- def initialize(url, source)
13
+ #
14
+ # @param [String] url
15
+ # @param [String] source
16
+ #
17
+ def initialize(url, source:)
18
+ super()
19
+
16
20
  @url = url
17
21
  @source = source
18
22
  end
@@ -22,86 +26,82 @@ module Miteru
22
26
  end
23
27
 
24
28
  def kits
25
- @kits ||= links.filter_map do |link|
26
- kit = Kit.new(link, source)
27
- kit.valid? ? kit : nil
28
- end
29
- end
30
-
31
- def ok?
32
- response.code == 200
29
+ @kits ||= links.map { |link| Kit.new(link, source:) }.select(&:valid?)
33
30
  end
34
31
 
35
32
  def index?
36
33
  title.to_s.start_with? "Index of"
37
34
  end
38
35
 
39
- def kits?
40
- !kits.empty?
41
- end
42
-
43
36
  def has_kits?
44
- kits?
45
- rescue Addressable::URI::InvalidURIError, ArgumentError, Encoding::CompatibilityError, HTTP::Error, LL::ParserError, OpenSSL::SSL::SSLError => _e
46
- false
47
- end
48
-
49
- def message
50
- return "it doesn't contain a phishing kit." unless kits?
51
-
52
- filename_with_sizes = kits.map(&:filename_with_size).join(", ")
53
- noun = kits.length == 1 ? "a phishing kit" : "phishing kits"
54
- "it might contain #{noun}: #{filename_with_sizes}."
37
+ @has_kits ||= lambda do
38
+ Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
39
+ OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
40
+ !kits.empty?
41
+ end.recover do
42
+ false
43
+ end.value!
44
+ end.call
55
45
  end
56
46
 
57
47
  def links
58
48
  (href_links + possible_file_links).compact.uniq
59
49
  end
60
50
 
51
+ #
52
+ # @return [String]
53
+ #
54
+ def truncated_url
55
+ url.truncate(64)
56
+ end
57
+
61
58
  private
62
59
 
63
- def response
64
- @response ||= get
60
+ def timeout
61
+ Miteru.config.http_timeout
62
+ end
63
+
64
+ def http
65
+ @http ||= HTTP::Factory.build(timeout:)
65
66
  end
66
67
 
67
68
  def get
68
- HTTPClient.get url
69
+ http.get url
69
70
  end
70
71
 
71
- def doc
72
- @doc ||= parse_html(response.body.to_s)
72
+ def response
73
+ @response ||= get
73
74
  end
74
75
 
75
- def parse_html(html)
76
- Oga.parse_html(html)
77
- rescue ArgumentError, Encoding::CompatibilityError, LL::ParserError => _e
78
- nil
76
+ def doc
77
+ Oga.parse_html response.body.to_s
79
78
  end
80
79
 
81
80
  def href_links
82
- if doc && ok? && index?
81
+ Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
82
+ OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
83
83
  doc.css("a").filter_map { |a| a.get("href") }.map do |href|
84
84
  href = href.start_with?("/") ? href : "/#{href}"
85
85
  url + href
86
86
  end
87
- else
88
- []
89
- end
90
- rescue Addressable::URI::InvalidURIError, ArgumentError, Encoding::CompatibilityError, HTTP::Error, LL::ParserError, OpenSSL::SSL::SSLError => _e
91
- []
87
+ end.recover { [] }.value!
88
+ end
89
+
90
+ def file_extensions
91
+ Miteru.config.file_extensions
92
92
  end
93
93
 
94
94
  def possible_file_links
95
- uri = URI.parse(url)
95
+ parsed = URI.parse(url)
96
96
 
97
- segments = uri.path.split("/")
98
- return [] if segments.length.zero?
97
+ segments = parsed.path.split("/")
98
+ return [] if segments.empty?
99
99
 
100
100
  last = segments.last
101
- VALID_EXTENSIONS.map do |ext|
101
+ file_extensions.map do |ext|
102
102
  new_segments = segments[0..-2] + ["#{last}#{ext}"]
103
- uri.path = new_segments.join("/")
104
- uri.to_s
103
+ parsed.path = new_segments.join("/")
104
+ parsed.to_s
105
105
  end
106
106
  end
107
107
  end
data/lib/miteru.rb CHANGED
@@ -1,42 +1,150 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "memist"
3
+ # Core standard libraries
4
+ require "cgi"
5
+ require "json"
6
+ require "uri"
7
+ require "uuidtools"
8
+
9
+ # Core 3rd party libraries
10
+ require "colorize"
11
+ require "memo_wise"
12
+ require "parallel"
4
13
  require "semantic_logger"
14
+ require "sentry-ruby"
5
15
 
6
- require "miteru/version"
16
+ require "dry/files"
17
+ require "dry/monads"
7
18
 
8
- require "miteru/configuration"
9
- require "miteru/database"
19
+ # Load .env
20
+ require "dotenv/load"
10
21
 
11
- require "miteru/record"
22
+ # Active Support & Active Record
23
+ require "active_support"
24
+ require "active_record"
12
25
 
13
- require "miteru/mixin"
26
+ # Version
27
+ require "miteru/version"
28
+ # Errors
29
+ require "miteru/errors"
14
30
 
15
- require "miteru/notifiers/base"
16
- require "miteru/notifiers/slack"
17
- require "miteru/notifiers/urlscan"
31
+ # Concerns
32
+ require "miteru/concerns/database_connectable"
33
+ require "miteru/concerns/error_unwrappable"
18
34
 
19
- require "miteru/error"
20
- require "miteru/http_client"
21
- require "miteru/kit"
22
- require "miteru/website"
23
- require "miteru/downloader"
24
- require "miteru/feeds"
25
- require "miteru/attachement"
26
- require "miteru/crawler"
27
- require "miteru/cli"
35
+ # Core classes
36
+ require "miteru/config"
37
+ require "miteru/http"
28
38
 
29
- # Load .env
30
- require "dotenv/load"
39
+ # Database + ActiveRecord
40
+ require "miteru/database"
41
+ require "miteru/record"
31
42
 
32
43
  module Miteru
33
44
  class << self
34
- include Memist::Memoizable
45
+ prepend MemoWise
46
+
47
+ #
48
+ # @return [SematicLogger]
49
+ #
35
50
  def logger
51
+ SemanticLogger.sync! unless sidekiq?
52
+
36
53
  SemanticLogger.default_level = :info
37
54
  SemanticLogger.add_appender(io: $stderr, formatter: :color)
38
55
  SemanticLogger["Miteru"]
39
56
  end
40
- memoize :logger
57
+ memo_wise :logger
58
+
59
+ #
60
+ # @return [Array<Miteru::Feeds::Base>]
61
+ #
62
+ def feeds
63
+ []
64
+ end
65
+ memo_wise :feeds
66
+
67
+ #
68
+ # @return [Array<Miteru::Notifiers::Base>]
69
+ #
70
+ def notifiers
71
+ []
72
+ end
73
+ memo_wise :notifiers
74
+
75
+ #
76
+ # @return [Miteru::Config]
77
+ #
78
+ def config
79
+ @config ||= Config.new
80
+ end
81
+
82
+ #
83
+ # @return [String]
84
+ #
85
+ def env
86
+ ENV["APP_ENV"] || ENV["RACK_ENV"]
87
+ end
88
+
89
+ #
90
+ # @return [Boolean]
91
+ #
92
+ def development?
93
+ env == "development"
94
+ end
95
+
96
+ #
97
+ # @return [Boolean]
98
+ #
99
+ def sidekiq?
100
+ !Miteru.config.sidekiq_redis_url.nil?
101
+ end
102
+
103
+ def sentry?
104
+ !Miteru.config.sentry_dsn.nil?
105
+ end
106
+
107
+ def initialize_sentry
108
+ return if Sentry.initialized?
109
+
110
+ Sentry.init do |config|
111
+ config.dsn = Miteru.config.sentry_dsn
112
+ config.traces_sample_rate = Miteru.config.sentry_trace_sample_rate
113
+ config.breadcrumbs_logger = %i[sentry_logger http_logger]
114
+ end
115
+ end
41
116
  end
42
117
  end
118
+
119
+ # Services
120
+ require "miteru/service"
121
+
122
+ require "miteru/crawler"
123
+ require "miteru/downloader"
124
+ require "miteru/kit"
125
+ require "miteru/orchestrator"
126
+ require "miteru/website"
127
+
128
+ # Notifiers
129
+ require "miteru/notifiers/base"
130
+ require "miteru/notifiers/slack"
131
+ require "miteru/notifiers/urlscan"
132
+
133
+ # Feeds
134
+ require "miteru/feeds/base"
135
+
136
+ require "miteru/feeds/ayashige"
137
+ require "miteru/feeds/phishing_database"
138
+ require "miteru/feeds/urlscan_pro"
139
+ require "miteru/feeds/urlscan"
140
+
141
+ # CLI
142
+ require "miteru/cli/application"
143
+
144
+ # Sidekiq
145
+ require "sidekiq"
146
+
147
+ require "miteru/sidekiq/application"
148
+ require "miteru/sidekiq/jobs"
149
+
150
+ Miteru.initialize_sentry if Miteru.sentry?
data/miteru.gemspec CHANGED
@@ -1,55 +1,66 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- lib = File.expand_path("lib", __dir__)
3
+ lib = File.expand_path('lib', __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
- require "miteru/version"
5
+ require 'miteru/version'
6
6
 
7
7
  Gem::Specification.new do |spec|
8
- spec.name = "miteru"
8
+ spec.name = 'miteru'
9
9
  spec.version = Miteru::VERSION
10
- spec.authors = ["Manabu Niseki"]
11
- spec.email = ["manabu.niseki@gmail.com"]
12
- spec.metadata["rubygems_mfa_required"] = "true"
10
+ spec.authors = ['Manabu Niseki']
11
+ spec.email = ['manabu.niseki@gmail.com']
12
+ spec.metadata['rubygems_mfa_required'] = 'true'
13
13
 
14
- spec.summary = "An experimental phishing kit detector"
15
- spec.description = "An experimental phishing kit detector"
16
- spec.homepage = "https://github.com/ninoseki/miteru"
17
- spec.license = "MIT"
14
+ spec.summary = 'A phishing kit collector for scavengers'
15
+ spec.description = 'A phishing kit collector for scavengers'
16
+ spec.homepage = 'https://github.com/ninoseki/miteru'
17
+ spec.license = 'MIT'
18
18
 
19
19
  # Specify which files should be added to the gem when it is released.
20
20
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
21
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
22
22
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
23
23
  end
24
- spec.bindir = "exe"
24
+ spec.bindir = 'exe'
25
25
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
26
- spec.require_paths = ["lib"]
26
+ spec.require_paths = ['lib']
27
27
 
28
- spec.add_development_dependency "bundler", "~> 2.3"
29
- spec.add_development_dependency "coveralls_reborn", "~> 0.25"
30
- spec.add_development_dependency "glint", "~> 0.1"
31
- spec.add_development_dependency "mysql2", "~> 0.5"
32
- spec.add_development_dependency "overcommit", "~> 0.59"
33
- spec.add_development_dependency "pg", "~> 1.4"
34
- spec.add_development_dependency "rake", "~> 13.0"
35
- spec.add_development_dependency "rspec", "~> 3.11"
36
- spec.add_development_dependency "standard", "~> 1.14"
37
- spec.add_development_dependency "vcr", "~> 6.1"
38
- spec.add_development_dependency "webmock", "~> 3.17"
39
- spec.add_development_dependency "webrick", "~> 1.7.0"
28
+ spec.add_development_dependency 'bundler', '~> 2.5'
29
+ spec.add_development_dependency 'capybara', '~> 3.40'
30
+ spec.add_development_dependency 'coveralls_reborn', '~> 0.28'
31
+ spec.add_development_dependency 'fuubar', '~> 2.5'
32
+ spec.add_development_dependency 'mysql2', '~> 0.5'
33
+ spec.add_development_dependency 'pg', '~> 1.5'
34
+ spec.add_development_dependency 'rake', '~> 13.1'
35
+ spec.add_development_dependency 'rspec', '~> 3.12'
36
+ spec.add_development_dependency 'simplecov-lcov', '~> 0.8'
37
+ spec.add_development_dependency 'standard', '~> 1.33'
38
+ spec.add_development_dependency 'test-prof', '~> 1.3'
39
+ spec.add_development_dependency 'vcr', '~> 6.2'
40
+ spec.add_development_dependency 'webmock', '~> 3.19'
40
41
 
41
- spec.add_dependency "activerecord", "~> 7.0"
42
- spec.add_dependency "colorize", "~> 0.8"
43
- spec.add_dependency "dotenv", "2.8.1"
44
- spec.add_dependency "down", "~> 5.3"
45
- spec.add_dependency "http", "~> 5.1"
46
- spec.add_dependency "memist", "2.0.2"
47
- spec.add_dependency "oga", "~> 3.4"
48
- spec.add_dependency "parallel", "~> 1.22"
49
- spec.add_dependency "semantic_logger", "4.11.0"
50
- spec.add_dependency "slack-notifier", "~> 2.4"
51
- spec.add_dependency "sqlite3", "~> 1.4"
52
- spec.add_dependency "thor", "~> 1.2"
53
- spec.add_dependency "urlscan", "~> 0.8"
54
- spec.add_dependency "uuidtools", "~> 2.2"
42
+ spec.add_dependency 'activerecord', '7.1.3'
43
+ spec.add_dependency 'anyway_config', '2.6.2'
44
+ spec.add_dependency 'colorize', '1.1.0'
45
+ spec.add_dependency 'dotenv', '2.8.1'
46
+ spec.add_dependency 'down', '5.4.1'
47
+ spec.add_dependency 'dry-files', '1.1.0'
48
+ spec.add_dependency 'dry-monads', '1.6.0'
49
+ spec.add_dependency 'http', '5.1.1'
50
+ spec.add_dependency 'memo_wise', '1.8.0'
51
+ spec.add_dependency 'oga', '3.4'
52
+ spec.add_dependency 'parallel', '1.24.0'
53
+ spec.add_dependency 'puma', '6.4.2'
54
+ spec.add_dependency 'rack', '3.0.8'
55
+ spec.add_dependency 'rack-session', '2.0.0'
56
+ spec.add_dependency 'rackup', '2.1.0'
57
+ spec.add_dependency 'semantic_logger', '4.15.0'
58
+ spec.add_dependency 'sentry-ruby', '5.16.1'
59
+ spec.add_dependency 'sentry-sidekiq', '5.16.1'
60
+ spec.add_dependency 'sidekiq', '7.2.1'
61
+ spec.add_dependency 'slack-notifier', '2.4.0'
62
+ spec.add_dependency 'sqlite3', '1.7.1'
63
+ spec.add_dependency 'thor', '1.3.0'
64
+ spec.add_dependency 'thor-hollaback', '0.2.1'
65
+ spec.add_dependency 'uuidtools', '2.2.0'
55
66
  end