miteru 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2217ef5ac9ed2060203df9f5edebbac779fbebc53e4418f54de6d4b080ee81fc
4
- data.tar.gz: 45c934a406c619b1d1cc9d03a00103ae2b732460ca70877953aaac2d36d3b0e0
3
+ metadata.gz: 050c27599e75745a7c215f08b7ed190b43c70388d974a68945702eefdb25b7c2
4
+ data.tar.gz: 64c7429a4178febf6984fe3d79d3970781634971405c5d9fdb0748c61b663a32
5
5
  SHA512:
6
- metadata.gz: 03ebfc42660abedfa61a7e931e0d863b03fc50d615cfe0c81db10c0473ae8e0908619c5bcbc656e48784d40b5ccc5b98d616efe467d2a68ecca1bb57415bacfb
7
- data.tar.gz: a74accbf1d3949e8211792e02c68e6e6911acbe2cc0cd113459eb812bad29ae0dc4c434f56009a1e49fa255d58bfc5485b9cbe8e3d6e936fef68e23d671d7d86
6
+ metadata.gz: ff780a0db3fafdded94261c38272dc0537462ae195f646cceafc2223cd62e61fb8d809ee172f953344314a93f9e6f746b9d3b6b66efdf9b48cda2c5d8c645eb7
7
+ data.tar.gz: c55dfad5120175ebf43bd01a30b42517a9a5bfd6fddd12d8c6ad07acd3ae02bba67a1bc9101d26fab74406f374f16fbeb3f51e791c678dce749a403e9477f82a
@@ -14,8 +14,8 @@ module Miteru
14
14
  @notifier = Notifier.new
15
15
  end
16
16
 
17
- def crawl(url)
18
- website = Website.new(url)
17
+ def crawl(entry)
18
+ website = Website.new(entry.url, entry.source)
19
19
  downloader.download_kits(website.kits) if website.has_kits? && auto_download?
20
20
  notify(website) if website.has_kits? || verbose?
21
21
  rescue OpenSSL::SSL::SSLError, HTTP::Error, Addressable::URI::InvalidURIError => _e
@@ -23,11 +23,11 @@ module Miteru
23
23
  end
24
24
 
25
25
  def execute
26
- suspicious_urls = feeds.suspicious_urls
27
- puts "Loaded #{suspicious_urls.length} URLs to crawl. (crawling in #{threads} threads)" if verbose?
26
+ suspicious_entries = feeds.suspicious_entries
27
+ puts "Loaded #{suspicious_entries.length} URLs to crawl. (crawling in #{threads} threads)" if verbose?
28
28
 
29
- Parallel.each(suspicious_urls, in_threads: threads) do |url|
30
- crawl url
29
+ Parallel.each(suspicious_entries, in_threads: threads) do |entry|
30
+ crawl entry
31
31
  end
32
32
  end
33
33
 
@@ -19,6 +19,12 @@ class InitialSchema < ActiveRecord::Migration[6.1]
19
19
  end
20
20
  end
21
21
 
22
+ class V11Schema < ActiveRecord::Migration[6.1]
23
+ def change
24
+ add_column :records, :source, :string, if_not_exists: true
25
+ end
26
+ end
27
+
22
28
  def adapter
23
29
  return "postgresql" if Miteru.configuration.database.start_with?("postgresql://", "postgres://")
24
30
  return "mysql2" if Miteru.configuration.database.start_with?("mysql2://")
@@ -44,6 +50,7 @@ module Miteru
44
50
  ActiveRecord::Migration.verbose = false
45
51
 
46
52
  InitialSchema.migrate(:up)
53
+ V11Schema.migrate(:up)
47
54
  rescue StandardError => _e
48
55
  # Do nothing
49
56
  end
@@ -57,6 +64,7 @@ module Miteru
57
64
  return unless ActiveRecord::Base.connected?
58
65
 
59
66
  InitialSchema.migrate(:down)
67
+ V11Schema.migrate(:down)
60
68
  end
61
69
  end
62
70
  end
@@ -3,10 +3,43 @@
3
3
  module Miteru
4
4
  class Feeds
5
5
  class Feed
6
+ include Mixins::URL
7
+
8
+ def source
9
+ @source ||= self.class.to_s.split("::").last
10
+ end
11
+
12
+ #
13
+ # Return URLs
14
+ #
15
+ # @return [Array<String>] URLs
16
+ #
6
17
  def urls
7
18
  raise NotImplementedError, "You must implement #{self.class}##{__method__}"
8
19
  end
9
20
 
21
+ #
22
+ # Return entries
23
+ #
24
+ # @return [Array<Miteru::Entry>]
25
+ #
26
+ def entries
27
+ breakdowend_urls.map do |url|
28
+ Entry.new(url, source)
29
+ end
30
+ end
31
+
32
+ #
33
+ # Return broken-down URLs
34
+ #
35
+ # @return [Array<String>] Broken-down URLs
36
+ #
37
+ def breakdowend_urls
38
+ urls.select { |url| url.start_with?("http://", "https://") }.map do |url|
39
+ breakdown(url, Miteru.configuration.directory_traveling?)
40
+ end.flatten.uniq
41
+ end
42
+
10
43
  private
11
44
 
12
45
  def get(url)
data/lib/miteru/feeds.rb CHANGED
@@ -8,6 +8,18 @@ require_relative "./feeds/urlscan"
8
8
  require_relative "./feeds/urlscan_pro"
9
9
 
10
10
  module Miteru
11
+ class Entry
12
+ # @return [String]
13
+ attr_reader :url
14
+ # @return [String]
15
+ attr_reader :source
16
+
17
+ def initialize(url, source)
18
+ @url = url
19
+ @source = source
20
+ end
21
+ end
22
+
11
23
  class Feeds
12
24
  IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze
13
25
 
@@ -21,43 +33,13 @@ module Miteru
21
33
  ].compact
22
34
  end
23
35
 
24
- def directory_traveling?
25
- Miteru.configuration.directory_traveling?
26
- end
27
-
28
- def suspicious_urls
29
- @suspicious_urls ||= [].tap do |arr|
30
- urls = @feeds.map do |feed|
31
- feed.urls.select { |url| url.start_with?("http://", "https://") }
32
- end.flatten.uniq
33
-
34
- urls.map { |url| breakdown(url) }.flatten.uniq.sort.each { |url| arr << url }
35
- end
36
- end
37
-
38
- def breakdown(url)
39
- begin
40
- uri = URI.parse(url)
41
- rescue URI::InvalidURIError => _e
42
- return []
43
- end
44
-
45
- base = "#{uri.scheme}://#{uri.hostname}"
46
- return [base] unless directory_traveling?
47
-
48
- segments = uri.path.split("/")
49
- return [base] if segments.length.zero?
50
-
51
- urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
52
-
53
- urls.reject do |breakdowned_url|
54
- # Reject a url which ends with specific extension names
55
- invalid_extension? breakdowned_url
56
- end
57
- end
58
-
59
- def invalid_extension?(url)
60
- IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
36
+ #
37
+ # Returns a list of suspicious entries
38
+ #
39
+ # @return [Array<Entry>]
40
+ #
41
+ def suspicious_entries
42
+ @suspicious_entries ||= @feeds.map(&:entries).flatten.uniq(&:url)
61
43
  end
62
44
  end
63
45
  end
data/lib/miteru/kit.rb CHANGED
@@ -9,10 +9,27 @@ module Miteru
9
9
  VALID_EXTENSIONS = Miteru.configuration.valid_extensions
10
10
  VALID_MIME_TYPES = Miteru.configuration.valid_mime_types
11
11
 
12
- attr_reader :url, :status, :content_length, :mime_type, :headers
12
+ # @return [String]
13
+ attr_reader :url
13
14
 
14
- def initialize(url)
15
+ # @return [String]
16
+ attr_reader :source
17
+
18
+ # @return [Integer, nil]
19
+ attr_reader :status
20
+
21
+ # @return [Integer, nil]
22
+ attr_reader :content_length
23
+
24
+ # @return [String, nil]
25
+ attr_reader :mime_type
26
+
27
+ # @return [Hash, nil]
28
+ attr_reader :headers
29
+
30
+ def initialize(url, source)
15
31
  @url = url
32
+ @source = source
16
33
 
17
34
  @content_length = nil
18
35
  @mime_type = nil
@@ -0,0 +1,47 @@
1
+ module Miteru
2
+ module Mixins
3
+ module URL
4
+ IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze
5
+
6
+ #
7
+ # Check whether a URL ends with an ignored extension
8
+ #
9
+ # @param [String] url
10
+ #
11
+ # @return [Boolean]
12
+ #
13
+ def invalid_extension?(url)
14
+ IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
15
+ end
16
+
17
+ #
18
+ # Break down a URL into a list of URLs
19
+ #
20
+ # @param [String] url
21
+ # @param [Boolean] enable_directory_traveling
22
+ #
23
+ # @return [Array<String>]
24
+ #
25
+ def breakdown(url, enable_directory_traveling)
26
+ begin
27
+ uri = URI.parse(url)
28
+ rescue URI::InvalidURIError => _e
29
+ return []
30
+ end
31
+
32
+ base = "#{uri.scheme}://#{uri.hostname}"
33
+ return [base] unless enable_directory_traveling
34
+
35
+ segments = uri.path.split("/")
36
+ return [base] if segments.length.zero?
37
+
38
+ urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
39
+
40
+ urls.reject do |breakdowned_url|
41
+ # Reject a url which ends with specific extension names
42
+ invalid_extension? breakdowned_url
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
data/lib/miteru/record.rb CHANGED
@@ -30,6 +30,7 @@ module Miteru
30
30
  def create_by_kit_and_hash(kit, hash)
31
31
  record = new(
32
32
  hash: hash,
33
+ source: kit.source,
33
34
  hostname: kit.hostname,
34
35
  url: kit.decoded_url,
35
36
  headers: kit.headers,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "1.0.2"
4
+ VERSION = "1.1.0"
5
5
  end
@@ -6,10 +6,15 @@ module Miteru
6
6
  class Website
7
7
  VALID_EXTENSIONS = Miteru.configuration.valid_extensions
8
8
 
9
+ # @return [String]
9
10
  attr_reader :url
10
11
 
11
- def initialize(url)
12
+ # @return [String]
13
+ attr_reader :source
14
+
15
+ def initialize(url, source)
12
16
  @url = url
17
+ @source = source
13
18
  end
14
19
 
15
20
  def title
@@ -18,7 +23,7 @@ module Miteru
18
23
 
19
24
  def kits
20
25
  @kits ||= links.filter_map do |link|
21
- kit = Kit.new(link)
26
+ kit = Kit.new(link, source)
22
27
  kit.valid? ? kit : nil
23
28
  end
24
29
  end
data/lib/miteru.rb CHANGED
@@ -7,6 +7,8 @@ require "miteru/database"
7
7
 
8
8
  require "miteru/record"
9
9
 
10
+ require "miteru/mixin"
11
+
10
12
  require "miteru/error"
11
13
  require "miteru/http_client"
12
14
  require "miteru/kit"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miteru
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manabu Niseki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-10 00:00:00.000000000 Z
11
+ date: 2021-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -370,6 +370,7 @@ files:
370
370
  - lib/miteru/feeds/urlscan_pro.rb
371
371
  - lib/miteru/http_client.rb
372
372
  - lib/miteru/kit.rb
373
+ - lib/miteru/mixin.rb
373
374
  - lib/miteru/notifier.rb
374
375
  - lib/miteru/record.rb
375
376
  - lib/miteru/version.rb