miteru 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2217ef5ac9ed2060203df9f5edebbac779fbebc53e4418f54de6d4b080ee81fc
4
- data.tar.gz: 45c934a406c619b1d1cc9d03a00103ae2b732460ca70877953aaac2d36d3b0e0
3
+ metadata.gz: 050c27599e75745a7c215f08b7ed190b43c70388d974a68945702eefdb25b7c2
4
+ data.tar.gz: 64c7429a4178febf6984fe3d79d3970781634971405c5d9fdb0748c61b663a32
5
5
  SHA512:
6
- metadata.gz: 03ebfc42660abedfa61a7e931e0d863b03fc50d615cfe0c81db10c0473ae8e0908619c5bcbc656e48784d40b5ccc5b98d616efe467d2a68ecca1bb57415bacfb
7
- data.tar.gz: a74accbf1d3949e8211792e02c68e6e6911acbe2cc0cd113459eb812bad29ae0dc4c434f56009a1e49fa255d58bfc5485b9cbe8e3d6e936fef68e23d671d7d86
6
+ metadata.gz: ff780a0db3fafdded94261c38272dc0537462ae195f646cceafc2223cd62e61fb8d809ee172f953344314a93f9e6f746b9d3b6b66efdf9b48cda2c5d8c645eb7
7
+ data.tar.gz: c55dfad5120175ebf43bd01a30b42517a9a5bfd6fddd12d8c6ad07acd3ae02bba67a1bc9101d26fab74406f374f16fbeb3f51e791c678dce749a403e9477f82a
@@ -14,8 +14,8 @@ module Miteru
14
14
  @notifier = Notifier.new
15
15
  end
16
16
 
17
- def crawl(url)
18
- website = Website.new(url)
17
+ def crawl(entry)
18
+ website = Website.new(entry.url, entry.source)
19
19
  downloader.download_kits(website.kits) if website.has_kits? && auto_download?
20
20
  notify(website) if website.has_kits? || verbose?
21
21
  rescue OpenSSL::SSL::SSLError, HTTP::Error, Addressable::URI::InvalidURIError => _e
@@ -23,11 +23,11 @@ module Miteru
23
23
  end
24
24
 
25
25
  def execute
26
- suspicious_urls = feeds.suspicious_urls
27
- puts "Loaded #{suspicious_urls.length} URLs to crawl. (crawling in #{threads} threads)" if verbose?
26
+ suspicious_entries = feeds.suspicious_entries
27
+ puts "Loaded #{suspicious_entries.length} URLs to crawl. (crawling in #{threads} threads)" if verbose?
28
28
 
29
- Parallel.each(suspicious_urls, in_threads: threads) do |url|
30
- crawl url
29
+ Parallel.each(suspicious_entries, in_threads: threads) do |entry|
30
+ crawl entry
31
31
  end
32
32
  end
33
33
 
@@ -19,6 +19,12 @@ class InitialSchema < ActiveRecord::Migration[6.1]
19
19
  end
20
20
  end
21
21
 
22
+ class V11Schema < ActiveRecord::Migration[6.1]
23
+ def change
24
+ add_column :records, :source, :string, if_not_exists: true
25
+ end
26
+ end
27
+
22
28
  def adapter
23
29
  return "postgresql" if Miteru.configuration.database.start_with?("postgresql://", "postgres://")
24
30
  return "mysql2" if Miteru.configuration.database.start_with?("mysql2://")
@@ -44,6 +50,7 @@ module Miteru
44
50
  ActiveRecord::Migration.verbose = false
45
51
 
46
52
  InitialSchema.migrate(:up)
53
+ V11Schema.migrate(:up)
47
54
  rescue StandardError => _e
48
55
  # Do nothing
49
56
  end
@@ -57,6 +64,7 @@ module Miteru
57
64
  return unless ActiveRecord::Base.connected?
58
65
 
59
66
  InitialSchema.migrate(:down)
67
+ V11Schema.migrate(:down)
60
68
  end
61
69
  end
62
70
  end
@@ -3,10 +3,43 @@
3
3
  module Miteru
4
4
  class Feeds
5
5
  class Feed
6
+ include Mixins::URL
7
+
8
+ def source
9
+ @source ||= self.class.to_s.split("::").last
10
+ end
11
+
12
+ #
13
+ # Return URLs
14
+ #
15
+ # @return [Array<String>] URLs
16
+ #
6
17
  def urls
7
18
  raise NotImplementedError, "You must implement #{self.class}##{__method__}"
8
19
  end
9
20
 
21
+ #
22
+ # Return entries
23
+ #
24
+ # @return [Array<Miteru::Entry>]
25
+ #
26
+ def entries
27
+ breakdowend_urls.map do |url|
28
+ Entry.new(url, source)
29
+ end
30
+ end
31
+
32
+ #
33
+ # Return breakdowned URLs
34
+ #
35
+ # @return [Array<String>] Breakdowned URLs
36
+ #
37
+ def breakdowend_urls
38
+ urls.select { |url| url.start_with?("http://", "https://") }.map do |url|
39
+ breakdown(url, Miteru.configuration.directory_traveling?)
40
+ end.flatten.uniq
41
+ end
42
+
10
43
  private
11
44
 
12
45
  def get(url)
data/lib/miteru/feeds.rb CHANGED
@@ -8,6 +8,18 @@ require_relative "./feeds/urlscan"
8
8
  require_relative "./feeds/urlscan_pro"
9
9
 
10
10
  module Miteru
11
+ class Entry
12
+ # @return [String]
13
+ attr_reader :url
14
+ # @return [String]
15
+ attr_reader :source
16
+
17
+ def initialize(url, source)
18
+ @url = url
19
+ @source = source
20
+ end
21
+ end
22
+
11
23
  class Feeds
12
24
  IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze
13
25
 
@@ -21,43 +33,13 @@ module Miteru
21
33
  ].compact
22
34
  end
23
35
 
24
- def directory_traveling?
25
- Miteru.configuration.directory_traveling?
26
- end
27
-
28
- def suspicious_urls
29
- @suspicious_urls ||= [].tap do |arr|
30
- urls = @feeds.map do |feed|
31
- feed.urls.select { |url| url.start_with?("http://", "https://") }
32
- end.flatten.uniq
33
-
34
- urls.map { |url| breakdown(url) }.flatten.uniq.sort.each { |url| arr << url }
35
- end
36
- end
37
-
38
- def breakdown(url)
39
- begin
40
- uri = URI.parse(url)
41
- rescue URI::InvalidURIError => _e
42
- return []
43
- end
44
-
45
- base = "#{uri.scheme}://#{uri.hostname}"
46
- return [base] unless directory_traveling?
47
-
48
- segments = uri.path.split("/")
49
- return [base] if segments.length.zero?
50
-
51
- urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
52
-
53
- urls.reject do |breakdowned_url|
54
- # Reject a url which ends with specific extension names
55
- invalid_extension? breakdowned_url
56
- end
57
- end
58
-
59
- def invalid_extension?(url)
60
- IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
36
+ #
37
+ # Returns a list of suspicious entries
38
+ #
39
+ # @return [Array<Entry>]
40
+ #
41
+ def suspicious_entries
42
+ @suspicious_entries ||= @feeds.map(&:entries).flatten.uniq(&:url)
61
43
  end
62
44
  end
63
45
  end
data/lib/miteru/kit.rb CHANGED
@@ -9,10 +9,27 @@ module Miteru
9
9
  VALID_EXTENSIONS = Miteru.configuration.valid_extensions
10
10
  VALID_MIME_TYPES = Miteru.configuration.valid_mime_types
11
11
 
12
- attr_reader :url, :status, :content_length, :mime_type, :headers
12
+ # @return [String]
13
+ attr_reader :url
13
14
 
14
- def initialize(url)
15
+ # @return [String]
16
+ attr_reader :source
17
+
18
+ # @return [Integer, nil]
19
+ attr_reader :status
20
+
21
+ # @return [Integer, nil]
22
+ attr_reader :content_length
23
+
24
+ # @return [String, nil]
25
+ attr_reader :mime_type
26
+
27
+ # @return [Hash, nil]
28
+ attr_reader :headers
29
+
30
+ def initialize(url, source)
15
31
  @url = url
32
+ @source = source
16
33
 
17
34
  @content_length = nil
18
35
  @mime_type = nil
@@ -0,0 +1,47 @@
1
+ module Miteru
2
+ module Mixins
3
+ module URL
4
+ IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze
5
+
6
+ #
7
+ # Validate extension of a URL
8
+ #
9
+ # @param [String] url
10
+ #
11
+ # @return [Boolean]
12
+ #
13
+ def invalid_extension?(url)
14
+ IGNORE_EXTENSIONS.any? { |ext| url.end_with? ext }
15
+ end
16
+
17
+ #
18
+ # Breakdown a URL into URLs
19
+ #
20
+ # @param [String] url
21
+ # @param [Boolean] enable_directory_traveling
22
+ #
23
+ # @return [Array<String>]
24
+ #
25
+ def breakdown(url, enable_directory_traveling)
26
+ begin
27
+ uri = URI.parse(url)
28
+ rescue URI::InvalidURIError => _e
29
+ return []
30
+ end
31
+
32
+ base = "#{uri.scheme}://#{uri.hostname}"
33
+ return [base] unless enable_directory_traveling
34
+
35
+ segments = uri.path.split("/")
36
+ return [base] if segments.length.zero?
37
+
38
+ urls = (0...segments.length).map { |idx| "#{base}#{segments[0..idx].join("/")}" }
39
+
40
+ urls.reject do |breakdowned_url|
41
+ # Reject a url which ends with specific extension names
42
+ invalid_extension? breakdowned_url
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
data/lib/miteru/record.rb CHANGED
@@ -30,6 +30,7 @@ module Miteru
30
30
  def create_by_kit_and_hash(kit, hash)
31
31
  record = new(
32
32
  hash: hash,
33
+ source: kit.source,
33
34
  hostname: kit.hostname,
34
35
  url: kit.decoded_url,
35
36
  headers: kit.headers,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "1.0.2"
4
+ VERSION = "1.1.0"
5
5
  end
@@ -6,10 +6,15 @@ module Miteru
6
6
  class Website
7
7
  VALID_EXTENSIONS = Miteru.configuration.valid_extensions
8
8
 
9
+ # @return [String]
9
10
  attr_reader :url
10
11
 
11
- def initialize(url)
12
+ # @return [String]
13
+ attr_reader :source
14
+
15
+ def initialize(url, source)
12
16
  @url = url
17
+ @source = source
13
18
  end
14
19
 
15
20
  def title
@@ -18,7 +23,7 @@ module Miteru
18
23
 
19
24
  def kits
20
25
  @kits ||= links.filter_map do |link|
21
- kit = Kit.new(link)
26
+ kit = Kit.new(link, source)
22
27
  kit.valid? ? kit : nil
23
28
  end
24
29
  end
data/lib/miteru.rb CHANGED
@@ -7,6 +7,8 @@ require "miteru/database"
7
7
 
8
8
  require "miteru/record"
9
9
 
10
+ require "miteru/mixin"
11
+
10
12
  require "miteru/error"
11
13
  require "miteru/http_client"
12
14
  require "miteru/kit"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miteru
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manabu Niseki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-09-10 00:00:00.000000000 Z
11
+ date: 2021-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -370,6 +370,7 @@ files:
370
370
  - lib/miteru/feeds/urlscan_pro.rb
371
371
  - lib/miteru/http_client.rb
372
372
  - lib/miteru/kit.rb
373
+ - lib/miteru/mixin.rb
373
374
  - lib/miteru/notifier.rb
374
375
  - lib/miteru/record.rb
375
376
  - lib/miteru/version.rb