miteru 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62e5976eed670c3d2f2b0309e7723434c1e5c04ad337d4db519a58362618d566
4
- data.tar.gz: fe54d9316505b380ac50ba9ab8cf520f2fd5b0e76cf88f0e520d7fb0c18a3b56
3
+ metadata.gz: 3767b21d7f569a43215e2fdc0f83f9fc4f29fe2a4bb11fc3548a5d1657812774
4
+ data.tar.gz: 7877fb357548c619210ec63f3bd26c61c3fda68dc6d21910aa22fc0d7144ebf1
5
5
  SHA512:
6
- metadata.gz: 122f011baecb0a38639610ffaf68645c48f454ef7aa6ba79d5e812f71129f303a4386308cf639e233ff43473907f9567f90625b3651e027d18d6324e694ba00e
7
- data.tar.gz: 0f21e5a5b6882fb9705d9988647de16aad44693ddfbbd9983a01f16c029923629322c40165a28dcb33e4a49482bcddbc1d272825beae7b0017400c560e16a57d
6
+ metadata.gz: '02480c9a5604e5c7959ae4adb9e3a657d4b59f5c668be51b7d1fe120bd46e5548ecc9a26404d88d001729bf19f4f3d1b1545be80820c0717c0e027b7995030cd'
7
+ data.tar.gz: f2c1adc686393f3979ccf77e87c1bc5dba9cad9023903620ab61bbf6e6a2a4406dbbdd3ffb9ca73f5297c2135436dfd995f9572085fd9fde02fb5b72eac6032b
data/lib/miteru/cache.rb CHANGED
@@ -19,8 +19,8 @@ module Miteru
19
19
  # @param [Integer, nil] ex
20
20
  #
21
21
  def set(key, value, ex:)
22
- value = redis.set("#{prefix}:#{key}", value, ex:)
23
- Miteru.logger.info("Cache:#{key} is set.") if verbose?
22
+ value = redis.set("#{cache_prefix}:#{key}", value, ex:)
23
+ logger.info("Cache:#{key} is set.") if verbose?
24
24
  value
25
25
  end
26
26
 
@@ -28,21 +28,13 @@ module Miteru
28
28
  # @param [String] key
29
29
  #
30
30
  def cached?(key)
31
- value = redis.exists?("#{prefix}:#{key}")
32
- Miteru.logger.info("Cache:#{key} found.") if verbose?
31
+ value = redis.exists?("#{cache_prefix}:#{key}")
32
+ logger.info("Cache:#{key} found.") if verbose?
33
33
  value
34
34
  end
35
35
 
36
36
  private
37
37
 
38
- def verbose?
39
- Miteru.config.verbose
40
- end
41
-
42
- def prefix
43
- Miteru.config.cache_prefix
44
- end
45
-
46
38
  #
47
39
  # @return [Redis]
48
40
  #
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ module Concerns
5
+ module UrlTruncatable
6
+ extend ActiveSupport::Concern
7
+
8
+ def decoded_url
9
+ @decoded_url ||= URI.decode_www_form_component(url)
10
+ end
11
+
12
+ #
13
+ # @return [String]
14
+ #
15
+ def truncated_url
16
+ @truncated_url ||= decoded_url.truncate(64)
17
+ end
18
+
19
+ def defanged_truncated_url
20
+ @defanged_truncated_url ||= truncated_url.to_s.gsub(".", "[.]")
21
+ end
22
+ end
23
+ end
24
+ end
@@ -11,23 +11,21 @@ module Miteru
11
11
  Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
12
12
  info = "Website:#{website.info}."
13
13
  info = info.colorize(:red) if website.kits?
14
- Miteru.logger.info(info)
14
+ logger.info(info)
15
15
 
16
16
  website.kits.each do |kit|
17
17
  downloader = Downloader.new(kit)
18
18
  result = downloader.result
19
-
20
19
  unless result.success?
21
- Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
20
+ logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
22
21
  next
23
22
  end
24
-
25
23
  destination = result.value!
26
- Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.")
24
+ logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.")
27
25
  # Remove downloaded file if auto_download is not allowed
28
26
  FileUtils.rm(destination, force: true) unless auto_download?
29
- # Notify the website
30
- notify website
27
+ # Notify the kit
28
+ notify(kit)
31
29
  end
32
30
 
33
31
  # Cache the website
@@ -37,39 +35,22 @@ module Miteru
37
35
 
38
36
  private
39
37
 
40
- def cache?
41
- Miteru.cache?
42
- end
43
-
44
- def cache
45
- Miteru.cache
46
- end
47
-
48
- def cache_ex
49
- Miteru.config.cache_ex
50
- end
51
-
52
- def auto_download?
53
- Miteru.config.auto_download
54
- end
55
-
56
38
  #
57
- # @param [Miteru::Website] website
39
+ # @param [Miteru::Kit] kit
58
40
  #
59
- def notify(website)
41
+ def notify(kit)
60
42
  notifiers.each do |notifier|
61
- result = notifier.result(website)
43
+ result = notifier.result(kit)
62
44
  if result.success?
63
- Miteru.logger.info("Notifier:#{notifier.name} succeeded.")
45
+ logger.info("Notifier:#{notifier.name} succeeded.")
64
46
  else
65
- Miteru.logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
47
+ logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
66
48
  end
67
49
  end
68
50
  end
69
51
 
70
- #
71
- # @return [Array<Miteru::Notifiers::Base>]
72
- #
52
+ private
53
+
73
54
  def notifiers
74
55
  @notifiers ||= Miteru.notifiers.map(&:new)
75
56
  end
@@ -43,7 +43,7 @@ module Miteru
43
43
 
44
44
  # Record a kit in DB
45
45
  Record.create_by_kit_and_hash(kit, sha256: sha256(destination))
46
- Miteru.logger.info "Download #{kit.url} as #{destination}"
46
+ logger.info "Download #{kit.url} as #{destination}"
47
47
 
48
48
  destination
49
49
  end
data/lib/miteru/kit.rb CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  module Miteru
4
4
  class Kit < Service
5
+ include Concerns::UrlTruncatable
6
+
5
7
  # @return [String]
6
8
  attr_reader :url
7
9
 
@@ -85,17 +87,6 @@ module Miteru
85
87
  @hostname ||= URI(url).hostname
86
88
  end
87
89
 
88
- def decoded_url
89
- @decoded_url ||= URI.decode_www_form_component(url)
90
- end
91
-
92
- #
93
- # @return [String]
94
- #
95
- def truncated_url
96
- url.truncate(64)
97
- end
98
-
99
90
  private
100
91
 
101
92
  def filename_to_download
@@ -3,7 +3,7 @@
3
3
  module Miteru
4
4
  module Notifiers
5
5
  class Base < Service
6
- def call(website)
6
+ def call(kit)
7
7
  raise NotImplementedError
8
8
  end
9
9
 
@@ -77,13 +77,13 @@ module Miteru
77
77
  #
78
78
  # Notify Slack
79
79
  #
80
- # @param [Miteru::Website] website
80
+ # @param [Miteru::Kit] kit
81
81
  #
82
- def call(website)
82
+ def call(kit)
83
83
  return unless callable?
84
84
 
85
- attachment = SlackAttachment.new(website.url)
86
- notifier.post(text: website.info, attachments: attachment.to_a) if website.kits?
85
+ attachment = SlackAttachment.new(kit.url)
86
+ notifier.post(text: kit.defanged_truncated_url, attachments: attachment.to_a)
87
87
  end
88
88
 
89
89
  def callable?
@@ -4,12 +4,12 @@ module Miteru
4
4
  module Notifiers
5
5
  class UrlScan < Base
6
6
  #
7
- # @param [Miteru::Website] website
7
+ # @param [Miteru::Kit] kit
8
8
  #
9
- def call(website)
9
+ def call(kit)
10
10
  return unless callable?
11
11
 
12
- website.kits.each { |kit| submit(kit.url, source: website.source) }
12
+ submit(kit.decoded_url, source: kit.source)
13
13
  end
14
14
 
15
15
  def callable?
@@ -3,24 +3,31 @@
3
3
  module Miteru
4
4
  class Orchestrator < Service
5
5
  def call
6
- Miteru.logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
6
+ logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
7
7
 
8
- if Miteru.sidekiq?
9
- non_cached_websites.each do |website|
10
- Jobs::CrawleJob.perform_async(website.url, website.source)
11
- Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
12
- end
8
+ if sidekiq?
9
+ sidekiq_call
13
10
  else
14
- Miteru.logger.info("Use #{threads} thread(s).") if verbose?
15
- Parallel.each(non_cached_websites, in_threads: threads) do |website|
16
- Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
11
+ parallel_call
12
+ end
13
+ end
17
14
 
18
- result = Crawler.result(website)
19
- if result.success?
20
- Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
21
- else
22
- Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
23
- end
15
+ def sidekiq_call
16
+ non_cached_websites.each do |website|
17
+ Jobs::CrawleJob.perform_async(website.url, website.source)
18
+ logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
19
+ end
20
+ end
21
+
22
+ def parallel_call
23
+ logger.info("Use #{threads} thread(s).") if verbose?
24
+ Parallel.each(non_cached_websites, in_threads: threads) do |website|
25
+ logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
26
+ result = Crawler.result(website)
27
+ if result.success?
28
+ logger.info("Crawler:#{website.truncated_url} succeeded.")
29
+ else
30
+ logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
24
31
  end
25
32
  end
26
33
  end
@@ -34,44 +41,33 @@ module Miteru
34
41
  result = feed.result
35
42
  if result.success?
36
43
  websites = result.value!
37
- Miteru.logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
44
+ logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
38
45
  out << websites
39
46
  else
40
- Miteru.logger.warn("Feed:#{feed.source} failed - #{result.failure}")
47
+ logger.warn("Feed:#{feed.source} failed - #{result.failure}")
41
48
  end
42
49
  end
43
50
  end.flatten.uniq(&:url)
44
51
  end
45
52
 
53
+ #
54
+ # @return [Array<Miteru::Website>]
55
+ #
46
56
  def non_cached_websites
47
- return websites unless cache?
48
-
49
- websites.reject { |website| cache.cached?(website.url) }
57
+ @non_cached_websites ||= [].tap do |out|
58
+ out << if cache?
59
+ websites.reject { |website| cache.cached?(website.url) }
60
+ else
61
+ websites
62
+ end
63
+ end.flatten.uniq(&:url)
50
64
  end
51
65
 
52
66
  #
53
67
  # @return [Array<Miteru::Feeds::Base>]
54
68
  #
55
69
  def feeds
56
- Miteru.feeds.map(&:new)
57
- end
58
-
59
- private
60
-
61
- def cache?
62
- Miteru.cache?
63
- end
64
-
65
- def cache
66
- Miteru.cache
67
- end
68
-
69
- def threads
70
- Miteru.config.threads
71
- end
72
-
73
- def verbose?
74
- Miteru.config.verbose
70
+ @feeds ||= Miteru.feeds.map(&:new)
75
71
  end
76
72
  end
77
73
  end
@@ -24,5 +24,49 @@ module Miteru
24
24
  new.result(...)
25
25
  end
26
26
  end
27
+
28
+ private
29
+
30
+ def auto_download?
31
+ Miteru.config.auto_download
32
+ end
33
+
34
+ #
35
+ # @return [SemanticLogger]
36
+ #
37
+ def logger
38
+ Miteru.logger
39
+ end
40
+
41
+ def cache?
42
+ Miteru.cache?
43
+ end
44
+
45
+ def sidekiq?
46
+ Miteru.sidekiq?
47
+ end
48
+
49
+ #
50
+ # @return [Miteru::Cache]
51
+ #
52
+ def cache
53
+ Miteru.cache
54
+ end
55
+
56
+ def threads
57
+ Miteru.config.threads
58
+ end
59
+
60
+ def verbose?
61
+ Miteru.config.verbose
62
+ end
63
+
64
+ def cache_prefix
65
+ Miteru.config.cache_prefix
66
+ end
67
+
68
+ def cache_ex
69
+ Miteru.config.cache_ex
70
+ end
27
71
  end
28
72
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "2.2.0"
4
+ VERSION = "2.2.1"
5
5
  end
@@ -4,6 +4,8 @@ require "oga"
4
4
 
5
5
  module Miteru
6
6
  class Website < Service
7
+ include Concerns::UrlTruncatable
8
+
7
9
  # @return [String]
8
10
  attr_reader :url
9
11
 
@@ -41,17 +43,6 @@ module Miteru
41
43
  (href_links + possible_file_links).compact.uniq
42
44
  end
43
45
 
44
- #
45
- # @return [String]
46
- #
47
- def truncated_url
48
- url.truncate(64)
49
- end
50
-
51
- def defanged_truncated_url
52
- truncated_url.to_s.gsub(".", "[.]")
53
- end
54
-
55
46
  def info
56
47
  "#{defanged_truncated_url} has #{kits.length} kit(s) (Source: #{source})"
57
48
  end
data/lib/miteru.rb CHANGED
@@ -31,6 +31,7 @@ require "miteru/errors"
31
31
  # Concerns
32
32
  require "miteru/concerns/database_connectable"
33
33
  require "miteru/concerns/error_unwrappable"
34
+ require "miteru/concerns/url_truncatable"
34
35
 
35
36
  # Core classes
36
37
  require "miteru/service"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miteru
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manabu Niseki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-08 00:00:00.000000000 Z
11
+ date: 2024-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -590,6 +590,7 @@ files:
590
590
  - lib/miteru/commands/web.rb
591
591
  - lib/miteru/concerns/database_connectable.rb
592
592
  - lib/miteru/concerns/error_unwrappable.rb
593
+ - lib/miteru/concerns/url_truncatable.rb
593
594
  - lib/miteru/config.rb
594
595
  - lib/miteru/crawler.rb
595
596
  - lib/miteru/database.rb