miteru 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 62e5976eed670c3d2f2b0309e7723434c1e5c04ad337d4db519a58362618d566
4
- data.tar.gz: fe54d9316505b380ac50ba9ab8cf520f2fd5b0e76cf88f0e520d7fb0c18a3b56
3
+ metadata.gz: 3767b21d7f569a43215e2fdc0f83f9fc4f29fe2a4bb11fc3548a5d1657812774
4
+ data.tar.gz: 7877fb357548c619210ec63f3bd26c61c3fda68dc6d21910aa22fc0d7144ebf1
5
5
  SHA512:
6
- metadata.gz: 122f011baecb0a38639610ffaf68645c48f454ef7aa6ba79d5e812f71129f303a4386308cf639e233ff43473907f9567f90625b3651e027d18d6324e694ba00e
7
- data.tar.gz: 0f21e5a5b6882fb9705d9988647de16aad44693ddfbbd9983a01f16c029923629322c40165a28dcb33e4a49482bcddbc1d272825beae7b0017400c560e16a57d
6
+ metadata.gz: '02480c9a5604e5c7959ae4adb9e3a657d4b59f5c668be51b7d1fe120bd46e5548ecc9a26404d88d001729bf19f4f3d1b1545be80820c0717c0e027b7995030cd'
7
+ data.tar.gz: f2c1adc686393f3979ccf77e87c1bc5dba9cad9023903620ab61bbf6e6a2a4406dbbdd3ffb9ca73f5297c2135436dfd995f9572085fd9fde02fb5b72eac6032b
data/lib/miteru/cache.rb CHANGED
@@ -19,8 +19,8 @@ module Miteru
19
19
  # @param [Integer. nil] ex
20
20
  #
21
21
  def set(key, value, ex:)
22
- value = redis.set("#{prefix}:#{key}", value, ex:)
23
- Miteru.logger.info("Cache:#{key} is set.") if verbose?
22
+ value = redis.set("#{cache_prefix}:#{key}", value, ex:)
23
+ logger.info("Cache:#{key} is set.") if verbose?
24
24
  value
25
25
  end
26
26
 
@@ -28,21 +28,13 @@ module Miteru
28
28
  # @param [String] key
29
29
  #
30
30
  def cached?(key)
31
- value = redis.exists?("#{prefix}:#{key}")
32
- Miteru.logger.info("Cache:#{key} found.") if verbose?
31
+ value = redis.exists?("#{cache_prefix}:#{key}")
32
+ logger.info("Cache:#{key} found.") if verbose?
33
33
  value
34
34
  end
35
35
 
36
36
  private
37
37
 
38
- def verbose?
39
- Miteru.config.verbose
40
- end
41
-
42
- def prefix
43
- Miteru.config.cache_prefix
44
- end
45
-
46
38
  #
47
39
  # @return [Redis]
48
40
  #
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Miteru
4
+ module Concerns
5
+ module UrlTruncatable
6
+ extend ActiveSupport::Concern
7
+
8
+ def decoded_url
9
+ @decoded_url ||= URI.decode_www_form_component(url)
10
+ end
11
+
12
+ #
13
+ # @return [String]
14
+ #
15
+ def truncated_url
16
+ @truncated_url ||= decoded_url.truncate(64)
17
+ end
18
+
19
+ def defanged_truncated_url
20
+ @defanged_truncated_url ||= truncated_url.to_s.gsub(".", "[.]")
21
+ end
22
+ end
23
+ end
24
+ end
@@ -11,23 +11,21 @@ module Miteru
11
11
  Try[OpenSSL::SSL::SSLError, ::HTTP::Error, Addressable::URI::InvalidURIError] do
12
12
  info = "Website:#{website.info}."
13
13
  info = info.colorize(:red) if website.kits?
14
- Miteru.logger.info(info)
14
+ logger.info(info)
15
15
 
16
16
  website.kits.each do |kit|
17
17
  downloader = Downloader.new(kit)
18
18
  result = downloader.result
19
-
20
19
  unless result.success?
21
- Miteru.logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
20
+ logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.")
22
21
  next
23
22
  end
24
-
25
23
  destination = result.value!
26
- Miteru.logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.")
24
+ logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.")
27
25
  # Remove downloaded file if auto_download is not allowed
28
26
  FileUtils.rm(destination, force: true) unless auto_download?
29
- # Notify the website
30
- notify website
27
+ # Notify the kit
28
+ notify(kit)
31
29
  end
32
30
 
33
31
  # Cache the website
@@ -37,39 +35,22 @@ module Miteru
37
35
 
38
36
  private
39
37
 
40
- def cache?
41
- Miteru.cache?
42
- end
43
-
44
- def cache
45
- Miteru.cache
46
- end
47
-
48
- def cache_ex
49
- Miteru.config.cache_ex
50
- end
51
-
52
- def auto_download?
53
- Miteru.config.auto_download
54
- end
55
-
56
38
  #
57
- # @param [Miteru::Website] website
39
+ # @param [Miteru::Kit] kit
58
40
  #
59
- def notify(website)
41
+ def notify(kit)
60
42
  notifiers.each do |notifier|
61
- result = notifier.result(website)
43
+ result = notifier.result(kit)
62
44
  if result.success?
63
- Miteru.logger.info("Notifier:#{notifier.name} succeeded.")
45
+ logger.info("Notifier:#{notifier.name} succeeded.")
64
46
  else
65
- Miteru.logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
47
+ logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.")
66
48
  end
67
49
  end
68
50
  end
69
51
 
70
- #
71
- # @return [Array<Miteru::Notifiers::Base>]
72
- #
52
+ private
53
+
73
54
  def notifiers
74
55
  @notifiers ||= Miteru.notifiers.map(&:new)
75
56
  end
@@ -43,7 +43,7 @@ module Miteru
43
43
 
44
44
  # Record a kit in DB
45
45
  Record.create_by_kit_and_hash(kit, sha256: sha256(destination))
46
- Miteru.logger.info "Download #{kit.url} as #{destination}"
46
+ logger.info "Download #{kit.url} as #{destination}"
47
47
 
48
48
  destination
49
49
  end
data/lib/miteru/kit.rb CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  module Miteru
4
4
  class Kit < Service
5
+ include Concerns::UrlTruncatable
6
+
5
7
  # @return [String]
6
8
  attr_reader :url
7
9
 
@@ -85,17 +87,6 @@ module Miteru
85
87
  @hostname ||= URI(url).hostname
86
88
  end
87
89
 
88
- def decoded_url
89
- @decoded_url ||= URI.decode_www_form_component(url)
90
- end
91
-
92
- #
93
- # @return [String]
94
- #
95
- def truncated_url
96
- url.truncate(64)
97
- end
98
-
99
90
  private
100
91
 
101
92
  def filename_to_download
@@ -3,7 +3,7 @@
3
3
  module Miteru
4
4
  module Notifiers
5
5
  class Base < Service
6
- def call(website)
6
+ def call(kit)
7
7
  raise NotImplementedError
8
8
  end
9
9
 
@@ -77,13 +77,13 @@ module Miteru
77
77
  #
78
78
  # Notifiy to Slack
79
79
  #
80
- # @param [Miteru::Website] website
80
+ # @param [Miteru::Kit] kit
81
81
  #
82
- def call(website)
82
+ def call(kit)
83
83
  return unless callable?
84
84
 
85
- attachment = SlackAttachment.new(website.url)
86
- notifier.post(text: website.info, attachments: attachment.to_a) if website.kits?
85
+ attachment = SlackAttachment.new(kit.url)
86
+ notifier.post(text: kit.defanged_truncated_url, attachments: attachment.to_a)
87
87
  end
88
88
 
89
89
  def callable?
@@ -4,12 +4,12 @@ module Miteru
4
4
  module Notifiers
5
5
  class UrlScan < Base
6
6
  #
7
- # @param [Miteru::Website] website
7
+ # @param [Miteru::Kit] kit
8
8
  #
9
- def call(website)
9
+ def call(kit)
10
10
  return unless callable?
11
11
 
12
- website.kits.each { |kit| submit(kit.url, source: website.source) }
12
+ submit(kit.decoded_url, source: kit.source)
13
13
  end
14
14
 
15
15
  def callable?
@@ -3,24 +3,31 @@
3
3
  module Miteru
4
4
  class Orchestrator < Service
5
5
  def call
6
- Miteru.logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
6
+ logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
7
7
 
8
- if Miteru.sidekiq?
9
- non_cached_websites.each do |website|
10
- Jobs::CrawleJob.perform_async(website.url, website.source)
11
- Miteru.logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
12
- end
8
+ if sidekiq?
9
+ sidekiq_call
13
10
  else
14
- Miteru.logger.info("Use #{threads} thread(s).") if verbose?
15
- Parallel.each(non_cached_websites, in_threads: threads) do |website|
16
- Miteru.logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
11
+ parallel_call
12
+ end
13
+ end
17
14
 
18
- result = Crawler.result(website)
19
- if result.success?
20
- Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
21
- else
22
- Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
23
- end
15
+ def sidekiq_call
16
+ non_cached_websites.each do |website|
17
+ Jobs::CrawleJob.perform_async(website.url, website.source)
18
+ logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
19
+ end
20
+ end
21
+
22
+ def parallel_call
23
+ logger.info("Use #{threads} thread(s).") if verbose?
24
+ Parallel.each(non_cached_websites, in_threads: threads) do |website|
25
+ logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
26
+ result = Crawler.result(website)
27
+ if result.success?
28
+ logger.info("Crawler:#{website.truncated_url} succeeded.")
29
+ else
30
+ logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
24
31
  end
25
32
  end
26
33
  end
@@ -34,44 +41,33 @@ module Miteru
34
41
  result = feed.result
35
42
  if result.success?
36
43
  websites = result.value!
37
- Miteru.logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
44
+ logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose?
38
45
  out << websites
39
46
  else
40
- Miteru.logger.warn("Feed:#{feed.source} failed - #{result.failure}")
47
+ logger.warn("Feed:#{feed.source} failed - #{result.failure}")
41
48
  end
42
49
  end
43
50
  end.flatten.uniq(&:url)
44
51
  end
45
52
 
53
+ #
54
+ # @return [Array<Miteru::Website>]
55
+ #
46
56
  def non_cached_websites
47
- return websites unless cache?
48
-
49
- websites.reject { |website| cache.cached?(website.url) }
57
+ @non_cached_websites ||= [].tap do |out|
58
+ out << if cache?
59
+ websites.reject { |website| cache.cached?(website.url) }
60
+ else
61
+ websites
62
+ end
63
+ end.flatten.uniq(&:url)
50
64
  end
51
65
 
52
66
  #
53
67
  # @return [Array<Miteru::Feeds::Base>]
54
68
  #
55
69
  def feeds
56
- Miteru.feeds.map(&:new)
57
- end
58
-
59
- private
60
-
61
- def cache?
62
- Miteru.cache?
63
- end
64
-
65
- def cache
66
- Miteru.cache
67
- end
68
-
69
- def threads
70
- Miteru.config.threads
71
- end
72
-
73
- def verbose?
74
- Miteru.config.verbose
70
+ @feeds ||= Miteru.feeds.map(&:new)
75
71
  end
76
72
  end
77
73
  end
@@ -24,5 +24,49 @@ module Miteru
24
24
  new.result(...)
25
25
  end
26
26
  end
27
+
28
+ private
29
+
30
+ def auto_download?
31
+ Miteru.config.auto_download
32
+ end
33
+
34
+ #
35
+ # @return [SemanticLogger]
36
+ #
37
+ def logger
38
+ Miteru.logger
39
+ end
40
+
41
+ def cache?
42
+ Miteru.cache?
43
+ end
44
+
45
+ def sidekiq?
46
+ Miteru.sidekiq?
47
+ end
48
+
49
+ #
50
+ # @return [Miteru::Cache]
51
+ #
52
+ def cache
53
+ Miteru.cache
54
+ end
55
+
56
+ def threads
57
+ Miteru.config.threads
58
+ end
59
+
60
+ def verbose?
61
+ Miteru.config.verbose
62
+ end
63
+
64
+ def cache_prefix
65
+ Miteru.config.cache_prefix
66
+ end
67
+
68
+ def cache_ex
69
+ Miteru.config.cache_ex
70
+ end
27
71
  end
28
72
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "2.2.0"
4
+ VERSION = "2.2.1"
5
5
  end
@@ -4,6 +4,8 @@ require "oga"
4
4
 
5
5
  module Miteru
6
6
  class Website < Service
7
+ include Concerns::UrlTruncatable
8
+
7
9
  # @return [String]
8
10
  attr_reader :url
9
11
 
@@ -41,17 +43,6 @@ module Miteru
41
43
  (href_links + possible_file_links).compact.uniq
42
44
  end
43
45
 
44
- #
45
- # @return [String]
46
- #
47
- def truncated_url
48
- url.truncate(64)
49
- end
50
-
51
- def defanged_truncated_url
52
- truncated_url.to_s.gsub(".", "[.]")
53
- end
54
-
55
46
  def info
56
47
  "#{defanged_truncated_url} has #{kits.length} kit(s) (Source: #{source})"
57
48
  end
data/lib/miteru.rb CHANGED
@@ -31,6 +31,7 @@ require "miteru/errors"
31
31
  # Concerns
32
32
  require "miteru/concerns/database_connectable"
33
33
  require "miteru/concerns/error_unwrappable"
34
+ require "miteru/concerns/url_truncatable"
34
35
 
35
36
  # Core classes
36
37
  require "miteru/service"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miteru
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manabu Niseki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-02-08 00:00:00.000000000 Z
11
+ date: 2024-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -590,6 +590,7 @@ files:
590
590
  - lib/miteru/commands/web.rb
591
591
  - lib/miteru/concerns/database_connectable.rb
592
592
  - lib/miteru/concerns/error_unwrappable.rb
593
+ - lib/miteru/concerns/url_truncatable.rb
593
594
  - lib/miteru/config.rb
594
595
  - lib/miteru/crawler.rb
595
596
  - lib/miteru/database.rb