miteru 2.3.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dd3ba5f3cd08b4af7eb269c458323f65214491a6111a4d5277b0102913629a77
4
- data.tar.gz: 47542263b5b2e60e5dfadf149d15bac3446f00b5e8aec15e9c83f60629328546
3
+ metadata.gz: 5d1c74e7ec6c2b56b4fa872ab3d7acb5c69be947a343eed838382bfb1dbc9e16
4
+ data.tar.gz: e57575a1fd2d5a6a859e045209b30ed1f6b80aef9e930a1146bfb2784c16bcd7
5
5
  SHA512:
6
- metadata.gz: '028f612c45b87b4b9ec30cffad2168ec6b3983756e9bdd865835a38ba3c9533ccbe6238240f263f9f1cbf460cad8347e3ed1ec00623ef6cdbb1cfa4ebcf5da47'
7
- data.tar.gz: bc1a770f94eda268ce90735a80d6b45fd6e2f3a900300de52c1499debf15d5e6f7e705d8e7bee068a5234d0f7cf744f6167a50c61914438be78fb68bed38a8e9
6
+ metadata.gz: 4da382b9c07c6a9ad1961487335eeea1509b1d82586dddc718c753a94ca63523cb460f948db8fc26dff231fdf151c418d207bcbce4edb435d72e4bd996f7e655
7
+ data.tar.gz: 475e3848581d32070621018992343a160f1133002f0d240204730d1342be0daddd5048d511424f824b56953935ebb3b04efe37b2ead84d46c85bd57794ce9002
@@ -35,7 +35,7 @@ jobs:
35
35
  --health-retries=3
36
36
  strategy:
37
37
  matrix:
38
- ruby: [3.1, 3.2, 3.3]
38
+ ruby: [3.2, 3.3]
39
39
  steps:
40
40
  - uses: actions/checkout@v4
41
41
  - name: Set up Ruby
@@ -49,10 +49,8 @@ jobs:
49
49
  - name: Test with PostgreSQL
50
50
  env:
51
51
  MITERU_DATABASE: postgresql://postgres:postgres@localhost:5432/test
52
- run: |
53
- bundle exec rake
52
+ run: bundle exec rake
54
53
  - name: Test with MySQL
55
54
  env:
56
55
  MITERU_DATABASE: mysql2://mysql:mysql@127.0.0.1:3306/test
57
- run: |
58
- bundle exec rake
56
+ run: bundle exec rake
@@ -13,7 +13,6 @@ module Miteru
13
13
  method_option :directory_traveling, type: :boolean, default: false,
14
14
  desc: "Enable or disable directory traveling"
15
15
  method_option :download_to, type: :string, default: "/tmp", desc: "Directory to download phishing kits"
16
- method_option :threads, type: :numeric, desc: "Number of threads to use", default: Parallel.processor_count
17
16
  method_option :verbose, type: :boolean, default: true
18
17
  desc "execute", "Execute the crawler"
19
18
  around :with_db_connection
@@ -22,10 +21,8 @@ module Miteru
22
21
  config.auto_download = options["auto_download"]
23
22
  config.directory_traveling = options["directory_traveling"]
24
23
  config.download_to = options["download_to"]
25
- config.threads = options["threads"]
26
24
  config.verbose = options["verbose"]
27
25
  end
28
-
29
26
  Orchestrator.call
30
27
  end
31
28
  default_command :execute
data/lib/miteru/config.rb CHANGED
@@ -26,14 +26,15 @@ module Miteru
26
26
  download_timeout: 60,
27
27
  sentry_dsn: nil,
28
28
  sentry_trace_sample_rate: 0.25,
29
- sidekiq_redis_url: nil,
29
+ sidekiq_redis_url: "redis://localhost:6379",
30
30
  sidekiq_job_retry: 0,
31
+ sidekiq_batch_size: 50,
32
+ sidekiq_job_timeout: 600,
31
33
  cache_redis_url: nil,
32
34
  cache_ex: nil,
33
35
  cache_prefix: "miteru:cache",
34
36
  slack_channel: "#general",
35
37
  slack_webhook_url: nil,
36
- threads: Parallel.processor_count,
37
38
  urlscan_api_key: nil,
38
39
  urlscan_submit_visibility: "public",
39
40
  urlscan_date_condition: "date:>now-1h",
@@ -48,11 +49,17 @@ module Miteru
48
49
  # @return [Float]
49
50
 
50
51
  # @!attribute [r] sidekiq_redis_url
51
- # @return [String, nil]
52
+ # @return [String]
52
53
 
53
54
  # @!attribute [r] sidekiq_job_retry
54
55
  # @return [Integer]
55
56
 
57
+ # @!attribute [r] sidekiq_batch_size
58
+ # @return [Integer]
59
+
60
+ # @!attribute [r] sidekiq_job_timeout
61
+ # @return [Integer]
62
+
56
63
  # @!attribute [r] cache_redis_url
57
64
  # @return [String, nil]
58
65
 
@@ -80,9 +87,6 @@ module Miteru
80
87
  # @!attribute [rw] download_to
81
88
  # @return [String]
82
89
 
83
- # @!attribute [rw] threads
84
- # @return [Integer]
85
-
86
90
  # @!attribute [r] cache_redis_url
87
91
  # @return [String, nil]
88
92
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class V2Schema < ActiveRecord::Migration[7.2]
3
+ class V2Schema < ActiveRecord::Migration[8.0]
4
4
  def change
5
5
  create_table :records, if_not_exists: true do |t|
6
6
  t.string :sha256, null: false, index: {unique: true}
@@ -4,32 +4,8 @@ module Miteru
4
4
  class Orchestrator < Service
5
5
  def call
6
6
  logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
7
-
8
- if sidekiq?
9
- sidekiq_call
10
- else
11
- parallel_call
12
- end
13
- end
14
-
15
- def sidekiq_call
16
- non_cached_websites.each do |website|
17
- Jobs::CrawleJob.perform_async(website.url, website.source)
18
- logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
19
- end
20
- end
21
-
22
- def parallel_call
23
- logger.info("Use #{threads} thread(s).") if verbose?
24
- Parallel.each(non_cached_websites, in_threads: threads) do |website|
25
- logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
26
- result = Crawler.result(website)
27
- if result.success?
28
- logger.info("Crawler:#{website.truncated_url} succeeded.")
29
- else
30
- logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
31
- end
32
- end
7
+ array_of_args = non_cached_websites.map { |website| [website.url, website.source] }
8
+ Jobs::CrawleJob.perform_bulk(array_of_args, batch_size: Miteru.config.sidekiq_batch_size)
33
9
  end
34
10
 
35
11
  #
@@ -6,7 +6,10 @@ require "miteru/sidekiq/jobs"
6
6
 
7
7
  Sidekiq.configure_server do |config|
8
8
  config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
9
- config.default_job_options = {"retry" => Miteru.config.sidekiq_job_retry}
9
+ config.default_job_options = {
10
+ retry: Miteru.config.sidekiq_job_retry,
11
+ expires_in: 0.second
12
+ }
10
13
  end
11
14
 
12
15
  Sidekiq.configure_client do |config|
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sidekiq"
4
+ require "timeout"
4
5
 
5
6
  module Miteru
6
7
  module Jobs
@@ -14,12 +15,15 @@ module Miteru
14
15
  #
15
16
  def perform(url, source)
16
17
  website = Miteru::Website.new(url, source:)
18
+
17
19
  with_db_connection do
18
- result = Crawler.result(website)
19
- if result.success?
20
- Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
21
- else
22
- Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
20
+ Timeout.timeout(Miteru.config.sidekiq_job_timeout) do
21
+ result = Crawler.result(website)
22
+ if result.success?
23
+ Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
24
+ else
25
+ Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
26
+ end
23
27
  end
24
28
  end
25
29
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "2.3.2"
4
+ VERSION = "3.0.0"
5
5
  end
data/lib/miteru.rb CHANGED
@@ -9,7 +9,6 @@ require "uuidtools"
9
9
  # Core 3rd party libraries
10
10
  require "colorize"
11
11
  require "memo_wise"
12
- require "parallel"
13
12
  require "semantic_logger"
14
13
  require "sentry-ruby"
15
14
 
@@ -108,13 +107,6 @@ module Miteru
108
107
  @cache ||= Cache.new(Miteru.config.cache_redis_url)
109
108
  end
110
109
 
111
- #
112
- # @return [Boolean]
113
- #
114
- def sidekiq?
115
- !Miteru.config.sidekiq_redis_url.nil?
116
- end
117
-
118
110
  def sentry?
119
111
  !Miteru.config.sentry_dsn.nil?
120
112
  end
data/miteru.gemspec CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
16
16
  spec.homepage = "https://github.com/ninoseki/miteru"
17
17
  spec.license = "MIT"
18
18
 
19
+ spec.required_ruby_version = ">= 3.2"
20
+
19
21
  # Specify which files should be added to the gem when it is released.
20
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
23
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
@@ -30,38 +32,37 @@ Gem::Specification.new do |spec|
30
32
  spec.add_development_dependency "coveralls_reborn", "~> 0.28"
31
33
  spec.add_development_dependency "fuubar", "~> 2.5.1"
32
34
  spec.add_development_dependency "mysql2", "~> 0.5.6"
33
- spec.add_development_dependency "pg", "~> 1.5.8"
35
+ spec.add_development_dependency "pg", "~> 1.5.9"
34
36
  spec.add_development_dependency "rake", "~> 13.2.1"
35
37
  spec.add_development_dependency "rspec", "~> 3.13"
36
38
  spec.add_development_dependency "simplecov-lcov", "~> 0.8"
37
- spec.add_development_dependency "standard", "~> 1.40.0"
39
+ spec.add_development_dependency "standard", "~> 1.42.0"
38
40
  spec.add_development_dependency "test-prof", "~> 1.4.2"
39
41
  spec.add_development_dependency "vcr", "~> 6.3.1"
40
- spec.add_development_dependency "webmock", "~> 3.23.1"
42
+ spec.add_development_dependency "webmock", "~> 3.24.0"
41
43
 
42
- spec.add_dependency "activerecord", "7.2.1"
44
+ spec.add_dependency "activerecord", "8.0.0"
43
45
  spec.add_dependency "addressable", "2.8.7"
44
46
  spec.add_dependency "anyway_config", "2.6.4"
45
47
  spec.add_dependency "colorize", "1.1.0"
46
- spec.add_dependency "dotenv", "3.1.2"
48
+ spec.add_dependency "dotenv", "3.1.4"
47
49
  spec.add_dependency "down", "5.4.2"
48
50
  spec.add_dependency "dry-files", "1.1.0"
49
51
  spec.add_dependency "dry-monads", "1.6.0"
50
52
  spec.add_dependency "http", "5.2.0"
51
- spec.add_dependency "memo_wise", "1.9.0"
53
+ spec.add_dependency "memo_wise", "1.10.0"
52
54
  spec.add_dependency "oga", "3.4"
53
- spec.add_dependency "parallel", "1.26.3"
54
- spec.add_dependency "puma", "6.4.2"
55
- spec.add_dependency "rack", "3.1.7"
55
+ spec.add_dependency "puma", "6.4.3"
56
+ spec.add_dependency "rack", "3.1.8"
56
57
  spec.add_dependency "rack-session", "2.0.0"
57
- spec.add_dependency "rackup", "2.1.0"
58
+ spec.add_dependency "rackup", "2.2.1"
58
59
  spec.add_dependency "redis", "5.3.0"
59
60
  spec.add_dependency "semantic_logger", "4.16.1"
60
- spec.add_dependency "sentry-ruby", "5.19.0"
61
- spec.add_dependency "sentry-sidekiq", "5.19.0"
62
- spec.add_dependency "sidekiq", "7.3.2"
61
+ spec.add_dependency "sentry-ruby", "5.21.0"
62
+ spec.add_dependency "sentry-sidekiq", "5.21.0"
63
+ spec.add_dependency "sidekiq", "7.3.5"
63
64
  spec.add_dependency "slack-notifier", "2.4.0"
64
- spec.add_dependency "sqlite3", "1.7.2"
65
+ spec.add_dependency "sqlite3", "2.2.0"
65
66
  spec.add_dependency "thor", "1.3.2"
66
67
  spec.add_dependency "thor-hollaback", "0.2.1"
67
68
  spec.add_dependency "uuidtools", "2.2.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miteru
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.2
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manabu Niseki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-09-10 00:00:00.000000000 Z
11
+ date: 2024-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 1.5.8
89
+ version: 1.5.9
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 1.5.8
96
+ version: 1.5.9
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rake
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,14 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: 1.40.0
145
+ version: 1.42.0
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: 1.40.0
152
+ version: 1.42.0
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: test-prof
155
155
  requirement: !ruby/object:Gem::Requirement
@@ -184,28 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - "~>"
186
186
  - !ruby/object:Gem::Version
187
- version: 3.23.1
187
+ version: 3.24.0
188
188
  type: :development
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
- version: 3.23.1
194
+ version: 3.24.0
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: activerecord
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - '='
200
200
  - !ruby/object:Gem::Version
201
- version: 7.2.1
201
+ version: 8.0.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - '='
207
207
  - !ruby/object:Gem::Version
208
- version: 7.2.1
208
+ version: 8.0.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: addressable
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -254,14 +254,14 @@ dependencies:
254
254
  requirements:
255
255
  - - '='
256
256
  - !ruby/object:Gem::Version
257
- version: 3.1.2
257
+ version: 3.1.4
258
258
  type: :runtime
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
262
  - - '='
263
263
  - !ruby/object:Gem::Version
264
- version: 3.1.2
264
+ version: 3.1.4
265
265
  - !ruby/object:Gem::Dependency
266
266
  name: down
267
267
  requirement: !ruby/object:Gem::Requirement
@@ -324,14 +324,14 @@ dependencies:
324
324
  requirements:
325
325
  - - '='
326
326
  - !ruby/object:Gem::Version
327
- version: 1.9.0
327
+ version: 1.10.0
328
328
  type: :runtime
329
329
  prerelease: false
330
330
  version_requirements: !ruby/object:Gem::Requirement
331
331
  requirements:
332
332
  - - '='
333
333
  - !ruby/object:Gem::Version
334
- version: 1.9.0
334
+ version: 1.10.0
335
335
  - !ruby/object:Gem::Dependency
336
336
  name: oga
337
337
  requirement: !ruby/object:Gem::Requirement
@@ -346,48 +346,34 @@ dependencies:
346
346
  - - '='
347
347
  - !ruby/object:Gem::Version
348
348
  version: '3.4'
349
- - !ruby/object:Gem::Dependency
350
- name: parallel
351
- requirement: !ruby/object:Gem::Requirement
352
- requirements:
353
- - - '='
354
- - !ruby/object:Gem::Version
355
- version: 1.26.3
356
- type: :runtime
357
- prerelease: false
358
- version_requirements: !ruby/object:Gem::Requirement
359
- requirements:
360
- - - '='
361
- - !ruby/object:Gem::Version
362
- version: 1.26.3
363
349
  - !ruby/object:Gem::Dependency
364
350
  name: puma
365
351
  requirement: !ruby/object:Gem::Requirement
366
352
  requirements:
367
353
  - - '='
368
354
  - !ruby/object:Gem::Version
369
- version: 6.4.2
355
+ version: 6.4.3
370
356
  type: :runtime
371
357
  prerelease: false
372
358
  version_requirements: !ruby/object:Gem::Requirement
373
359
  requirements:
374
360
  - - '='
375
361
  - !ruby/object:Gem::Version
376
- version: 6.4.2
362
+ version: 6.4.3
377
363
  - !ruby/object:Gem::Dependency
378
364
  name: rack
379
365
  requirement: !ruby/object:Gem::Requirement
380
366
  requirements:
381
367
  - - '='
382
368
  - !ruby/object:Gem::Version
383
- version: 3.1.7
369
+ version: 3.1.8
384
370
  type: :runtime
385
371
  prerelease: false
386
372
  version_requirements: !ruby/object:Gem::Requirement
387
373
  requirements:
388
374
  - - '='
389
375
  - !ruby/object:Gem::Version
390
- version: 3.1.7
376
+ version: 3.1.8
391
377
  - !ruby/object:Gem::Dependency
392
378
  name: rack-session
393
379
  requirement: !ruby/object:Gem::Requirement
@@ -408,14 +394,14 @@ dependencies:
408
394
  requirements:
409
395
  - - '='
410
396
  - !ruby/object:Gem::Version
411
- version: 2.1.0
397
+ version: 2.2.1
412
398
  type: :runtime
413
399
  prerelease: false
414
400
  version_requirements: !ruby/object:Gem::Requirement
415
401
  requirements:
416
402
  - - '='
417
403
  - !ruby/object:Gem::Version
418
- version: 2.1.0
404
+ version: 2.2.1
419
405
  - !ruby/object:Gem::Dependency
420
406
  name: redis
421
407
  requirement: !ruby/object:Gem::Requirement
@@ -450,42 +436,42 @@ dependencies:
450
436
  requirements:
451
437
  - - '='
452
438
  - !ruby/object:Gem::Version
453
- version: 5.19.0
439
+ version: 5.21.0
454
440
  type: :runtime
455
441
  prerelease: false
456
442
  version_requirements: !ruby/object:Gem::Requirement
457
443
  requirements:
458
444
  - - '='
459
445
  - !ruby/object:Gem::Version
460
- version: 5.19.0
446
+ version: 5.21.0
461
447
  - !ruby/object:Gem::Dependency
462
448
  name: sentry-sidekiq
463
449
  requirement: !ruby/object:Gem::Requirement
464
450
  requirements:
465
451
  - - '='
466
452
  - !ruby/object:Gem::Version
467
- version: 5.19.0
453
+ version: 5.21.0
468
454
  type: :runtime
469
455
  prerelease: false
470
456
  version_requirements: !ruby/object:Gem::Requirement
471
457
  requirements:
472
458
  - - '='
473
459
  - !ruby/object:Gem::Version
474
- version: 5.19.0
460
+ version: 5.21.0
475
461
  - !ruby/object:Gem::Dependency
476
462
  name: sidekiq
477
463
  requirement: !ruby/object:Gem::Requirement
478
464
  requirements:
479
465
  - - '='
480
466
  - !ruby/object:Gem::Version
481
- version: 7.3.2
467
+ version: 7.3.5
482
468
  type: :runtime
483
469
  prerelease: false
484
470
  version_requirements: !ruby/object:Gem::Requirement
485
471
  requirements:
486
472
  - - '='
487
473
  - !ruby/object:Gem::Version
488
- version: 7.3.2
474
+ version: 7.3.5
489
475
  - !ruby/object:Gem::Dependency
490
476
  name: slack-notifier
491
477
  requirement: !ruby/object:Gem::Requirement
@@ -506,14 +492,14 @@ dependencies:
506
492
  requirements:
507
493
  - - '='
508
494
  - !ruby/object:Gem::Version
509
- version: 1.7.2
495
+ version: 2.2.0
510
496
  type: :runtime
511
497
  prerelease: false
512
498
  version_requirements: !ruby/object:Gem::Requirement
513
499
  requirements:
514
500
  - - '='
515
501
  - !ruby/object:Gem::Version
516
- version: 1.7.2
502
+ version: 2.2.0
517
503
  - !ruby/object:Gem::Dependency
518
504
  name: thor
519
505
  requirement: !ruby/object:Gem::Requirement
@@ -632,14 +618,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
632
618
  requirements:
633
619
  - - ">="
634
620
  - !ruby/object:Gem::Version
635
- version: '0'
621
+ version: '3.2'
636
622
  required_rubygems_version: !ruby/object:Gem::Requirement
637
623
  requirements:
638
624
  - - ">="
639
625
  - !ruby/object:Gem::Version
640
626
  version: '0'
641
627
  requirements: []
642
- rubygems_version: 3.5.16
628
+ rubygems_version: 3.5.22
643
629
  signing_key:
644
630
  specification_version: 4
645
631
  summary: A phishing kit collector for scavengers