miteru 2.3.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dd3ba5f3cd08b4af7eb269c458323f65214491a6111a4d5277b0102913629a77
4
- data.tar.gz: 47542263b5b2e60e5dfadf149d15bac3446f00b5e8aec15e9c83f60629328546
3
+ metadata.gz: 5d1c74e7ec6c2b56b4fa872ab3d7acb5c69be947a343eed838382bfb1dbc9e16
4
+ data.tar.gz: e57575a1fd2d5a6a859e045209b30ed1f6b80aef9e930a1146bfb2784c16bcd7
5
5
  SHA512:
6
- metadata.gz: '028f612c45b87b4b9ec30cffad2168ec6b3983756e9bdd865835a38ba3c9533ccbe6238240f263f9f1cbf460cad8347e3ed1ec00623ef6cdbb1cfa4ebcf5da47'
7
- data.tar.gz: bc1a770f94eda268ce90735a80d6b45fd6e2f3a900300de52c1499debf15d5e6f7e705d8e7bee068a5234d0f7cf744f6167a50c61914438be78fb68bed38a8e9
6
+ metadata.gz: 4da382b9c07c6a9ad1961487335eeea1509b1d82586dddc718c753a94ca63523cb460f948db8fc26dff231fdf151c418d207bcbce4edb435d72e4bd996f7e655
7
+ data.tar.gz: 475e3848581d32070621018992343a160f1133002f0d240204730d1342be0daddd5048d511424f824b56953935ebb3b04efe37b2ead84d46c85bd57794ce9002
@@ -35,7 +35,7 @@ jobs:
35
35
  --health-retries=3
36
36
  strategy:
37
37
  matrix:
38
- ruby: [3.1, 3.2, 3.3]
38
+ ruby: [3.2, 3.3]
39
39
  steps:
40
40
  - uses: actions/checkout@v4
41
41
  - name: Set up Ruby
@@ -49,10 +49,8 @@ jobs:
49
49
  - name: Test with PostgreSQL
50
50
  env:
51
51
  MITERU_DATABASE: postgresql://postgres:postgres@localhost:5432/test
52
- run: |
53
- bundle exec rake
52
+ run: bundle exec rake
54
53
  - name: Test with MySQL
55
54
  env:
56
55
  MITERU_DATABASE: mysql2://mysql:mysql@127.0.0.1:3306/test
57
- run: |
58
- bundle exec rake
56
+ run: bundle exec rake
@@ -13,7 +13,6 @@ module Miteru
13
13
  method_option :directory_traveling, type: :boolean, default: false,
14
14
  desc: "Enable or disable directory traveling"
15
15
  method_option :download_to, type: :string, default: "/tmp", desc: "Directory to download phishing kits"
16
- method_option :threads, type: :numeric, desc: "Number of threads to use", default: Parallel.processor_count
17
16
  method_option :verbose, type: :boolean, default: true
18
17
  desc "execute", "Execute the crawler"
19
18
  around :with_db_connection
@@ -22,10 +21,8 @@ module Miteru
22
21
  config.auto_download = options["auto_download"]
23
22
  config.directory_traveling = options["directory_traveling"]
24
23
  config.download_to = options["download_to"]
25
- config.threads = options["threads"]
26
24
  config.verbose = options["verbose"]
27
25
  end
28
-
29
26
  Orchestrator.call
30
27
  end
31
28
  default_command :execute
data/lib/miteru/config.rb CHANGED
@@ -26,14 +26,15 @@ module Miteru
26
26
  download_timeout: 60,
27
27
  sentry_dsn: nil,
28
28
  sentry_trace_sample_rate: 0.25,
29
- sidekiq_redis_url: nil,
29
+ sidekiq_redis_url: "redis://localhost:6379",
30
30
  sidekiq_job_retry: 0,
31
+ sidekiq_batch_size: 50,
32
+ sidekiq_job_timeout: 600,
31
33
  cache_redis_url: nil,
32
34
  cache_ex: nil,
33
35
  cache_prefix: "miteru:cache",
34
36
  slack_channel: "#general",
35
37
  slack_webhook_url: nil,
36
- threads: Parallel.processor_count,
37
38
  urlscan_api_key: nil,
38
39
  urlscan_submit_visibility: "public",
39
40
  urlscan_date_condition: "date:>now-1h",
@@ -48,11 +49,17 @@ module Miteru
48
49
  # @return [Float]
49
50
 
50
51
  # @!attribute [r] sidekiq_redis_url
51
- # @return [String, nil]
52
+ # @return [String]
52
53
 
53
54
  # @!attribute [r] sidekiq_job_retry
54
55
  # @return [Integer]
55
56
 
57
+ # @!attribute [r] sidekiq_batch_size
58
+ # @return [Integer]
59
+
60
+ # @!attribute [r] sidekiq_job_timeout
61
+ # @return [Integer]
62
+
56
63
  # @!attribute [r] cache_redis_url
57
64
  # @return [String, nil]
58
65
 
@@ -80,9 +87,6 @@ module Miteru
80
87
  # @!attribute [rw] download_to
81
88
  # @return [String]
82
89
 
83
- # @!attribute [rw] threads
84
- # @return [Integer]
85
-
86
90
  # @!attribute [r] cache_redis_url
87
91
  # @return [String, nil]
88
92
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class V2Schema < ActiveRecord::Migration[7.2]
3
+ class V2Schema < ActiveRecord::Migration[8.0]
4
4
  def change
5
5
  create_table :records, if_not_exists: true do |t|
6
6
  t.string :sha256, null: false, index: {unique: true}
@@ -4,32 +4,8 @@ module Miteru
4
4
  class Orchestrator < Service
5
5
  def call
6
6
  logger.info("#{non_cached_websites.length} websites loaded in total.") if verbose?
7
-
8
- if sidekiq?
9
- sidekiq_call
10
- else
11
- parallel_call
12
- end
13
- end
14
-
15
- def sidekiq_call
16
- non_cached_websites.each do |website|
17
- Jobs::CrawleJob.perform_async(website.url, website.source)
18
- logger.info("Website:#{website.truncated_url} crawler job queued.") if verbose?
19
- end
20
- end
21
-
22
- def parallel_call
23
- logger.info("Use #{threads} thread(s).") if verbose?
24
- Parallel.each(non_cached_websites, in_threads: threads) do |website|
25
- logger.info("Website:#{website.truncated_url} crawling started.") if verbose?
26
- result = Crawler.result(website)
27
- if result.success?
28
- logger.info("Crawler:#{website.truncated_url} succeeded.")
29
- else
30
- logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
31
- end
32
- end
7
+ array_of_args = non_cached_websites.map { |website| [website.url, website.source] }
8
+ Jobs::CrawleJob.perform_bulk(array_of_args, batch_size: Miteru.config.sidekiq_batch_size)
33
9
  end
34
10
 
35
11
  #
@@ -6,7 +6,10 @@ require "miteru/sidekiq/jobs"
6
6
 
7
7
  Sidekiq.configure_server do |config|
8
8
  config.redis = {url: Miteru.config.sidekiq_redis_url.to_s}
9
- config.default_job_options = {"retry" => Miteru.config.sidekiq_job_retry}
9
+ config.default_job_options = {
10
+ retry: Miteru.config.sidekiq_job_retry,
11
+ expires_in: 0.second
12
+ }
10
13
  end
11
14
 
12
15
  Sidekiq.configure_client do |config|
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "sidekiq"
4
+ require "timeout"
4
5
 
5
6
  module Miteru
6
7
  module Jobs
@@ -14,12 +15,15 @@ module Miteru
14
15
  #
15
16
  def perform(url, source)
16
17
  website = Miteru::Website.new(url, source:)
18
+
17
19
  with_db_connection do
18
- result = Crawler.result(website)
19
- if result.success?
20
- Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
21
- else
22
- Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
20
+ Timeout.timeout(Miteru.config.sidekiq_job_timeout) do
21
+ result = Crawler.result(website)
22
+ if result.success?
23
+ Miteru.logger.info("Crawler:#{website.truncated_url} succeeded.")
24
+ else
25
+ Miteru.logger.info("Crawler:#{website.truncated_url} failed - #{result.failure}.")
26
+ end
23
27
  end
24
28
  end
25
29
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Miteru
4
- VERSION = "2.3.2"
4
+ VERSION = "3.0.0"
5
5
  end
data/lib/miteru.rb CHANGED
@@ -9,7 +9,6 @@ require "uuidtools"
9
9
  # Core 3rd party libraries
10
10
  require "colorize"
11
11
  require "memo_wise"
12
- require "parallel"
13
12
  require "semantic_logger"
14
13
  require "sentry-ruby"
15
14
 
@@ -108,13 +107,6 @@ module Miteru
108
107
  @cache ||= Cache.new(Miteru.config.cache_redis_url)
109
108
  end
110
109
 
111
- #
112
- # @return [Boolean]
113
- #
114
- def sidekiq?
115
- !Miteru.config.sidekiq_redis_url.nil?
116
- end
117
-
118
110
  def sentry?
119
111
  !Miteru.config.sentry_dsn.nil?
120
112
  end
data/miteru.gemspec CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |spec|
16
16
  spec.homepage = "https://github.com/ninoseki/miteru"
17
17
  spec.license = "MIT"
18
18
 
19
+ spec.required_ruby_version = ">= 3.2"
20
+
19
21
  # Specify which files should be added to the gem when it is released.
20
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
21
23
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
@@ -30,38 +32,37 @@ Gem::Specification.new do |spec|
30
32
  spec.add_development_dependency "coveralls_reborn", "~> 0.28"
31
33
  spec.add_development_dependency "fuubar", "~> 2.5.1"
32
34
  spec.add_development_dependency "mysql2", "~> 0.5.6"
33
- spec.add_development_dependency "pg", "~> 1.5.8"
35
+ spec.add_development_dependency "pg", "~> 1.5.9"
34
36
  spec.add_development_dependency "rake", "~> 13.2.1"
35
37
  spec.add_development_dependency "rspec", "~> 3.13"
36
38
  spec.add_development_dependency "simplecov-lcov", "~> 0.8"
37
- spec.add_development_dependency "standard", "~> 1.40.0"
39
+ spec.add_development_dependency "standard", "~> 1.42.0"
38
40
  spec.add_development_dependency "test-prof", "~> 1.4.2"
39
41
  spec.add_development_dependency "vcr", "~> 6.3.1"
40
- spec.add_development_dependency "webmock", "~> 3.23.1"
42
+ spec.add_development_dependency "webmock", "~> 3.24.0"
41
43
 
42
- spec.add_dependency "activerecord", "7.2.1"
44
+ spec.add_dependency "activerecord", "8.0.0"
43
45
  spec.add_dependency "addressable", "2.8.7"
44
46
  spec.add_dependency "anyway_config", "2.6.4"
45
47
  spec.add_dependency "colorize", "1.1.0"
46
- spec.add_dependency "dotenv", "3.1.2"
48
+ spec.add_dependency "dotenv", "3.1.4"
47
49
  spec.add_dependency "down", "5.4.2"
48
50
  spec.add_dependency "dry-files", "1.1.0"
49
51
  spec.add_dependency "dry-monads", "1.6.0"
50
52
  spec.add_dependency "http", "5.2.0"
51
- spec.add_dependency "memo_wise", "1.9.0"
53
+ spec.add_dependency "memo_wise", "1.10.0"
52
54
  spec.add_dependency "oga", "3.4"
53
- spec.add_dependency "parallel", "1.26.3"
54
- spec.add_dependency "puma", "6.4.2"
55
- spec.add_dependency "rack", "3.1.7"
55
+ spec.add_dependency "puma", "6.4.3"
56
+ spec.add_dependency "rack", "3.1.8"
56
57
  spec.add_dependency "rack-session", "2.0.0"
57
- spec.add_dependency "rackup", "2.1.0"
58
+ spec.add_dependency "rackup", "2.2.1"
58
59
  spec.add_dependency "redis", "5.3.0"
59
60
  spec.add_dependency "semantic_logger", "4.16.1"
60
- spec.add_dependency "sentry-ruby", "5.19.0"
61
- spec.add_dependency "sentry-sidekiq", "5.19.0"
62
- spec.add_dependency "sidekiq", "7.3.2"
61
+ spec.add_dependency "sentry-ruby", "5.21.0"
62
+ spec.add_dependency "sentry-sidekiq", "5.21.0"
63
+ spec.add_dependency "sidekiq", "7.3.5"
63
64
  spec.add_dependency "slack-notifier", "2.4.0"
64
- spec.add_dependency "sqlite3", "1.7.2"
65
+ spec.add_dependency "sqlite3", "2.2.0"
65
66
  spec.add_dependency "thor", "1.3.2"
66
67
  spec.add_dependency "thor-hollaback", "0.2.1"
67
68
  spec.add_dependency "uuidtools", "2.2.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miteru
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.2
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manabu Niseki
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-09-10 00:00:00.000000000 Z
11
+ date: 2024-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 1.5.8
89
+ version: 1.5.9
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 1.5.8
96
+ version: 1.5.9
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rake
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +142,14 @@ dependencies:
142
142
  requirements:
143
143
  - - "~>"
144
144
  - !ruby/object:Gem::Version
145
- version: 1.40.0
145
+ version: 1.42.0
146
146
  type: :development
147
147
  prerelease: false
148
148
  version_requirements: !ruby/object:Gem::Requirement
149
149
  requirements:
150
150
  - - "~>"
151
151
  - !ruby/object:Gem::Version
152
- version: 1.40.0
152
+ version: 1.42.0
153
153
  - !ruby/object:Gem::Dependency
154
154
  name: test-prof
155
155
  requirement: !ruby/object:Gem::Requirement
@@ -184,28 +184,28 @@ dependencies:
184
184
  requirements:
185
185
  - - "~>"
186
186
  - !ruby/object:Gem::Version
187
- version: 3.23.1
187
+ version: 3.24.0
188
188
  type: :development
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
192
  - - "~>"
193
193
  - !ruby/object:Gem::Version
194
- version: 3.23.1
194
+ version: 3.24.0
195
195
  - !ruby/object:Gem::Dependency
196
196
  name: activerecord
197
197
  requirement: !ruby/object:Gem::Requirement
198
198
  requirements:
199
199
  - - '='
200
200
  - !ruby/object:Gem::Version
201
- version: 7.2.1
201
+ version: 8.0.0
202
202
  type: :runtime
203
203
  prerelease: false
204
204
  version_requirements: !ruby/object:Gem::Requirement
205
205
  requirements:
206
206
  - - '='
207
207
  - !ruby/object:Gem::Version
208
- version: 7.2.1
208
+ version: 8.0.0
209
209
  - !ruby/object:Gem::Dependency
210
210
  name: addressable
211
211
  requirement: !ruby/object:Gem::Requirement
@@ -254,14 +254,14 @@ dependencies:
254
254
  requirements:
255
255
  - - '='
256
256
  - !ruby/object:Gem::Version
257
- version: 3.1.2
257
+ version: 3.1.4
258
258
  type: :runtime
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
262
  - - '='
263
263
  - !ruby/object:Gem::Version
264
- version: 3.1.2
264
+ version: 3.1.4
265
265
  - !ruby/object:Gem::Dependency
266
266
  name: down
267
267
  requirement: !ruby/object:Gem::Requirement
@@ -324,14 +324,14 @@ dependencies:
324
324
  requirements:
325
325
  - - '='
326
326
  - !ruby/object:Gem::Version
327
- version: 1.9.0
327
+ version: 1.10.0
328
328
  type: :runtime
329
329
  prerelease: false
330
330
  version_requirements: !ruby/object:Gem::Requirement
331
331
  requirements:
332
332
  - - '='
333
333
  - !ruby/object:Gem::Version
334
- version: 1.9.0
334
+ version: 1.10.0
335
335
  - !ruby/object:Gem::Dependency
336
336
  name: oga
337
337
  requirement: !ruby/object:Gem::Requirement
@@ -346,48 +346,34 @@ dependencies:
346
346
  - - '='
347
347
  - !ruby/object:Gem::Version
348
348
  version: '3.4'
349
- - !ruby/object:Gem::Dependency
350
- name: parallel
351
- requirement: !ruby/object:Gem::Requirement
352
- requirements:
353
- - - '='
354
- - !ruby/object:Gem::Version
355
- version: 1.26.3
356
- type: :runtime
357
- prerelease: false
358
- version_requirements: !ruby/object:Gem::Requirement
359
- requirements:
360
- - - '='
361
- - !ruby/object:Gem::Version
362
- version: 1.26.3
363
349
  - !ruby/object:Gem::Dependency
364
350
  name: puma
365
351
  requirement: !ruby/object:Gem::Requirement
366
352
  requirements:
367
353
  - - '='
368
354
  - !ruby/object:Gem::Version
369
- version: 6.4.2
355
+ version: 6.4.3
370
356
  type: :runtime
371
357
  prerelease: false
372
358
  version_requirements: !ruby/object:Gem::Requirement
373
359
  requirements:
374
360
  - - '='
375
361
  - !ruby/object:Gem::Version
376
- version: 6.4.2
362
+ version: 6.4.3
377
363
  - !ruby/object:Gem::Dependency
378
364
  name: rack
379
365
  requirement: !ruby/object:Gem::Requirement
380
366
  requirements:
381
367
  - - '='
382
368
  - !ruby/object:Gem::Version
383
- version: 3.1.7
369
+ version: 3.1.8
384
370
  type: :runtime
385
371
  prerelease: false
386
372
  version_requirements: !ruby/object:Gem::Requirement
387
373
  requirements:
388
374
  - - '='
389
375
  - !ruby/object:Gem::Version
390
- version: 3.1.7
376
+ version: 3.1.8
391
377
  - !ruby/object:Gem::Dependency
392
378
  name: rack-session
393
379
  requirement: !ruby/object:Gem::Requirement
@@ -408,14 +394,14 @@ dependencies:
408
394
  requirements:
409
395
  - - '='
410
396
  - !ruby/object:Gem::Version
411
- version: 2.1.0
397
+ version: 2.2.1
412
398
  type: :runtime
413
399
  prerelease: false
414
400
  version_requirements: !ruby/object:Gem::Requirement
415
401
  requirements:
416
402
  - - '='
417
403
  - !ruby/object:Gem::Version
418
- version: 2.1.0
404
+ version: 2.2.1
419
405
  - !ruby/object:Gem::Dependency
420
406
  name: redis
421
407
  requirement: !ruby/object:Gem::Requirement
@@ -450,42 +436,42 @@ dependencies:
450
436
  requirements:
451
437
  - - '='
452
438
  - !ruby/object:Gem::Version
453
- version: 5.19.0
439
+ version: 5.21.0
454
440
  type: :runtime
455
441
  prerelease: false
456
442
  version_requirements: !ruby/object:Gem::Requirement
457
443
  requirements:
458
444
  - - '='
459
445
  - !ruby/object:Gem::Version
460
- version: 5.19.0
446
+ version: 5.21.0
461
447
  - !ruby/object:Gem::Dependency
462
448
  name: sentry-sidekiq
463
449
  requirement: !ruby/object:Gem::Requirement
464
450
  requirements:
465
451
  - - '='
466
452
  - !ruby/object:Gem::Version
467
- version: 5.19.0
453
+ version: 5.21.0
468
454
  type: :runtime
469
455
  prerelease: false
470
456
  version_requirements: !ruby/object:Gem::Requirement
471
457
  requirements:
472
458
  - - '='
473
459
  - !ruby/object:Gem::Version
474
- version: 5.19.0
460
+ version: 5.21.0
475
461
  - !ruby/object:Gem::Dependency
476
462
  name: sidekiq
477
463
  requirement: !ruby/object:Gem::Requirement
478
464
  requirements:
479
465
  - - '='
480
466
  - !ruby/object:Gem::Version
481
- version: 7.3.2
467
+ version: 7.3.5
482
468
  type: :runtime
483
469
  prerelease: false
484
470
  version_requirements: !ruby/object:Gem::Requirement
485
471
  requirements:
486
472
  - - '='
487
473
  - !ruby/object:Gem::Version
488
- version: 7.3.2
474
+ version: 7.3.5
489
475
  - !ruby/object:Gem::Dependency
490
476
  name: slack-notifier
491
477
  requirement: !ruby/object:Gem::Requirement
@@ -506,14 +492,14 @@ dependencies:
506
492
  requirements:
507
493
  - - '='
508
494
  - !ruby/object:Gem::Version
509
- version: 1.7.2
495
+ version: 2.2.0
510
496
  type: :runtime
511
497
  prerelease: false
512
498
  version_requirements: !ruby/object:Gem::Requirement
513
499
  requirements:
514
500
  - - '='
515
501
  - !ruby/object:Gem::Version
516
- version: 1.7.2
502
+ version: 2.2.0
517
503
  - !ruby/object:Gem::Dependency
518
504
  name: thor
519
505
  requirement: !ruby/object:Gem::Requirement
@@ -632,14 +618,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
632
618
  requirements:
633
619
  - - ">="
634
620
  - !ruby/object:Gem::Version
635
- version: '0'
621
+ version: '3.2'
636
622
  required_rubygems_version: !ruby/object:Gem::Requirement
637
623
  requirements:
638
624
  - - ">="
639
625
  - !ruby/object:Gem::Version
640
626
  version: '0'
641
627
  requirements: []
642
- rubygems_version: 3.5.16
628
+ rubygems_version: 3.5.22
643
629
  signing_key:
644
630
  specification_version: 4
645
631
  summary: A phishing kit collector for scavengers