wayfarer 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci.yaml +1 -1
- data/Gemfile.lock +20 -15
- data/docs/cookbook/user_agent.md +1 -1
- data/docs/guides/browser_automation/capybara.md +64 -1
- data/docs/guides/browser_automation/custom_adapters.md +100 -0
- data/docs/guides/browser_automation/ferrum.md +3 -3
- data/docs/guides/browser_automation/selenium.md +7 -5
- data/docs/guides/callbacks.md +117 -10
- data/docs/guides/configuration.md +16 -10
- data/docs/guides/error_handling.md +9 -5
- data/docs/guides/networking.md +77 -3
- data/docs/index.md +9 -1
- data/docs/reference/api/base.md +4 -4
- data/docs/reference/configuration_keys.md +42 -0
- data/docs/reference/environment_variables.md +25 -27
- data/lib/wayfarer/base.rb +7 -17
- data/lib/wayfarer/callbacks.rb +71 -0
- data/lib/wayfarer/cli/base.rb +5 -1
- data/lib/wayfarer/cli/job.rb +7 -3
- data/lib/wayfarer/cli/route.rb +2 -2
- data/lib/wayfarer/cli/route_printer.rb +7 -7
- data/lib/wayfarer/config/capybara.rb +10 -0
- data/lib/wayfarer/config/ferrum.rb +11 -0
- data/lib/wayfarer/config/networking.rb +26 -0
- data/lib/wayfarer/config/redis.rb +14 -0
- data/lib/wayfarer/config/root.rb +11 -0
- data/lib/wayfarer/config/selenium.rb +21 -0
- data/lib/wayfarer/config/strconv.rb +45 -0
- data/lib/wayfarer/config/struct.rb +72 -0
- data/lib/wayfarer/gc.rb +3 -7
- data/lib/wayfarer/middleware/fetch.rb +7 -3
- data/lib/wayfarer/middleware/router.rb +2 -2
- data/lib/wayfarer/middleware/worker.rb +12 -9
- data/lib/wayfarer/networking/capybara.rb +28 -0
- data/lib/wayfarer/networking/context.rb +36 -0
- data/lib/wayfarer/networking/ferrum.rb +17 -52
- data/lib/wayfarer/networking/http.rb +34 -0
- data/lib/wayfarer/networking/pool.rb +15 -10
- data/lib/wayfarer/networking/result.rb +1 -1
- data/lib/wayfarer/networking/selenium.rb +20 -47
- data/lib/wayfarer/networking/strategy.rb +38 -0
- data/lib/wayfarer/page.rb +2 -3
- data/lib/wayfarer/redis/pool.rb +3 -1
- data/lib/wayfarer/routing/dsl.rb +8 -8
- data/lib/wayfarer/routing/matchers/custom.rb +23 -0
- data/lib/wayfarer/routing/matchers/host.rb +19 -0
- data/lib/wayfarer/routing/matchers/path.rb +48 -0
- data/lib/wayfarer/routing/matchers/query.rb +63 -0
- data/lib/wayfarer/routing/matchers/scheme.rb +17 -0
- data/lib/wayfarer/routing/matchers/suffix.rb +17 -0
- data/lib/wayfarer/routing/matchers/url.rb +17 -0
- data/lib/wayfarer/routing/route.rb +1 -1
- data/lib/wayfarer.rb +9 -9
- data/spec/base_spec.rb +14 -0
- data/spec/callbacks_spec.rb +102 -0
- data/spec/cli/job_spec.rb +6 -6
- data/spec/config/capybara_spec.rb +18 -0
- data/spec/config/ferrum_spec.rb +24 -0
- data/spec/config/networking_spec.rb +73 -0
- data/spec/config/redis_spec.rb +32 -0
- data/spec/config/root_spec.rb +31 -0
- data/spec/config/selenium_spec.rb +56 -0
- data/spec/config/strconv_spec.rb +58 -0
- data/spec/config/struct_spec.rb +66 -0
- data/spec/gc_spec.rb +8 -6
- data/spec/middleware/fetch_spec.rb +20 -8
- data/spec/middleware/router_spec.rb +7 -0
- data/spec/middleware/worker_spec.rb +64 -27
- data/spec/networking/capybara_spec.rb +12 -0
- data/spec/networking/context_spec.rb +127 -0
- data/spec/networking/ferrum_spec.rb +6 -22
- data/spec/networking/http_spec.rb +12 -0
- data/spec/networking/pool_spec.rb +37 -12
- data/spec/networking/selenium_spec.rb +6 -22
- data/spec/networking/strategy.rb +170 -0
- data/spec/redis/pool_spec.rb +1 -1
- data/spec/routing/dsl_spec.rb +10 -10
- data/spec/routing/integration_spec.rb +22 -22
- data/spec/routing/{custom_matcher_spec.rb → matchers/custom_spec.rb} +4 -4
- data/spec/routing/{host_matcher_spec.rb → matchers/host_spec.rb} +6 -6
- data/spec/routing/{path_matcher_spec.rb → matchers/path_spec.rb} +6 -6
- data/spec/routing/{query_matcher_spec.rb → matchers/query_spec.rb} +15 -15
- data/spec/routing/{scheme_matcher_spec.rb → matchers/scheme_spec.rb} +4 -4
- data/spec/routing/{suffix_matcher_spec.rb → matchers/suffix_spec.rb} +4 -4
- data/spec/routing/{uri_matcher_spec.rb → matchers/uri_spec.rb} +4 -4
- data/spec/routing/path_finder_spec.rb +1 -1
- data/spec/routing/root_route_spec.rb +2 -2
- data/spec/routing/route_spec.rb +2 -2
- data/spec/spec_helpers.rb +13 -5
- data/spec/wayfarer_spec.rb +1 -1
- data/wayfarer.gemspec +8 -7
- metadata +74 -33
- data/lib/wayfarer/config.rb +0 -67
- data/lib/wayfarer/networking/healer.rb +0 -21
- data/lib/wayfarer/networking/net_http.rb +0 -52
- data/lib/wayfarer/routing/custom_matcher.rb +0 -21
- data/lib/wayfarer/routing/host_matcher.rb +0 -23
- data/lib/wayfarer/routing/path_matcher.rb +0 -46
- data/lib/wayfarer/routing/query_matcher.rb +0 -67
- data/lib/wayfarer/routing/scheme_matcher.rb +0 -21
- data/lib/wayfarer/routing/suffix_matcher.rb +0 -21
- data/lib/wayfarer/routing/url_matcher.rb +0 -21
- data/spec/config_spec.rb +0 -144
- data/spec/networking/adapter.rb +0 -135
- data/spec/networking/healer_spec.rb +0 -46
- data/spec/networking/net_http_spec.rb +0 -37
data/wayfarer.gemspec
CHANGED
@@ -2,12 +2,12 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "wayfarer"
|
5
|
-
s.version = "0.4.0"
|
5
|
+
s.version = "0.4.1"
|
6
6
|
s.license = "MIT"
|
7
7
|
|
8
8
|
s.homepage = "http://github.com/bauerd/wayfarer"
|
9
|
-
s.
|
10
|
-
s.
|
9
|
+
s.summary = "Versatile web crawling with Ruby"
|
10
|
+
s.description = "Web crawling framework based on ActiveJob"
|
11
11
|
|
12
12
|
s.date = "2021-09-26"
|
13
13
|
s.authors = ["Dominic Bauer"]
|
@@ -23,8 +23,8 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.required_ruby_version = "~> 2.7"
|
24
24
|
|
25
25
|
s.add_runtime_dependency "activejob", "~> 6.0"
|
26
|
-
s.add_runtime_dependency "
|
27
|
-
s.add_runtime_dependency "
|
26
|
+
s.add_runtime_dependency "addressable", "~> 2.8"
|
27
|
+
s.add_runtime_dependency "capybara", "~> 3.0"
|
28
28
|
s.add_runtime_dependency "connection_pool", "~> 2.2"
|
29
29
|
s.add_runtime_dependency "docile", "~> 1.1"
|
30
30
|
s.add_runtime_dependency "ferrum", "~> 0.9"
|
@@ -32,13 +32,14 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.add_runtime_dependency "mime-types", "~> 3.0"
|
33
33
|
s.add_runtime_dependency "mustermann", "~> 1.1"
|
34
34
|
s.add_runtime_dependency "mock_redis", "~> 0.29"
|
35
|
-
s.add_runtime_dependency "net-http-persistent", "~>
|
35
|
+
s.add_runtime_dependency "net-http-persistent", "~> 3.0"
|
36
36
|
s.add_runtime_dependency "nokogiri", "~> 1.11"
|
37
37
|
s.add_runtime_dependency "normalize_url", "~> 0.0.6"
|
38
|
-
s.add_runtime_dependency "redis", "~> 4.
|
38
|
+
s.add_runtime_dependency "redis", "~> 4.4", "< 4.5" # redis 4.5.x broke SMISMEMBER
|
39
39
|
s.add_runtime_dependency "selenium-webdriver", "~> 3.4"
|
40
40
|
s.add_runtime_dependency "thor", "~> 1.0"
|
41
41
|
|
42
|
+
s.add_development_dependency "cuprite", "~> 0.13"
|
42
43
|
s.add_development_dependency "factory_bot", "~> 6.0"
|
43
44
|
s.add_development_dependency "faker", "~> 1.7"
|
44
45
|
s.add_development_dependency "pry", "~> 0.10"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wayfarer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dominic Bauer
|
@@ -25,33 +25,33 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '6.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: addressable
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '2.
|
33
|
+
version: '2.8'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '2.
|
40
|
+
version: '2.8'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: capybara
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0
|
47
|
+
version: '3.0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0
|
54
|
+
version: '3.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: connection_pool
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,14 +156,14 @@ dependencies:
|
|
156
156
|
requirements:
|
157
157
|
- - "~>"
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: '
|
159
|
+
version: '3.0'
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: '
|
166
|
+
version: '3.0'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
168
|
name: nokogiri
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -198,14 +198,20 @@ dependencies:
|
|
198
198
|
requirements:
|
199
199
|
- - "~>"
|
200
200
|
- !ruby/object:Gem::Version
|
201
|
-
version: '4.
|
201
|
+
version: '4.4'
|
202
|
+
- - "<"
|
203
|
+
- !ruby/object:Gem::Version
|
204
|
+
version: '4.5'
|
202
205
|
type: :runtime
|
203
206
|
prerelease: false
|
204
207
|
version_requirements: !ruby/object:Gem::Requirement
|
205
208
|
requirements:
|
206
209
|
- - "~>"
|
207
210
|
- !ruby/object:Gem::Version
|
208
|
-
version: '4.
|
211
|
+
version: '4.4'
|
212
|
+
- - "<"
|
213
|
+
- !ruby/object:Gem::Version
|
214
|
+
version: '4.5'
|
209
215
|
- !ruby/object:Gem::Dependency
|
210
216
|
name: selenium-webdriver
|
211
217
|
requirement: !ruby/object:Gem::Requirement
|
@@ -234,6 +240,20 @@ dependencies:
|
|
234
240
|
- - "~>"
|
235
241
|
- !ruby/object:Gem::Version
|
236
242
|
version: '1.0'
|
243
|
+
- !ruby/object:Gem::Dependency
|
244
|
+
name: cuprite
|
245
|
+
requirement: !ruby/object:Gem::Requirement
|
246
|
+
requirements:
|
247
|
+
- - "~>"
|
248
|
+
- !ruby/object:Gem::Version
|
249
|
+
version: '0.13'
|
250
|
+
type: :development
|
251
|
+
prerelease: false
|
252
|
+
version_requirements: !ruby/object:Gem::Requirement
|
253
|
+
requirements:
|
254
|
+
- - "~>"
|
255
|
+
- !ruby/object:Gem::Version
|
256
|
+
version: '0.13'
|
237
257
|
- !ruby/object:Gem::Dependency
|
238
258
|
name: factory_bot
|
239
259
|
requirement: !ruby/object:Gem::Requirement
|
@@ -346,7 +366,7 @@ dependencies:
|
|
346
366
|
- - "~>"
|
347
367
|
- !ruby/object:Gem::Version
|
348
368
|
version: '0.9'
|
349
|
-
description:
|
369
|
+
description: Web crawling framework based on ActiveJob
|
350
370
|
email: mail@bauerd.com
|
351
371
|
executables:
|
352
372
|
- wayfarer
|
@@ -372,6 +392,7 @@ files:
|
|
372
392
|
- docs/cookbook/screenshots.md
|
373
393
|
- docs/cookbook/user_agent.md
|
374
394
|
- docs/guides/browser_automation/capybara.md
|
395
|
+
- docs/guides/browser_automation/custom_adapters.md
|
375
396
|
- docs/guides/browser_automation/ferrum.md
|
376
397
|
- docs/guides/browser_automation/selenium.md
|
377
398
|
- docs/guides/callbacks.md
|
@@ -386,9 +407,11 @@ files:
|
|
386
407
|
- docs/reference/api/base.md
|
387
408
|
- docs/reference/api/route.md
|
388
409
|
- docs/reference/cli.md
|
410
|
+
- docs/reference/configuration_keys.md
|
389
411
|
- docs/reference/environment_variables.md
|
390
412
|
- lib/wayfarer.rb
|
391
413
|
- lib/wayfarer/base.rb
|
414
|
+
- lib/wayfarer/callbacks.rb
|
392
415
|
- lib/wayfarer/cli/base.rb
|
393
416
|
- lib/wayfarer/cli/generate.rb
|
394
417
|
- lib/wayfarer/cli/job.rb
|
@@ -397,7 +420,14 @@ files:
|
|
397
420
|
- lib/wayfarer/cli/runner.rb
|
398
421
|
- lib/wayfarer/cli/templates/Gemfile.tt
|
399
422
|
- lib/wayfarer/cli/templates/job.rb.tt
|
400
|
-
- lib/wayfarer/config.rb
|
423
|
+
- lib/wayfarer/config/capybara.rb
|
424
|
+
- lib/wayfarer/config/ferrum.rb
|
425
|
+
- lib/wayfarer/config/networking.rb
|
426
|
+
- lib/wayfarer/config/redis.rb
|
427
|
+
- lib/wayfarer/config/root.rb
|
428
|
+
- lib/wayfarer/config/selenium.rb
|
429
|
+
- lib/wayfarer/config/strconv.rb
|
430
|
+
- lib/wayfarer/config/struct.rb
|
401
431
|
- lib/wayfarer/gc.rb
|
402
432
|
- lib/wayfarer/middleware/chain.rb
|
403
433
|
- lib/wayfarer/middleware/dedup.rb
|
@@ -406,12 +436,14 @@ files:
|
|
406
436
|
- lib/wayfarer/middleware/router.rb
|
407
437
|
- lib/wayfarer/middleware/stage.rb
|
408
438
|
- lib/wayfarer/middleware/worker.rb
|
439
|
+
- lib/wayfarer/networking/capybara.rb
|
440
|
+
- lib/wayfarer/networking/context.rb
|
409
441
|
- lib/wayfarer/networking/ferrum.rb
|
410
|
-
- lib/wayfarer/networking/healer.rb
|
411
|
-
- lib/wayfarer/networking/net_http.rb
|
442
|
+
- lib/wayfarer/networking/http.rb
|
412
443
|
- lib/wayfarer/networking/pool.rb
|
413
444
|
- lib/wayfarer/networking/result.rb
|
414
445
|
- lib/wayfarer/networking/selenium.rb
|
446
|
+
- lib/wayfarer/networking/strategy.rb
|
415
447
|
- lib/wayfarer/page.rb
|
416
448
|
- lib/wayfarer/parsing/json.rb
|
417
449
|
- lib/wayfarer/parsing/xml.rb
|
@@ -420,29 +452,37 @@ files:
|
|
420
452
|
- lib/wayfarer/redis/counter.rb
|
421
453
|
- lib/wayfarer/redis/pool.rb
|
422
454
|
- lib/wayfarer/redis/version.rb
|
423
|
-
- lib/wayfarer/routing/custom_matcher.rb
|
424
455
|
- lib/wayfarer/routing/dsl.rb
|
425
|
-
- lib/wayfarer/routing/host_matcher.rb
|
456
|
+
- lib/wayfarer/routing/matchers/custom.rb
|
457
|
+
- lib/wayfarer/routing/matchers/host.rb
|
458
|
+
- lib/wayfarer/routing/matchers/path.rb
|
459
|
+
- lib/wayfarer/routing/matchers/query.rb
|
460
|
+
- lib/wayfarer/routing/matchers/scheme.rb
|
461
|
+
- lib/wayfarer/routing/matchers/suffix.rb
|
462
|
+
- lib/wayfarer/routing/matchers/url.rb
|
426
463
|
- lib/wayfarer/routing/path_finder.rb
|
427
|
-
- lib/wayfarer/routing/path_matcher.rb
|
428
|
-
- lib/wayfarer/routing/query_matcher.rb
|
429
464
|
- lib/wayfarer/routing/result.rb
|
430
465
|
- lib/wayfarer/routing/root_route.rb
|
431
466
|
- lib/wayfarer/routing/route.rb
|
432
|
-
- lib/wayfarer/routing/scheme_matcher.rb
|
433
|
-
- lib/wayfarer/routing/suffix_matcher.rb
|
434
467
|
- lib/wayfarer/routing/target_route.rb
|
435
|
-
- lib/wayfarer/routing/url_matcher.rb
|
436
468
|
- lib/wayfarer/serializer.rb
|
437
469
|
- lib/wayfarer/stringify.rb
|
438
470
|
- lib/wayfarer/task.rb
|
439
471
|
- mkdocs.yml
|
440
472
|
- requirements.txt
|
441
473
|
- spec/base_spec.rb
|
474
|
+
- spec/callbacks_spec.rb
|
442
475
|
- spec/cli/generate_spec.rb
|
443
476
|
- spec/cli/job_spec.rb
|
444
477
|
- spec/cli/version_spec.rb
|
445
|
-
- spec/config_spec.rb
|
478
|
+
- spec/config/capybara_spec.rb
|
479
|
+
- spec/config/ferrum_spec.rb
|
480
|
+
- spec/config/networking_spec.rb
|
481
|
+
- spec/config/redis_spec.rb
|
482
|
+
- spec/config/root_spec.rb
|
483
|
+
- spec/config/selenium_spec.rb
|
484
|
+
- spec/config/strconv_spec.rb
|
485
|
+
- spec/config/struct_spec.rb
|
446
486
|
- spec/factories/queue/chain.rb
|
447
487
|
- spec/factories/queue/middleware.rb
|
448
488
|
- spec/factories/queue/page.rb
|
@@ -456,12 +496,13 @@ files:
|
|
456
496
|
- spec/middleware/router_spec.rb
|
457
497
|
- spec/middleware/stage_spec.rb
|
458
498
|
- spec/middleware/worker_spec.rb
|
459
|
-
- spec/networking/adapter.rb
|
499
|
+
- spec/networking/capybara_spec.rb
|
500
|
+
- spec/networking/context_spec.rb
|
460
501
|
- spec/networking/ferrum_spec.rb
|
461
|
-
- spec/networking/healer_spec.rb
|
462
|
-
- spec/networking/net_http_spec.rb
|
502
|
+
- spec/networking/http_spec.rb
|
463
503
|
- spec/networking/pool_spec.rb
|
464
504
|
- spec/networking/selenium_spec.rb
|
505
|
+
- spec/networking/strategy.rb
|
465
506
|
- spec/page_spec.rb
|
466
507
|
- spec/parsing/json_spec.rb
|
467
508
|
- spec/parsing/xml_spec.rb
|
@@ -469,18 +510,18 @@ files:
|
|
469
510
|
- spec/redis/counter_spec.rb
|
470
511
|
- spec/redis/pool_spec.rb
|
471
512
|
- spec/redis/version_spec.rb
|
472
|
-
- spec/routing/custom_matcher_spec.rb
|
473
513
|
- spec/routing/dsl_spec.rb
|
474
|
-
- spec/routing/host_matcher_spec.rb
|
475
514
|
- spec/routing/integration_spec.rb
|
515
|
+
- spec/routing/matchers/custom_spec.rb
|
516
|
+
- spec/routing/matchers/host_spec.rb
|
517
|
+
- spec/routing/matchers/path_spec.rb
|
518
|
+
- spec/routing/matchers/query_spec.rb
|
519
|
+
- spec/routing/matchers/scheme_spec.rb
|
520
|
+
- spec/routing/matchers/suffix_spec.rb
|
521
|
+
- spec/routing/matchers/uri_spec.rb
|
476
522
|
- spec/routing/path_finder_spec.rb
|
477
|
-
- spec/routing/path_matcher_spec.rb
|
478
|
-
- spec/routing/query_matcher_spec.rb
|
479
523
|
- spec/routing/root_route_spec.rb
|
480
524
|
- spec/routing/route_spec.rb
|
481
|
-
- spec/routing/scheme_matcher_spec.rb
|
482
|
-
- spec/routing/suffix_matcher_spec.rb
|
483
|
-
- spec/routing/uri_matcher_spec.rb
|
484
525
|
- spec/spec_helpers.rb
|
485
526
|
- spec/stringify_spec.rb
|
486
527
|
- spec/support/static/finders.html
|
data/lib/wayfarer/config.rb
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
# rubocop:disable Lint/ConstantDefinitionInBlock
|
5
|
-
Config = Struct.new(:adapter,
|
6
|
-
:adapter_pool_size,
|
7
|
-
:adapter_pool_timeout,
|
8
|
-
:ferrum_options,
|
9
|
-
:selenium_argv,
|
10
|
-
:redis_url,
|
11
|
-
:redis_factory,
|
12
|
-
:http_headers) do
|
13
|
-
DEFAULTS = {
|
14
|
-
adapter: :net_http,
|
15
|
-
adapter_pool_size: 3,
|
16
|
-
adapter_pool_timeout: ::ConnectionPool::DEFAULTS[:timeout],
|
17
|
-
ferrum_options: {},
|
18
|
-
selenium_argv: %i[chrome],
|
19
|
-
redis_url: "redis://localhost:6379",
|
20
|
-
redis_factory: -> { ::Redis.new(url: Wayfarer.config.redis_url) },
|
21
|
-
http_headers: {}
|
22
|
-
}.freeze
|
23
|
-
|
24
|
-
def self.default
|
25
|
-
new(*DEFAULTS.values)
|
26
|
-
end
|
27
|
-
|
28
|
-
# rubocop:disable Metrics/AbcSize
|
29
|
-
def self.from_environment(env = ENV)
|
30
|
-
default.tap do |config|
|
31
|
-
config.adapter = env["WAYFARER_ADAPTER"].to_sym if env.key?("WAYFARER_ADAPTER")
|
32
|
-
config.adapter_pool_size = cast(env["WAYFARER_POOL_SIZE"]) if env.key?("WAYFARER_POOL_SIZE")
|
33
|
-
config.adapter_pool_timeout = cast(env["WAYFARER_POOL_TIMEOUT"]) if env.key?("WAYFARER_POOL_TIMEOUT")
|
34
|
-
config.ferrum_options = parse_hash(env["WAYFARER_FERRUM_OPTIONS"]) if env.key?("WAYFARER_FERRUM_OPTIONS")
|
35
|
-
config.selenium_argv = parse_array(env["WAYFARER_SELENIUM_ARGV"]) if env.key?("WAYFARER_SELENIUM_ARGV")
|
36
|
-
config.redis_url = env["WAYFARER_REDIS_URL"] if env.key?("WAYFARER_REDIS_URL")
|
37
|
-
config.http_headers = parse_hash(env["WAYFARER_HTTP_HEADERS"]) if env.key?("WAYFARER_HTTP_HEADERS")
|
38
|
-
end
|
39
|
-
end
|
40
|
-
# rubocop:enable Metrics/AbcSize
|
41
|
-
|
42
|
-
def self.parse_hash(str)
|
43
|
-
parse_array(str).reduce({}) do |acc, pair|
|
44
|
-
k, v = pair.split(":", 2)
|
45
|
-
next acc unless k && v
|
46
|
-
|
47
|
-
acc.merge({ k.to_sym => cast(v) })
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
def self.parse_array(str)
|
52
|
-
str.split(",").map(&:strip)
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.cast(str)
|
56
|
-
return true if str == "true"
|
57
|
-
return false if str == "false"
|
58
|
-
|
59
|
-
begin
|
60
|
-
Integer(str)
|
61
|
-
rescue StandardError
|
62
|
-
str
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
# rubocop:enable Lint/ConstantDefinitionInBlock
|
67
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# An adapter that renews another adapter on certain errors.
|
4
|
-
# This wrapper is required because connection_pool assumes
|
5
|
-
# all connections are self-healing.
|
6
|
-
module Wayfarer
|
7
|
-
module Networking
|
8
|
-
Healer = Struct.new(:adapter) do
|
9
|
-
extend Forwardable
|
10
|
-
|
11
|
-
delegate %i[body browser capybara free] => :adapter
|
12
|
-
|
13
|
-
def fetch(url)
|
14
|
-
adapter.fetch(url)
|
15
|
-
rescue *adapter.class.renew_on => e
|
16
|
-
adapter.renew
|
17
|
-
raise e
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Networking
|
5
|
-
class NetHTTP
|
6
|
-
include Singleton
|
7
|
-
|
8
|
-
def self.renew_on
|
9
|
-
[]
|
10
|
-
end
|
11
|
-
|
12
|
-
attr_reader :conn,
|
13
|
-
:body
|
14
|
-
|
15
|
-
def initialize
|
16
|
-
@conn = Net::HTTP::Persistent.new("wayfarer-#{SecureRandom.uuid}")
|
17
|
-
|
18
|
-
Wayfarer.config.http_headers.each do |key, val|
|
19
|
-
conn.override_headers[key] = val
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def renew; end
|
24
|
-
|
25
|
-
def browser; end
|
26
|
-
|
27
|
-
def capybara; end
|
28
|
-
|
29
|
-
def fetch(url)
|
30
|
-
res = conn.request(URI(url))
|
31
|
-
|
32
|
-
return Result::Redirect.new(url, res["location"]) if res.is_a?(Net::HTTPRedirection)
|
33
|
-
|
34
|
-
@body = res.body
|
35
|
-
page = Wayfarer::Page.new(url: url,
|
36
|
-
status_code: res.code.to_i,
|
37
|
-
body: body,
|
38
|
-
headers: res.to_hash)
|
39
|
-
|
40
|
-
Result::Success.new(page)
|
41
|
-
end
|
42
|
-
|
43
|
-
def live(current_page)
|
44
|
-
current_page
|
45
|
-
end
|
46
|
-
|
47
|
-
def free
|
48
|
-
conn.shutdown
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class CustomMatcher
|
6
|
-
attr_reader :delegate
|
7
|
-
|
8
|
-
def initialize(delegate = proc)
|
9
|
-
@delegate = delegate
|
10
|
-
end
|
11
|
-
|
12
|
-
def match(url)
|
13
|
-
!!delegate.call(url)
|
14
|
-
end
|
15
|
-
|
16
|
-
def params(_)
|
17
|
-
{}
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class HostMatcher
|
6
|
-
attr_reader :host
|
7
|
-
|
8
|
-
def initialize(host)
|
9
|
-
@host = host
|
10
|
-
end
|
11
|
-
|
12
|
-
# rubocop:disable Style/CaseEquality
|
13
|
-
def match(url)
|
14
|
-
@host === url.host
|
15
|
-
end
|
16
|
-
# rubocop:enable Style/CaseEquality
|
17
|
-
|
18
|
-
def params(_)
|
19
|
-
{}
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class PathMatcher
|
6
|
-
attr_reader :path,
|
7
|
-
:route,
|
8
|
-
:peeking,
|
9
|
-
:matcher
|
10
|
-
|
11
|
-
def initialize(path, route)
|
12
|
-
@path = path
|
13
|
-
@route = route
|
14
|
-
@peeking = false
|
15
|
-
@matcher = Mustermann.new(path, type: "sinatra")
|
16
|
-
end
|
17
|
-
|
18
|
-
def match(url)
|
19
|
-
route.accept(self)
|
20
|
-
|
21
|
-
# If the route's branch contains other path matchers in child routes,
|
22
|
-
# match the beginning of the path (peeking), instead of the whole path.
|
23
|
-
!!(if peeking
|
24
|
-
matcher.peek(url.path)
|
25
|
-
else
|
26
|
-
matcher.match(url.path)
|
27
|
-
end)
|
28
|
-
end
|
29
|
-
|
30
|
-
def params(url)
|
31
|
-
return {} unless match(url)
|
32
|
-
|
33
|
-
matcher.params(url.path) || {}
|
34
|
-
end
|
35
|
-
|
36
|
-
def visit(route)
|
37
|
-
return true if route == self.route
|
38
|
-
|
39
|
-
return unless route.matcher.is_a?(self.class)
|
40
|
-
|
41
|
-
@peeking = true
|
42
|
-
false
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
@@ -1,67 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class QueryMatcher
|
6
|
-
attr_reader :fields
|
7
|
-
|
8
|
-
def initialize(fields)
|
9
|
-
@fields = fields
|
10
|
-
end
|
11
|
-
|
12
|
-
def match(url)
|
13
|
-
query = url.query
|
14
|
-
|
15
|
-
# CGI::parse throws a NoMethodError if the query is an empty string
|
16
|
-
return false if query.nil? || query.empty?
|
17
|
-
|
18
|
-
CGI.parse(query).none? { |field, vals| violates?(field, vals) }
|
19
|
-
end
|
20
|
-
|
21
|
-
def params(url)
|
22
|
-
return {} unless match(url)
|
23
|
-
|
24
|
-
CGI.parse(url.query)
|
25
|
-
.select { |(k, _)| fields.keys.include?(k.to_sym) }
|
26
|
-
.transform_values(&:last)
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
# rubocop:disable Lint/AssignmentInCondition
|
32
|
-
def violates?(field, vals)
|
33
|
-
return false unless constraint = fields[field.to_sym]
|
34
|
-
|
35
|
-
violates_constraint?(constraint, vals)
|
36
|
-
end
|
37
|
-
# rubocop:enable Lint/AssignmentInCondition
|
38
|
-
|
39
|
-
def violates_constraint?(constraint, vals)
|
40
|
-
case constraint
|
41
|
-
when String then violates_string?(constraint, vals)
|
42
|
-
when Integer then violates_integer?(constraint, vals)
|
43
|
-
when Regexp then violates_regexp?(constraint, vals)
|
44
|
-
when Range then violates_range?(constraint, vals)
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
|
-
def violates_string?(str, vals)
|
49
|
-
vals.none? { |val| str == val }
|
50
|
-
end
|
51
|
-
|
52
|
-
def violates_integer?(int, vals)
|
53
|
-
vals.none? { |val| int == Integer(val) }
|
54
|
-
rescue ArgumentError
|
55
|
-
true
|
56
|
-
end
|
57
|
-
|
58
|
-
def violates_regexp?(regexp, vals)
|
59
|
-
vals.none? { |val| regexp.match(val) }
|
60
|
-
end
|
61
|
-
|
62
|
-
def violates_range?(range, vals)
|
63
|
-
vals.none? { |val| range.include?(val.to_i) }
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class SchemeMatcher
|
6
|
-
attr_reader :scheme
|
7
|
-
|
8
|
-
def initialize(scheme)
|
9
|
-
@scheme = scheme
|
10
|
-
end
|
11
|
-
|
12
|
-
def match(url)
|
13
|
-
url.scheme == scheme.to_s
|
14
|
-
end
|
15
|
-
|
16
|
-
def params(_)
|
17
|
-
{}
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class SuffixMatcher
|
6
|
-
attr_reader :suffix
|
7
|
-
|
8
|
-
def initialize(suffix)
|
9
|
-
@suffix = suffix
|
10
|
-
end
|
11
|
-
|
12
|
-
def match(url)
|
13
|
-
url.path.end_with?(suffix)
|
14
|
-
end
|
15
|
-
|
16
|
-
def params(_)
|
17
|
-
{}
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Wayfarer
|
4
|
-
module Routing
|
5
|
-
class URLMatcher
|
6
|
-
attr_reader :url
|
7
|
-
|
8
|
-
def initialize(url)
|
9
|
-
@url = url
|
10
|
-
end
|
11
|
-
|
12
|
-
def match(url)
|
13
|
-
url == URI(@url)
|
14
|
-
end
|
15
|
-
|
16
|
-
def params(_)
|
17
|
-
{}
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|