wayfarer-jruby 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rbenv-gemsets +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +21 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/.yardopts +3 -0
- data/Gemfile +11 -0
- data/LICENSE +19 -0
- data/README.md +19 -0
- data/Rakefile +114 -0
- data/benchmark/frontiers.rb +143 -0
- data/bin/wayfarer +116 -0
- data/docs/.gitignore +2 -0
- data/docs/_config.yml +15 -0
- data/docs/_includes/base.html +7 -0
- data/docs/_includes/head.html +10 -0
- data/docs/_includes/navigation.html +172 -0
- data/docs/_layouts/default.html +42 -0
- data/docs/_sass/base.scss +439 -0
- data/docs/_sass/variables.scss +24 -0
- data/docs/_sass/vendor/bourbon/_bourbon-deprecate.scss +19 -0
- data/docs/_sass/vendor/bourbon/_bourbon-deprecated-upcoming.scss +425 -0
- data/docs/_sass/vendor/bourbon/_bourbon.scss +90 -0
- data/docs/_sass/vendor/bourbon/addons/_border-color.scss +29 -0
- data/docs/_sass/vendor/bourbon/addons/_border-radius.scss +48 -0
- data/docs/_sass/vendor/bourbon/addons/_border-style.scss +28 -0
- data/docs/_sass/vendor/bourbon/addons/_border-width.scss +28 -0
- data/docs/_sass/vendor/bourbon/addons/_buttons.scss +69 -0
- data/docs/_sass/vendor/bourbon/addons/_clearfix.scss +25 -0
- data/docs/_sass/vendor/bourbon/addons/_ellipsis.scss +30 -0
- data/docs/_sass/vendor/bourbon/addons/_font-stacks.scss +31 -0
- data/docs/_sass/vendor/bourbon/addons/_hide-text.scss +27 -0
- data/docs/_sass/vendor/bourbon/addons/_margin.scss +29 -0
- data/docs/_sass/vendor/bourbon/addons/_padding.scss +29 -0
- data/docs/_sass/vendor/bourbon/addons/_position.scss +51 -0
- data/docs/_sass/vendor/bourbon/addons/_prefixer.scss +66 -0
- data/docs/_sass/vendor/bourbon/addons/_retina-image.scss +27 -0
- data/docs/_sass/vendor/bourbon/addons/_size.scss +56 -0
- data/docs/_sass/vendor/bourbon/addons/_text-inputs.scss +118 -0
- data/docs/_sass/vendor/bourbon/addons/_timing-functions.scss +34 -0
- data/docs/_sass/vendor/bourbon/addons/_triangle.scss +63 -0
- data/docs/_sass/vendor/bourbon/addons/_word-wrap.scss +29 -0
- data/docs/_sass/vendor/bourbon/css3/_animation.scss +61 -0
- data/docs/_sass/vendor/bourbon/css3/_appearance.scss +5 -0
- data/docs/_sass/vendor/bourbon/css3/_backface-visibility.scss +5 -0
- data/docs/_sass/vendor/bourbon/css3/_background-image.scss +44 -0
- data/docs/_sass/vendor/bourbon/css3/_background.scss +57 -0
- data/docs/_sass/vendor/bourbon/css3/_border-image.scss +61 -0
- data/docs/_sass/vendor/bourbon/css3/_calc.scss +6 -0
- data/docs/_sass/vendor/bourbon/css3/_columns.scss +67 -0
- data/docs/_sass/vendor/bourbon/css3/_filter.scss +6 -0
- data/docs/_sass/vendor/bourbon/css3/_flex-box.scss +327 -0
- data/docs/_sass/vendor/bourbon/css3/_font-face.scss +29 -0
- data/docs/_sass/vendor/bourbon/css3/_font-feature-settings.scss +6 -0
- data/docs/_sass/vendor/bourbon/css3/_hidpi-media-query.scss +12 -0
- data/docs/_sass/vendor/bourbon/css3/_hyphens.scss +6 -0
- data/docs/_sass/vendor/bourbon/css3/_image-rendering.scss +15 -0
- data/docs/_sass/vendor/bourbon/css3/_keyframes.scss +38 -0
- data/docs/_sass/vendor/bourbon/css3/_linear-gradient.scss +40 -0
- data/docs/_sass/vendor/bourbon/css3/_perspective.scss +12 -0
- data/docs/_sass/vendor/bourbon/css3/_placeholder.scss +10 -0
- data/docs/_sass/vendor/bourbon/css3/_radial-gradient.scss +40 -0
- data/docs/_sass/vendor/bourbon/css3/_selection.scss +44 -0
- data/docs/_sass/vendor/bourbon/css3/_text-decoration.scss +27 -0
- data/docs/_sass/vendor/bourbon/css3/_transform.scss +21 -0
- data/docs/_sass/vendor/bourbon/css3/_transition.scss +81 -0
- data/docs/_sass/vendor/bourbon/css3/_user-select.scss +5 -0
- data/docs/_sass/vendor/bourbon/functions/_assign-inputs.scss +16 -0
- data/docs/_sass/vendor/bourbon/functions/_contains-falsy.scss +25 -0
- data/docs/_sass/vendor/bourbon/functions/_contains.scss +31 -0
- data/docs/_sass/vendor/bourbon/functions/_is-length.scss +16 -0
- data/docs/_sass/vendor/bourbon/functions/_is-light.scss +26 -0
- data/docs/_sass/vendor/bourbon/functions/_is-number.scss +16 -0
- data/docs/_sass/vendor/bourbon/functions/_is-size.scss +23 -0
- data/docs/_sass/vendor/bourbon/functions/_modular-scale.scss +74 -0
- data/docs/_sass/vendor/bourbon/functions/_px-to-em.scss +24 -0
- data/docs/_sass/vendor/bourbon/functions/_px-to-rem.scss +26 -0
- data/docs/_sass/vendor/bourbon/functions/_shade.scss +24 -0
- data/docs/_sass/vendor/bourbon/functions/_strip-units.scss +22 -0
- data/docs/_sass/vendor/bourbon/functions/_tint.scss +24 -0
- data/docs/_sass/vendor/bourbon/functions/_transition-property-name.scss +37 -0
- data/docs/_sass/vendor/bourbon/functions/_unpack.scss +32 -0
- data/docs/_sass/vendor/bourbon/helpers/_convert-units.scss +26 -0
- data/docs/_sass/vendor/bourbon/helpers/_directional-values.scss +108 -0
- data/docs/_sass/vendor/bourbon/helpers/_font-source-declaration.scss +53 -0
- data/docs/_sass/vendor/bourbon/helpers/_gradient-positions-parser.scss +24 -0
- data/docs/_sass/vendor/bourbon/helpers/_linear-angle-parser.scss +35 -0
- data/docs/_sass/vendor/bourbon/helpers/_linear-gradient-parser.scss +51 -0
- data/docs/_sass/vendor/bourbon/helpers/_linear-positions-parser.scss +77 -0
- data/docs/_sass/vendor/bourbon/helpers/_linear-side-corner-parser.scss +41 -0
- data/docs/_sass/vendor/bourbon/helpers/_radial-arg-parser.scss +74 -0
- data/docs/_sass/vendor/bourbon/helpers/_radial-gradient-parser.scss +55 -0
- data/docs/_sass/vendor/bourbon/helpers/_radial-positions-parser.scss +28 -0
- data/docs/_sass/vendor/bourbon/helpers/_render-gradients.scss +31 -0
- data/docs/_sass/vendor/bourbon/helpers/_shape-size-stripper.scss +15 -0
- data/docs/_sass/vendor/bourbon/helpers/_str-to-num.scss +55 -0
- data/docs/_sass/vendor/bourbon/settings/_asset-pipeline.scss +7 -0
- data/docs/_sass/vendor/bourbon/settings/_deprecation-warnings.scss +8 -0
- data/docs/_sass/vendor/bourbon/settings/_prefixer.scss +9 -0
- data/docs/_sass/vendor/bourbon/settings/_px-to-em.scss +1 -0
- data/docs/_sass/vendor/neat/_neat-helpers.scss +11 -0
- data/docs/_sass/vendor/neat/_neat.scss +23 -0
- data/docs/_sass/vendor/neat/functions/_new-breakpoint.scss +49 -0
- data/docs/_sass/vendor/neat/functions/_private.scss +114 -0
- data/docs/_sass/vendor/neat/grid/_box-sizing.scss +15 -0
- data/docs/_sass/vendor/neat/grid/_direction-context.scss +33 -0
- data/docs/_sass/vendor/neat/grid/_display-context.scss +28 -0
- data/docs/_sass/vendor/neat/grid/_fill-parent.scss +22 -0
- data/docs/_sass/vendor/neat/grid/_media.scss +92 -0
- data/docs/_sass/vendor/neat/grid/_omega.scss +87 -0
- data/docs/_sass/vendor/neat/grid/_outer-container.scss +34 -0
- data/docs/_sass/vendor/neat/grid/_pad.scss +25 -0
- data/docs/_sass/vendor/neat/grid/_private.scss +35 -0
- data/docs/_sass/vendor/neat/grid/_row.scss +52 -0
- data/docs/_sass/vendor/neat/grid/_shift.scss +50 -0
- data/docs/_sass/vendor/neat/grid/_span-columns.scss +94 -0
- data/docs/_sass/vendor/neat/grid/_to-deprecate.scss +97 -0
- data/docs/_sass/vendor/neat/grid/_visual-grid.scss +42 -0
- data/docs/_sass/vendor/neat/mixins/_clearfix.scss +25 -0
- data/docs/_sass/vendor/neat/settings/_disable-warnings.scss +13 -0
- data/docs/_sass/vendor/neat/settings/_grid.scss +51 -0
- data/docs/_sass/vendor/neat/settings/_visual-grid.scss +27 -0
- data/docs/_sass/vendor/normalize-3.0.2.scss +427 -0
- data/docs/_sass/vendor/pygments.scss +356 -0
- data/docs/automating_browsers/capybara.md +70 -0
- data/docs/css/screen.scss +7 -0
- data/docs/guides/callbacks.md +45 -0
- data/docs/guides/cli.md +52 -0
- data/docs/guides/configuration.md +184 -0
- data/docs/guides/error_handling.md +46 -0
- data/docs/guides/frontiers.md +93 -0
- data/docs/guides/halting.md +23 -0
- data/docs/guides/job_queues.md +26 -0
- data/docs/guides/locals.md +36 -0
- data/docs/guides/logging.md +22 -0
- data/docs/guides/page_objects.md +67 -0
- data/docs/guides/peeking.md +46 -0
- data/docs/guides/selenium_capybara.md +100 -0
- data/docs/guides/tutorial.md +452 -0
- data/docs/index.md +82 -0
- data/docs/js/navigation.js +11 -0
- data/docs/misc/contributing.md +20 -0
- data/docs/misc/testing.md +11 -0
- data/docs/recipes/authentication.md +23 -0
- data/docs/recipes/csv.md +29 -0
- data/docs/recipes/javascript.md +20 -0
- data/docs/recipes/multiple_uris.md +18 -0
- data/docs/recipes/screenshots.md +20 -0
- data/docs/routing/host_rules.md +24 -0
- data/docs/routing/path_rules.md +33 -0
- data/docs/routing/query_rules.md +69 -0
- data/docs/routing/routes.md +96 -0
- data/docs/routing/uri_rules.md +18 -0
- data/examples/collect_github_issues.rb +65 -0
- data/examples/find_foobar_on_wikipedia.rb +23 -0
- data/lib/wayfarer.rb +65 -0
- data/lib/wayfarer/configuration.rb +86 -0
- data/lib/wayfarer/crawl.rb +79 -0
- data/lib/wayfarer/crawl_observer.rb +103 -0
- data/lib/wayfarer/dispatcher.rb +104 -0
- data/lib/wayfarer/finders.rb +61 -0
- data/lib/wayfarer/frontiers/frontier.rb +79 -0
- data/lib/wayfarer/frontiers/memory_bloomfilter.rb +32 -0
- data/lib/wayfarer/frontiers/memory_frontier.rb +76 -0
- data/lib/wayfarer/frontiers/memory_trie_frontier.rb +39 -0
- data/lib/wayfarer/frontiers/normalize_uris.rb +48 -0
- data/lib/wayfarer/frontiers/redis_bloomfilter.rb +34 -0
- data/lib/wayfarer/frontiers/redis_frontier.rb +83 -0
- data/lib/wayfarer/http_adapters/adapter_pool.rb +62 -0
- data/lib/wayfarer/http_adapters/net_http_adapter.rb +77 -0
- data/lib/wayfarer/http_adapters/selenium_adapter.rb +80 -0
- data/lib/wayfarer/job.rb +192 -0
- data/lib/wayfarer/locals.rb +40 -0
- data/lib/wayfarer/page.rb +94 -0
- data/lib/wayfarer/parsers/json_parser.rb +20 -0
- data/lib/wayfarer/parsers/xml_parser.rb +27 -0
- data/lib/wayfarer/processor.rb +103 -0
- data/lib/wayfarer/routing/host_rule.rb +19 -0
- data/lib/wayfarer/routing/path_rule.rb +54 -0
- data/lib/wayfarer/routing/query_rule.rb +59 -0
- data/lib/wayfarer/routing/router.rb +71 -0
- data/lib/wayfarer/routing/rule.rb +102 -0
- data/lib/wayfarer/routing/uri_rule.rb +21 -0
- data/spec/configuration_spec.rb +26 -0
- data/spec/crawl_spec.rb +48 -0
- data/spec/finders_spec.rb +49 -0
- data/spec/frontiers/memory_bloomfilter_spec.rb +6 -0
- data/spec/frontiers/memory_frontier_spec.rb +6 -0
- data/spec/frontiers/memory_trie_frontier_spec.rb +6 -0
- data/spec/frontiers/normalize_uris_spec.rb +59 -0
- data/spec/frontiers/redis_bloomfilter_spec.rb +6 -0
- data/spec/frontiers/redis_frontier_spec.rb +6 -0
- data/spec/http_adapters/adapter_pool_spec.rb +33 -0
- data/spec/http_adapters/net_http_adapter_spec.rb +83 -0
- data/spec/http_adapters/selenium_adapter_spec.rb +53 -0
- data/spec/integration/callbacks_spec.rb +42 -0
- data/spec/integration/locals_spec.rb +106 -0
- data/spec/job_spec.rb +86 -0
- data/spec/page_spec.rb +38 -0
- data/spec/parsers/json_parser_spec.rb +30 -0
- data/spec/parsers/xml_parser_spec.rb +24 -0
- data/spec/processor_spec.rb +31 -0
- data/spec/routing/host_rule_spec.rb +48 -0
- data/spec/routing/path_rule_spec.rb +66 -0
- data/spec/routing/query_rule_spec.rb +124 -0
- data/spec/routing/router_spec.rb +67 -0
- data/spec/routing/rule_spec.rb +218 -0
- data/spec/routing/uri_rule_spec.rb +24 -0
- data/spec/shared/frontier.rb +96 -0
- data/spec/spec_helpers.rb +62 -0
- data/spec/wayfarer_spec.rb +24 -0
- data/support/static/finders.html +38 -0
- data/support/static/graph/details/a.html +10 -0
- data/support/static/graph/details/b.html +10 -0
- data/support/static/graph/index.html +20 -0
- data/support/static/json/dummy.json +13 -0
- data/support/static/links/links.html +28 -0
- data/support/static/xml/dummy.xml +120 -0
- data/support/test_app.rb +45 -0
- data/wayfarer-jruby.gemspec +49 -0
- data/wayfarer.gemspec +53 -0
- metadata +616 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "concurrent"
|
4
|
+
|
5
|
+
module Wayfarer
|
6
|
+
# @api private
|
7
|
+
module Locals
|
8
|
+
# Wraps a plain value in its concurrent-ruby equivalent.
#
# Arrays and hashes are copied into `Concurrent::Array` / `Concurrent::Hash`,
# booleans become a `Concurrent::AtomicBoolean` and integers a
# `Concurrent::AtomicFixnum`. Any other value is returned untouched.
#
# @param [Object] value the value to wrap.
# @return [Object] the wrapped value, or `value` itself.
def self.thread_safe_counterpart(value)
  case value
  when Array
    Concurrent::Array.new(value)
  when Hash
    Concurrent::Hash[value]
  when TrueClass, FalseClass
    Concurrent::AtomicBoolean.new(value)
  when Integer
    Concurrent::AtomicFixnum.new(value)
  else
    value
  end
end
|
18
|
+
|
19
|
+
# Hook run when this module is included: extends the including class
# with {ClassMethods}.
#
# @param [Module] base the class or module including this module.
def self.included(base)
  base.extend ClassMethods
end
|
22
|
+
|
23
|
+
# Class-level API for declaring locals.
module ClassMethods
  # Replaces the whole table of locals.
  attr_writer :locals

  # The table of locals declared so far, created empty on first access.
  # @return [Hash]
  def locals
    @locals ||= {}
  end

  # Stores the block's return value under `key`.
  #
  # @param [Object] key the name of the local.
  # @return [Object] the value returned by the block.
  # @raise [RuntimeError] if no block is given.
  def let(key)
    raise "#let called without a block" unless block_given?

    locals[key] = yield
  end
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ostruct"
|
4
|
+
require "forwardable"
|
5
|
+
require "mime/types"
|
6
|
+
require "mime-types"
|
7
|
+
|
8
|
+
require "pismo" unless RUBY_PLATFORM == "java"
|
9
|
+
|
10
|
+
module Wayfarer
|
11
|
+
# The representation of fetched pages
|
12
|
+
class Page
|
13
|
+
extend Forwardable
|
14
|
+
|
15
|
+
include Finders
|
16
|
+
|
17
|
+
# @!attribute [r] uri
|
18
|
+
# @return [URI] the URI of the page.
|
19
|
+
attr_reader :uri
|
20
|
+
|
21
|
+
# @!attribute [r] status_code
|
22
|
+
# @return [Fixnum] the response status code.
|
23
|
+
attr_reader :status_code
|
24
|
+
|
25
|
+
# @!attribute [rw] body
|
26
|
+
# @return [String] the response body.
|
27
|
+
attr_accessor :body
|
28
|
+
|
29
|
+
# @!attribute [r] headers
|
30
|
+
# @return [Hash] the response headers.
|
31
|
+
attr_reader :headers
|
32
|
+
|
33
|
+
# Builds a page from a hash of attributes.
#
# @param [Hash] attrs values for `:uri`, `:status_code`, `:body` and
#   `:headers`; keys that are missing leave the attribute `nil`.
def initialize(attrs = {})
  @uri, @status_code, @body, @headers =
    attrs.values_at(:uri, :status_code, :body, :headers)
end
|
39
|
+
|
40
|
+
# Returns a parsed representation of the fetched document depending on the
# Content-Type field. The result is memoized.
#
# HTML is assumed when there are no headers, when no Content-Type field is
# present, or when the MIME type is not known to the registry.
#
# @return [Hash, Array] the parsed JSON if the Content-Type field's
#   sub-type is "json".
# @return [Nokogiri::XML::Document] if the Content-Type field's sub-type is "xml".
# @return [Nokogiri::HTML::Document] otherwise.
def doc
  return @doc if @doc

  # Tolerate a missing header hash and a header value given as a single
  # String instead of an Array of values.
  content_type = Array((@headers || {})["content-type"]).first

  # If no Content-Type field is present, assume HTML
  return @doc = Parsers::XMLParser.parse_html(@body) unless content_type

  # The registry lookup yields nothing for unknown types; fall through to
  # the HTML branch instead of calling #sub_type on nil.
  sub_type = MIME::Types[content_type].first&.sub_type

  @doc = case sub_type
         when "json" then Parsers::JSONParser.parse(@body)
         when "xml" then Parsers::XMLParser.parse_xml(@body)
         else Parsers::XMLParser.parse_html(@body)
         end
end
|
67
|
+
|
68
|
+
# Pismo is not supported on JRuby.
|
69
|
+
unless RUBY_PLATFORM == "java"
|
70
|
+
# `#images` is included from the Helpers module
|
71
|
+
# `#body` is an attribute reader defined above
|
72
|
+
delegate (Pismo::Document::ATTRIBUTE_METHODS - %i[images body]) => :pismo
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
# Returns the memoized Pismo document for this page.
# @note Not available on JRuby.
# @note Only succeeds when {#doc} returns a `Nokogiri::HTML::Document`.
# @return [Pismo::Document]
def pismo
  @pismo_doc ||= instantiate_pismo_document
end

# Builds a Pismo document without running its initializer and seeds its
# state from the URI, body and parsed document of this page.
# @return [Pismo::Document]
def instantiate_pismo_document
  Pismo::Document.allocate.tap do |document|
    document.instance_variable_set(:@options, {})
    document.instance_variable_set(:@url, uri)
    document.instance_variable_set(:@html, body)
    document.instance_variable_set(:@doc, doc)
  end
end
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
require "oj" unless RUBY_PLATFORM == "java"
|
4
|
+
|
5
|
+
module Wayfarer
|
6
|
+
module Parsers
|
7
|
+
# A wrapper module for parsing JSON.
# @private
module JSONParser
  module_function

  # Parses a JSON string.
  #
  # Uses the `JSON` library on JRuby and Oj everywhere else. Oj is run in
  # strict mode so that a fetched document can only produce plain JSON
  # values rather than being decoded in Oj's Ruby-object mode.
  #
  # @param [String] json_str the JSON string to parse.
  # @return [Hash, Array] the parsed JSON value.
  def parse(json_str)
    return JSON.parse(json_str) if RUBY_PLATFORM == "java"

    Oj.load(json_str, mode: :strict)
  end
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "nokogiri"
|
4
|
+
|
5
|
+
module Wayfarer
|
6
|
+
module Parsers
|
7
|
+
# A wrapper module around Nokogiri for parsing HTML and XML.
# @private
module XMLParser
  module_function

  # Parses a string as XML.
  # @param [String] xml_str the XML string to parse.
  # @return [Nokogiri::XML::Document]
  def parse_xml(xml_str)
    Nokogiri.XML(xml_str)
  end

  # Parses a string as HTML.
  # @param [String] html_str the HTML string to parse.
  # @return [Nokogiri::HTML::Document]
  def parse_html(html_str)
    Nokogiri.HTML(html_str)
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pp"
|
4
|
+
require "concurrent"
|
5
|
+
require "observer"
|
6
|
+
|
7
|
+
module Wayfarer
|
8
|
+
# Runs jobs.
|
9
|
+
class Processor
|
10
|
+
extend Forwardable
|
11
|
+
|
12
|
+
include Observable
|
13
|
+
include CrawlObserver::Events
|
14
|
+
include CrawlObserver::ObservableShortcuts
|
15
|
+
|
16
|
+
attr_reader :job
|
17
|
+
|
18
|
+
delegate config: :job
|
19
|
+
delegate logger: :config
|
20
|
+
|
21
|
+
# Sets up a processor that is not halted yet.
#
# @param job the job to run; also the source of the configuration.
# @param frontier the frontier that is cycled by {#run}.
# @param dispatcher the dispatcher each URI is handed to.
def initialize(job, frontier, dispatcher)
  @job, @frontier, @dispatcher = job, frontier, dispatcher
  @halted = Concurrent::AtomicBoolean.new(false)
end
|
27
|
+
|
28
|
+
# Whether the halt flag has been set.
# @return [true, false]
def halted?
  @halted.true?
end
|
33
|
+
|
34
|
+
# Sets the halt flag. {#run} checks the flag before every cycle and its
# worker threads check it after every URI they take from the queue.
def halt!
  @halted.make_true
end
|
38
|
+
|
39
|
+
# Runs the job.
#
# Cycles the frontier until it reports no further cycle or the halt flag is
# set. For every cycle the current URIs are pushed onto a queue that is
# drained by `config.connection_count` worker threads. On the way out, even
# when an error is raised, the processor is halted and both the frontier
# and the dispatcher's adapter pool are freed.
#
# @param [*Array<URI>, *Array<String>] _uris accepted but not used here;
#   the URIs to process are read from the frontier.
def run(*_uris)
  notify_observers!(FirstCycle.new(@frontier))

  while @halted.false? && @frontier.cycle
    current_uris = @frontier.current_uris
    queue = current_uris.inject(Queue.new, :push)

    notify_observers!(NewCycle.new(current_uris.count))

    @threads = Array.new(config.connection_count) do
      Thread.new do
        begin
          loop do
            # Non-blocking pop: raises ThreadError once the queue is empty.
            uri = queue.pop(true)
            break if uri.nil? || @halted.true?
            handle_dispatch_result(@dispatcher.dispatch(@job, uri))
          end
        rescue ThreadError
          # Every worker that finds the queue empty reports the end of the
          # cycle. NOTE(review): a ThreadError raised while dispatching
          # would end up here as well.
          notify_observers!(CycleFinished.new)
        end
      end
    end

    @threads.each(&:join)

    notify_observers!(AboutToCycle.new(@frontier.staged_uris.count))
  end
ensure
  halt!
  begin
    @frontier.free
  ensure
    # Free the adapters even if freeing the frontier failed, so that they
    # are not leaked.
    @dispatcher.adapter_pool.free
  end
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
# Routes a dispatch result to the handler for its type. Results of any
# other type are ignored.
def handle_dispatch_result(result)
  case result
  when Dispatcher::Mismatch
    handle_mismatch(result)
  when Dispatcher::Halt
    handle_halt(result)
  when Dispatcher::Stage
    handle_stage(result)
  when Dispatcher::Error
    handle_error(result)
  end
end

# Reports a URI that was mismatched.
def handle_mismatch(mismatch)
  event = MismatchedURI.new(mismatch.uri)
  notify_observers!(event)
end

# Reports the halt request, then sets the halt flag.
def handle_halt(halt)
  event = HaltInitiated.new(halt.action, halt.uri)
  notify_observers!(event)
  halt!
end

# Reports the number of URIs to stage and stages them, unless the
# processor has been halted in the meantime.
def handle_stage(stage)
  event = StagingURIs.new(stage.uris.count)
  notify_observers!(event)
  return if halted?

  @frontier.stage(*stage.uris)
end

# Reports the exception carried by the result.
def handle_error(error)
  event = UnhandledError.new(error.exception)
  notify_observers!(event)
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Wayfarer
|
4
|
+
module Routing
|
5
|
+
# A rule that tests the host of a URI against a String or a Regexp.
# @private
class HostRule < Rule
  # @param [String, Regexp] str_or_regexp the expected host.
  # @param [Hash] opts passed on to the superclass.
  def initialize(str_or_regexp, opts = {}, &proc)
    @str_or_regexp = str_or_regexp
    super(opts, &proc)
  end

  # Compares with `===`, so a String has to equal the host while a Regexp
  # has to match it.
  # rubocop:disable Style/CaseEquality
  def match!(uri)
    host = uri.host
    @str_or_regexp === host
  end
  # rubocop:enable Style/CaseEquality
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "mustermann"
|
4
|
+
|
5
|
+
module Wayfarer
|
6
|
+
module Routing
|
7
|
+
# @private
|
8
|
+
class PathRule < Rule
|
9
|
+
attr_reader :matcher
|
10
|
+
|
11
|
+
# Builds the matcher for this rule.
#
# A String is compiled into a Mustermann pattern of the configured type;
# any other argument is used as the matcher unchanged.
#
# @param [String, Object] arg the pattern string or a ready-made matcher.
# @param [Hash] opts passed on to the superclass.
def initialize(arg, opts = {}, &proc)
  pattern_type = Wayfarer.config.mustermann_type if arg.is_a?(String)
  @matcher = arg.is_a?(String) ? Mustermann.new(arg, type: pattern_type) : arg

  super(opts, &proc)
end
|
20
|
+
|
21
|
+
# Collects the parameters captured from a URI.
#
# A Mustermann matcher reports its own parameters for the URI's path. For
# any other matcher the captures of the match against the full path are
# keyed by their position, as Strings ("0", "1", ...).
#
# @param [URI] uri
# @return [Hash] the parameters, empty if the URI does not match.
def params(uri)
  return {} unless match!(uri)
  return @matcher.params(uri.path) if @matcher.is_a?(Mustermann)

  @matcher.match(full_path(uri)).captures
          .each_with_index
          .map { |capture, index| [index.to_s, capture] }
          .to_h
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
# rubocop:disable Style/CaseEquality
|
40
|
+
# Tests the URI against the matcher. A Mustermann matcher is compared
# with the path only, any other matcher with the full path.
def match!(uri)
  return @matcher === uri.path if @matcher.is_a?(Mustermann)

  @matcher =~ full_path(uri)
end
|
47
|
+
# rubocop:enable Style/CaseEquality
|
48
|
+
|
49
|
+
# Joins the path, query and fragment of a URI into one String.
#
# The "?" and "#" separators are only added when the URI actually has a
# query or a fragment, so a URI without either yields just its path and a
# pattern anchored at the end of the String can match it.
#
# @param [URI] uri
# @return [String]
def full_path(uri)
  query = uri.query ? "?#{uri.query}" : ""
  fragment = uri.fragment ? "##{uri.fragment}" : ""

  "#{uri.path}#{query}#{fragment}"
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "cgi"
|
4
|
+
|
5
|
+
module Wayfarer
|
6
|
+
module Routing
|
7
|
+
# @private
|
8
|
+
class QueryRule < Rule
|
9
|
+
# @param [Hash] field_constraints constraints keyed by query field name;
#   they are looked up with Symbol keys.
# @param [Hash] opts passed on to the superclass.
def initialize(field_constraints, opts = {}, &proc)
  @field_constraints = field_constraints

  super(opts, &proc)
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def match!(uri)
|
17
|
+
CGI.parse(uri.query).none? do |field, vals| violates?(field, vals) end
|
18
|
+
rescue NoMethodError
|
19
|
+
# `CGI::parse` throws a `NoMethodError` if `uri.query` is an empty
|
20
|
+
# string
|
21
|
+
false
|
22
|
+
end
|
23
|
+
|
24
|
+
# rubocop:disable Lint/AssignmentInCondition
|
25
|
+
def violates?(field, vals)
|
26
|
+
return false unless constraint = @field_constraints[field.to_sym]
|
27
|
+
violates_constraint?(constraint, vals)
|
28
|
+
end
|
29
|
+
# rubocop:enable Lint/AssignmentInCondition
|
30
|
+
|
31
|
+
# Checks the values of a query field against a constraint, choosing the
# check by the type of the constraint. Constraints of any other type yield
# `nil` and therefore never count as violated.
def violates_constraint?(constraint, vals)
  case constraint
  when String then violates_string?(constraint, vals)
  when Integer then violates_integer?(constraint, vals)
  when Regexp then violates_regexp?(constraint, vals)
  when Range then violates_range?(constraint, vals)
  end
end

# Violated unless one of the values equals the String.
def violates_string?(str, vals)
  vals.none? { |val| str == val }
end

# Violated unless one of the values parses to the Integer.
#
# A value that is not a valid Integer literal only disqualifies itself, so
# the outcome does not depend on the order of the values.
def violates_integer?(int, vals)
  vals.none? do |val|
    begin
      int == Integer(val)
    rescue ArgumentError
      false
    end
  end
end

# Violated unless one of the values matches the Regexp.
def violates_regexp?(regexp, vals)
  vals.none? { |val| regexp.match(val) }
end

# Violated unless one of the values, converted with `to_i`, lies within
# the Range.
def violates_range?(range, vals)
  vals.none? { |val| range.include?(val.to_i) }
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "forwardable"
|
4
|
+
|
5
|
+
module Wayfarer
|
6
|
+
module Routing
|
7
|
+
# A {Router} maps URIs onto the instance methods (actions) of a {Job}.
class Router
  extend Forwardable

  # @!attribute [r] rule
  # @return [Rule] the rule that {#route} invokes.
  attr_reader :rule

  # @!attribute [r] blacklist
  # @return [Rule] the rule that decides which URIs are forbidden.
  attr_reader :blacklist

  # `#uri`, `#host`, `#path` and `#query` are forwarded to {#rule}.
  def_delegators :rule, :uri, :host, :path, :query

  def initialize
    @rule = Rule.new
    @blacklist = Rule.new
  end

  # Looks up the action and the parameters for a URI by invoking the rule.
  # @return [Array(Symbol, Hash)] the action and the collected parameters
  #   if the rule matches and yields parameters.
  # @return [false] if no matching rule exists or the URI is forbidden.
  def route(uri)
    return false if forbids?(uri)

    # TODO: Use structs instead
    matched, params, action = @rule.invoke(uri)
    matched && params ? [action, params] : false
  end

  # Whether a route matches the URI.
  # TODO: Test
  # @return [true, false]
  def routes?(uri)
    route(uri) ? true : false
  end

  # Extends the blacklist from an options hash and, if a block is given,
  # by evaluating the block in the context of the blacklist rule.
  # @return [Rule] the blacklist.
  def forbid(opts = {}, &proc)
    @blacklist.build_child_rule_chain_from_options(opts)
    @blacklist.instance_eval(&proc) if proc
    @blacklist
  end

  # Whether the URI is matched by the blacklist rule.
  # @see #forbid
  def forbids?(uri)
    @blacklist.matches?(uri)
  end

  # Whether the URI is not matched by the blacklist rule.
  # @see #forbid
  def allows?(uri)
    !forbids?(uri)
  end
end
|
70
|
+
end
|
71
|
+
end
|