web_fetch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +10 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +5 -0
  7. data/Gemfile.lock +120 -0
  8. data/LICENSE +7 -0
  9. data/README.md +149 -0
  10. data/TODO +0 -0
  11. data/bin/rspec +29 -0
  12. data/bin/rubocop +29 -0
  13. data/bin/web_fetch_control +6 -0
  14. data/bin/web_fetch_server +30 -0
  15. data/config/locales/en.yml +12 -0
  16. data/doc/client_example.rb +19 -0
  17. data/doc/web_fetch_architecture.png +0 -0
  18. data/lib/web_fetch/client.rb +101 -0
  19. data/lib/web_fetch/concerns/http_helpers.rb +64 -0
  20. data/lib/web_fetch/concerns/validatable.rb +31 -0
  21. data/lib/web_fetch/event_machine_helpers.rb +36 -0
  22. data/lib/web_fetch/gatherer.rb +62 -0
  23. data/lib/web_fetch/helpers.rb +11 -0
  24. data/lib/web_fetch/http_helpers.rb +71 -0
  25. data/lib/web_fetch/logger.rb +29 -0
  26. data/lib/web_fetch/resources.rb +59 -0
  27. data/lib/web_fetch/retriever.rb +39 -0
  28. data/lib/web_fetch/router.rb +71 -0
  29. data/lib/web_fetch/server.rb +49 -0
  30. data/lib/web_fetch/storage.rb +16 -0
  31. data/lib/web_fetch/version.rb +5 -0
  32. data/lib/web_fetch.rb +40 -0
  33. data/spec/client_spec.rb +63 -0
  34. data/spec/concerns/validatable_spec.rb +53 -0
  35. data/spec/features/http_fetching_spec.rb +0 -0
  36. data/spec/gatherer_spec.rb +109 -0
  37. data/spec/helpers_spec.rb +18 -0
  38. data/spec/i18n_spec.rb +8 -0
  39. data/spec/resources_spec.rb +42 -0
  40. data/spec/retriever_spec.rb +68 -0
  41. data/spec/router_spec.rb +43 -0
  42. data/spec/server_spec.rb +96 -0
  43. data/spec/spec_helper.rb +55 -0
  44. data/spec/storage_spec.rb +24 -0
  45. data/swagger.yaml +115 -0
  46. data/web_fetch.gemspec +41 -0
  47. metadata +314 -0
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # EventMachine layer-specific helpers
  module EventMachineHelpers
    # Replies to the client once the deferred HTTP fetch settles.
    #
    # The completion handlers send the reply themselves rather than setting a
    # flag that a tick loop polls on every reactor iteration. EM::Deferrable
    # invokes a handler immediately when it is attached after the deferrable
    # has already settled, so fetches that finished before the retrieve
    # request arrived are still answered.
    #
    # @param deferred [Hash] stored fetch; :http and :uid are read, and
    #   :succeeded or :failed is set to true when the fetch settles
    # @param response [Object] response object passed on to #succeed / #fail_
    def wait_for_response(deferred, response)
      deferred[:http].callback do
        Logger.debug("HTTP fetch complete for uid: #{deferred[:uid]}")
        deferred[:succeeded] = true
        succeed(deferred, response)
      end

      deferred[:http].errback do
        Logger.debug("HTTP fetch failed for uid: #{deferred[:uid]}")
        deferred[:failed] = true
        fail_(deferred, response)
      end
    end

    # Polls the :succeeded / :failed flags on +deferred+ from an EM tick loop
    # and replies via #succeed or #fail_ as soon as one of them is set.
    #
    # Retained for backward compatibility only: #wait_for_response no longer
    # calls it, because polling keeps the reactor busy while a fetch is
    # pending.
    #
    # @param deferred [Hash] stored fetch whose flags are inspected
    # @param response [Object] response object passed on to #succeed / #fail_
    def tick_loop(deferred, response)
      EM.tick_loop do
        if deferred[:succeeded]
          succeed(deferred, response)
          :stop
        elsif deferred[:failed]
          fail_(deferred, response)
          :stop
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Handles requests to gather URLs and delegates to the EventMachine web
  # server
  class Gatherer
    include Validatable

    # Request attributes that contribute to a request's hash.
    HASHABLE_KEYS = %i[url query_string headers method].freeze

    # @param server [#gather] object that performs the actual fetches
    # @param params [Hash] request parameters; only :requests is read
    def initialize(server, params)
      @requests = params[:requests]
      @server = server
    end

    # Tags every request with a hash and a fresh uid, hands the tagged list to
    # the server and returns it.
    #
    # @return [Hash] +{ requests: [{ request:, hash:, uid: }, ...] }+
    def start
      tagged = { requests: tag_requests }
      @server.gather(tagged[:requests])
      tagged
    end

    private

    # Records one error per failed check.
    # NOTE(review): #error presumably comes from Validatable, which is not
    # visible here - confirm.
    def validate
      error(:requests_missing) if requests_missing?
      error(:requests_not_array) if requests_not_array?
      error(:requests_empty) if requests_empty?
      error(:missing_url) if missing_url?
    end

    def requests_missing?
      @requests.nil?
    end

    def requests_not_array?
      !@requests.nil? && !@requests.is_a?(Array)
    end

    def requests_empty?
      @requests.is_a?(Array) && @requests.empty?
    end

    # A request that is not a Hash cannot carry a :url, so it is reported as
    # missing one rather than raising while being indexed.
    def missing_url?
      @requests.is_a?(Array) &&
        @requests.any? { |req| !req.is_a?(Hash) || req[:url].nil? }
    end

    def tag_requests
      @requests.map do |request|
        { request: request, hash: hash(request), uid: uid }
      end
    end

    # SHA1 hex digest of the JSON dump of the request's hashable attributes.
    def hash(obj)
      hashable = obj.select { |key, _value| HASHABLE_KEYS.include?(key) }
      Digest::SHA1.hexdigest(JSON.dump(hashable))
    end

    def uid
      SecureRandom.uuid
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Shared code used throughout the application
  module Helpers
    # Returns a copy of +obj+ with hash keys turned into symbols at every
    # level, obtained by dumping it to JSON and parsing the result back with
    # symbolized names. Values are therefore whatever survives a JSON round
    # trip.
    #
    # @param obj [Object] any JSON-serializable structure
    # @return [Object] the re-parsed structure
    def symbolize(obj)
      serialized = JSON.dump(obj)
      JSON.parse(serialized, symbolize_names: true)
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Convenience methods for WebFetch HTTP layer
  module HTTPHelpers
    # Sends +result+ straight back: its :status becomes the response status
    # and its :payload, as gzipped JSON, the response content.
    def respond_immediately(result, response)
      response.status = result[:status]
      response.content = compress(result[:payload].to_json)
      response.send_response
    end

    # @param string [String] data to compress
    # @return [String] gzip-compressed data
    def compress(string)
      ActiveSupport::Gzip.compress(string)
    end

    # Sets the headers shared by every response. Content-Encoding is always
    # gzip because every body written by this module goes through #compress.
    def default_headers(response)
      response.headers['Content-Type'] = 'application/json; charset=utf-8'
      response.headers['Cache-Control'] = 'max-age=0, private, must-revalidate'
      response.headers['Content-Encoding'] = 'gzip'
      response.headers['Vary'] = 'Accept-Encoding'
    end

    # Collects the current request's details into the options hash passed to
    # the router.
    #
    # @raise [JSON::ParserError] when the POST body is not valid JSON
    def request_params
      { method: @http_request_method,
        query_string: @http_query_string,
        post_data: post_data,
        server: self }
    end

    # @return [Object, nil] the POST body parsed as JSON with symbol keys, or
    #   nil when there is no body
    # @raise [JSON::ParserError] when the body is not valid JSON
    def post_data
      return nil unless @http_post_content

      JSON.parse(@http_post_content, symbolize_names: true)
    end

    # Replies with the outcome of a fetch that completed.
    def succeed(deferred, response)
      response.status = 200
      response.content = compress(JSON.dump(success(deferred)))
      response.send_response
    end

    # @param deferred [Hash] stored fetch with :http and :uid
    # @return [Hash] payload describing a completed fetch
    def success(deferred)
      { response: response_summary(deferred[:http], true),
        uid: deferred[:uid] }
    end

    # Replies with the outcome of a fetch that failed. The reply itself is
    # still a 200; the failure is reported inside the payload.
    def fail_(deferred, response)
      response.status = 200
      response.content = compress(JSON.dump(failure(deferred)))
      response.send_response
    end

    # @param deferred [Hash] stored fetch with :http and :uid
    # @return [Hash] payload describing a failed fetch, including the
    #   inspected error (nil when the client reports none)
    def failure(deferred)
      result = deferred[:http]
      summary = response_summary(result, false)
      summary[:error] = result.error&.inspect
      { response: summary, uid: deferred[:uid] }
    end

    private

    # Fields common to the success and failure payloads.
    #
    # The headers are taken from #response_header: on an em-http client,
    # #headers registers a header callback and returns nil when called
    # without a block, so it never yields the received headers.
    def response_summary(result, succeeded)
      { success: succeeded,
        body: result.response,
        headers: result.response_header,
        status: result.response_header.status }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+
5
module WebFetch
  # EventMachine-friendly Logger
  class Logger
    extend SingleForwardable

    # Class-level logging calls are forwarded to the memoized logger, which
    # exists only once .logger has been called.
    def_delegators :@logger, :debug, :info, :warn, :error, :fatal

    class << self
      # Builds the shared logger on first use and returns it; +path+ is
      # ignored on later calls because the instance is memoized.
      #
      # @param path [String, nil] log file path, or nil for the default
      #   destination
      def logger(path)
        @logger ||= EM::Logger.new(::Logger.new(log_file(path)))
      end

      private

      # Picks the log destination: with no path, STDOUT when it is a
      # terminal and the null device otherwise; with a path, that file
      # opened for appending.
      def log_file(path)
        if path.nil?
          STDOUT.isatty ? STDOUT : File.open(File::NULL, 'w')
        else
          # Prevent buffering
          File.open(path, 'a').tap { |log| log.sync = true }
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Glue between the router and the guts of the application; calls the relevant
  # code and builds responses
  class Resources
    # HTTP status codes by symbolic name.
    STATUS_CODES = { ok: 200, unprocessable: 422, not_found: 404 }.freeze

    class << self
      # @return [Hash] static payload identifying the application
      def root(_server, _params)
        { status: status(:ok), payload: { application: 'WebFetch' } }
      end

      # Validates the gather request and, when valid, starts it.
      #
      # @return [Hash] 200 with the gatherer's result, or 422 with its errors
      def gather(server, params)
        gatherer = Gatherer.new(server, params)
        return unprocessable(gatherer) unless gatherer.valid?

        { status: status(:ok), payload: gatherer.start }
      end

      # Validates the retrieve request and looks up the stored fetch.
      #
      # @return [Hash] +{ deferred: found }+ when the fetch exists, otherwise
      #   a 422 (invalid request) or 404 (nothing found) result
      def retrieve(server, params)
        retriever = Retriever.new(server, params)
        return unprocessable(retriever) unless retriever.valid?

        defer_if_found(retriever)
      end

      private

      # @raise [KeyError] for an unknown status name
      def status(name)
        STATUS_CODES.fetch(name)
      end

      # 422 result carrying the validation errors of +validated+.
      def unprocessable(validated)
        { status: status(:unprocessable),
          payload: { error: validated.errors } }
      end

      # 404 result carrying the retriever's not-found message.
      def not_found(retriever)
        {
          status: status(:not_found),
          payload: { error: retriever.not_found_error }
        }
      end

      def defer_if_found(retriever)
        found = retriever.find
        return not_found(retriever) if found.nil?

        { deferred: found }
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Retrieves a gathered HTTP request
  class Retriever
    include Validatable

    # Message set by #find when nothing was stored for the request.
    attr_reader :not_found_error

    # @param server [#storage] object exposing the storage to query
    # @param params [Hash] request parameters; :uid and :hash are read
    def initialize(server, params)
      @server = server
      @uid = params[:uid]
      @hash = params[:hash]
    end

    # Fetches the entry stored under the uid.
    #
    # @return [Object, nil] the stored entry, or nil after recording
    #   #not_found_error when storage has nothing for the uid
    def find
      stored = @server.storage.fetch(@uid)
      stored.nil? ? not_found : stored
    end

    private

    # Exactly one of uid and hash must be supplied.
    def validate
      uid_given = !@uid.nil?
      hash_given = !@hash.nil?

      error(:hash_or_uid_but_not_both) if uid_given && hash_given
      error(:missing_hash_and_uid) unless uid_given || hash_given
    end

    # Records which identifier failed to match and returns nil so that #find
    # can return the result directly.
    def not_found
      message = nil
      if !@uid.nil?
        message = I18n.t(:uid_not_found)
      elsif !@hash.nil?
        message = I18n.t(:hash_not_found)
      end
      @not_found_error = message
      nil
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hanami/router'
4
+ require 'rack/utils'
5
+
6
module WebFetch
  # Dispatches requests to correct resource
  class Router
    include Helpers

    def initialize
      @router = setup
    end

    # Builds the request parameters and calls the endpoint recognized for
    # +url+.
    #
    # @param url [String] request URI
    # @param options [Hash] :server, :method (default 'GET'), :query_string
    #   and :post_data; the hash passed in is not modified
    # @return [Hash] the resource's result, or a 400 result when the +json+
    #   query parameter is not valid JSON
    def route(url, options = {})
      # Hash#merge returns a new hash, so removing :server below leaves the
      # caller's hash untouched.
      options = { query_string: nil, method: 'GET' }.merge(options)
      @server = options.delete(:server)
      method = options[:method].downcase.to_sym
      Logger.info("#{url}: #{options}")
      begin
        params = build_params(options)
      rescue JSON::ParserError
        return { status: 400, payload: I18n.t(:bad_json) }
      end
      @router.recognize(url, method: method).call(params)
    end

    private

    # rubocop:disable Metrics/MethodLength
    def setup
      # The lambda is created here so that it captures this Router (and its
      # @server) before the routes are declared inside the Hanami block.
      resource_finder = lambda do |name, env|
        Resources.public_send(name, @server, env)
      end

      Hanami::Router.new do
        get '/', to: lambda { |params|
          resource_finder.call(:root, params)
        }

        post '/gather', to: lambda { |params|
          resource_finder.call(:gather, params)
        }

        get '/retrieve', to: lambda { |params|
          resource_finder.call(:retrieve, params)
        }
      end
    end
    # rubocop:enable Metrics/MethodLength

    # Merges, in increasing precedence: the query string parameters, the JSON
    # document passed in the +json+ query parameter, and the parsed POST body.
    #
    # @raise [JSON::ParserError] when the +json+ parameter is not valid JSON
    def build_params(options)
      query = Rack::Utils.parse_nested_query(options[:query_string])
      embedded = JSON.parse(query.delete('json') || '{}',
                            symbolize_names: true)
      symbolize(query.merge(embedded)).merge(options[:post_data] || {})
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Web server that accepts requests to gather and retrieve external HTTP
  # requests
  class Server < EM::Connection
    attr_reader :storage

    include EM::HttpServer
    include HTTPHelpers
    include EventMachineHelpers

    # HTTP verbs #gather is willing to dispatch. The verb comes from the
    # request payload, so it must never reach public_send unchecked.
    SUPPORTED_METHODS = %w[get head post put patch delete options].freeze

    def post_init
      super
      @router = Router.new
      @storage = Storage
      no_environment_strings
    end

    # Routes the incoming request and replies either immediately or, for a
    # stored fetch, once that fetch has settled.
    def process_http_request
      result = route_request
      response = EM::DelegatedHttpResponse.new(self)

      default_headers(response)

      if result[:deferred].nil?
        respond_immediately(result, response)
      else
        wait_for_response(result[:deferred], response)
      end
    end

    # Note that #gather is called by WebFetch itself to asynchronously gather
    # the required HTTP objects. All public API requests go via
    # #process_http_request and subsequently WebFetch::Router#route
    #
    # @param targets [Array<Hash>] tagged requests, each with :request and
    #   :uid
    # @raise [ArgumentError] when a request names an unsupported HTTP method
    def gather(targets)
      targets.each do |target|
        request = target[:request]
        async_request = EM::HttpRequest.new(request[:url])
        http = async_request.public_send(http_method(request),
                                         head: request[:headers],
                                         query: request.fetch(:query, {}),
                                         body: request.fetch(:body, nil))
        @storage.store(target[:uid], uid: target[:uid], http: http)
      end
    end

    private

    # Routes the current request. The POST body is parsed while the request
    # parameters are being built, i.e. before the router runs, so a malformed
    # body is turned into the same 400 result the router produces for bad
    # JSON.
    def route_request
      @router.route(@http_request_uri, request_params)
    rescue JSON::ParserError
      { status: 400, payload: I18n.t(:bad_json) }
    end

    # @return [String] lower-cased verb for +request+ ('get' when none given)
    # @raise [ArgumentError] when the verb is not in SUPPORTED_METHODS
    def http_method(request)
      verb = (request[:method] || 'GET').to_s.downcase
      return verb if SUPPORTED_METHODS.include?(verb)

      raise ArgumentError,
            "Unsupported HTTP method: #{request[:method].inspect}"
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Rudimentary global storage for responses
  class Storage
    @_storage = {}

    class << self
      # Remembers +obj+ under +key+, replacing any previous entry.
      #
      # @return [Object] the stored object
      def store(key, obj)
        @_storage.store(key, obj)
      end

      # Removes and returns the entry for +key+, so each entry can be fetched
      # only once.
      #
      # @return [Object, nil] the stored object, or nil when there is none
      def fetch(key)
        @_storage.delete(key)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Version of the web_fetch gem.
  VERSION = '0.1.0'
end
data/lib/web_fetch.rb ADDED
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eventmachine'
4
+ require 'evma_httpserver'
5
+ require 'em-http'
6
+ require 'em-logger'
7
+ require 'i18n'
8
+ require 'logger'
9
+ require 'json'
10
+ require 'digest'
11
+ require 'securerandom'
12
+ require 'faraday'
13
+ require 'childprocess'
14
+ require 'active_support/gzip'
15
+
16
locales_path = File.expand_path('../config/locales/*.yml', __dir__)

# Register the bundled translations once; they used to be appended a second
# time inside the block below.
I18n.load_path += Dir[locales_path]

# Avoid i18n conflicts when using as a gem in a Rails application
unless Gem.loaded_specs.key?('rails')
  I18n.backend.load_translations
  I18n.config.available_locales = :en
end
26
+
27
+ require 'web_fetch/logger'
28
+ require 'web_fetch/helpers'
29
+ require 'web_fetch/event_machine_helpers'
30
+ require 'web_fetch/http_helpers'
31
+ require 'web_fetch/concerns/validatable'
32
+ require 'web_fetch/concerns/http_helpers'
33
+ require 'web_fetch/storage'
34
+ require 'web_fetch/server'
35
+ require 'web_fetch/router'
36
+ require 'web_fetch/resources'
37
+ require 'web_fetch/gatherer'
38
+ require 'web_fetch/retriever'
39
+ require 'web_fetch/client'
40
+ require 'web_fetch/version'
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
describe WebFetch::Client do
  # Client for a server on localhost:8089; the examples below that use it
  # need that server to be running already.
  let(:client) { described_class.new('localhost', 8089, log: File::NULL) }

  before(:each) do
    stub_request(:any, 'http://blah.blah/success')
      .to_return(body: 'hello, everybody')
  end

  it 'can be instantiated with host and port params' do
    client
  end

  describe '#alive?' do
    it 'confirms server is alive and accepting requests' do
      expect(client.alive?).to be true
    end
  end

  describe '#gather' do
    it 'makes `gather` requests to a running server' do
      result = client.gather([{ url: 'http://blah.blah/success' }])
      expect(result.first[:uid]).to_not be_nil
    end
  end

  describe '#retrieve_by_uid' do
    it 'retrieves a gathered item' do
      result = client.gather([{ url: 'http://blah.blah/success' }])
      uid = result.first[:uid]

      retrieved = client.retrieve_by_uid(uid)
      expect(retrieved[:response][:status]).to eql 200
      expect(retrieved[:response][:body]).to eql 'hello, everybody'
      expect(retrieved[:uid]).to eql uid
    end

    it 'returns nil for non-requested items' do
      client.gather([{ url: 'http://blah.blah/success' }])

      retrieved = client.retrieve_by_uid('lalalala')
      expect(retrieved).to be_nil
    end
  end

  describe '#create' do
    it 'spawns a server and returns a client able to connect' do
      client = described_class.create('localhost', 8077, log: File::NULL)
      begin
        expect(client.alive?).to be true
      ensure
        # Always stop the spawned server so a failed expectation does not
        # leave it running and holding the port for later examples.
        client.stop
      end
    end
  end

  describe '#stop' do
    it 'can spawn a server and stop the process when needed' do
      client = described_class.create('localhost', 8077, log: File::NULL)
      expect(client.alive?).to be true
      client.stop
      expect(client.alive?).to be false
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
describe WebFetch::Validatable do
  # Registers three errors, the last one with supplementary text.
  class ConcernedInvalid
    include ::WebFetch::Validatable

    def validate
      error(:bad_json)
      error(:missing_url)
      error(:requests_empty, 'Hello there')
    end
  end

  # Registers no errors at all.
  class ConcernedValid
    include ::WebFetch::Validatable

    def validate; end
  end

  # Includes the concern without providing #validate.
  class ConcernedNotOverridden
    include ::WebFetch::Validatable
  end

  describe '#valid?' do
    context 'invalid' do
      subject { ConcernedInvalid.new }

      it 'runs validations and provides errors including supplementary text' do
        expect(subject.valid?).to be(false)
        expect(subject.errors)
          .to include(I18n.t(:bad_json), I18n.t(:missing_url))
        expect(subject.errors.last).to include('Hello there')
      end
    end

    context 'valid' do
      subject { ConcernedValid.new }

      it 'runs validations and provides (empty) errors' do
        expect(subject.valid?).to be(true)
        expect(subject.errors).to be_empty
      end
    end

    context '#validate not overridden' do
      subject { ConcernedNotOverridden.new }

      it 'raises NotImplementedError when #valid? called' do
        expect { subject.valid? }.to raise_error(NotImplementedError)
      end
    end
  end
end
File without changes