web_fetch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +3 -0
  4. data/.rubocop.yml +10 -0
  5. data/.ruby-version +1 -0
  6. data/Gemfile +5 -0
  7. data/Gemfile.lock +120 -0
  8. data/LICENSE +7 -0
  9. data/README.md +149 -0
  10. data/TODO +0 -0
  11. data/bin/rspec +29 -0
  12. data/bin/rubocop +29 -0
  13. data/bin/web_fetch_control +6 -0
  14. data/bin/web_fetch_server +30 -0
  15. data/config/locales/en.yml +12 -0
  16. data/doc/client_example.rb +19 -0
  17. data/doc/web_fetch_architecture.png +0 -0
  18. data/lib/web_fetch/client.rb +101 -0
  19. data/lib/web_fetch/concerns/http_helpers.rb +64 -0
  20. data/lib/web_fetch/concerns/validatable.rb +31 -0
  21. data/lib/web_fetch/event_machine_helpers.rb +36 -0
  22. data/lib/web_fetch/gatherer.rb +62 -0
  23. data/lib/web_fetch/helpers.rb +11 -0
  24. data/lib/web_fetch/http_helpers.rb +71 -0
  25. data/lib/web_fetch/logger.rb +29 -0
  26. data/lib/web_fetch/resources.rb +59 -0
  27. data/lib/web_fetch/retriever.rb +39 -0
  28. data/lib/web_fetch/router.rb +71 -0
  29. data/lib/web_fetch/server.rb +49 -0
  30. data/lib/web_fetch/storage.rb +16 -0
  31. data/lib/web_fetch/version.rb +5 -0
  32. data/lib/web_fetch.rb +40 -0
  33. data/spec/client_spec.rb +63 -0
  34. data/spec/concerns/validatable_spec.rb +53 -0
  35. data/spec/features/http_fetching_spec.rb +0 -0
  36. data/spec/gatherer_spec.rb +109 -0
  37. data/spec/helpers_spec.rb +18 -0
  38. data/spec/i18n_spec.rb +8 -0
  39. data/spec/resources_spec.rb +42 -0
  40. data/spec/retriever_spec.rb +68 -0
  41. data/spec/router_spec.rb +43 -0
  42. data/spec/server_spec.rb +96 -0
  43. data/spec/spec_helper.rb +55 -0
  44. data/spec/storage_spec.rb +24 -0
  45. data/swagger.yaml +115 -0
  46. data/web_fetch.gemspec +41 -0
  47. metadata +314 -0
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Helpers that connect a pending asynchronous HTTP fetch to the response
  # which is eventually sent back through EventMachine
  module EventMachineHelpers
    # Attaches completion handlers to the pending fetch and starts polling for
    # the outcome.
    #
    # deferred - Hash holding the pending fetch under :http and its :uid; the
    #            :succeeded / :failed flags are written into this Hash
    # response - response object passed through to #succeed / #fail_
    #
    # Returns the value returned by EM.tick_loop.
    def wait_for_response(deferred, response)
      pending = deferred[:http]

      pending.callback do
        Logger.debug("HTTP fetch complete for uid: #{deferred[:uid]}")
        deferred[:succeeded] = true
      end

      pending.errback do
        Logger.debug("HTTP fetch failed for uid: #{deferred[:uid]}")
        deferred[:failed] = true
      end

      tick_loop(deferred, response)
    end

    # Polls the flags set by the handlers above on every reactor tick. Once
    # one of them is set the matching responder is invoked and the block
    # yields :stop; until then it yields nil.
    #
    # XXX Polling is a stop-gap: there may be a nicer way to wait for an async
    # task before responding, this just makes the callback chain explicit.
    def tick_loop(deferred, response)
      EM.tick_loop do
        if deferred[:succeeded]
          succeed(deferred, response)
        elsif deferred[:failed]
          fail_(deferred, response)
        else
          next
        end

        :stop
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Handles requests to gather URLs and delegates to the EventMachine web
  # server
  class Gatherer
    include Validatable

    # Request attributes that contribute to a request's content hash
    HASHABLE_KEYS = %i[url query_string headers method].freeze

    # server - object responding to #gather (receives the tagged requests)
    # params - Hash whose :requests entry is the list of requests to gather
    def initialize(server, params)
      @requests = params[:requests]
      @server = server
    end

    # Tags every request with a content hash and a fresh uid, hands the tagged
    # list to the server and returns it wrapped as { requests: [...] }.
    def start
      tagged = { requests: tag_requests }
      @server.gather(tagged[:requests])
      tagged
    end

    private

    # Records one error per violated rule; the rules are mutually guarded so a
    # missing or non-Array :requests never reaches the per-request checks.
    def validate
      error(:requests_missing) if requests_missing?
      error(:requests_not_array) if requests_not_array?
      error(:requests_empty) if requests_empty?
      error(:missing_url) if missing_url?
    end

    def requests_missing?
      @requests.nil?
    end

    def requests_not_array?
      !@requests.nil? && !@requests.is_a?(Array)
    end

    def requests_empty?
      @requests.is_a?(Array) && @requests.empty?
    end

    # An entry that is not a Hash cannot carry a :url, so it is reported as a
    # missing URL rather than raising when indexed with a Symbol.
    def missing_url?
      return false unless @requests.is_a?(Array)

      @requests.any? { |req| !req.is_a?(Hash) || req[:url].nil? }
    end

    def tag_requests
      @requests.map do |request|
        { request: request, hash: hash(request), uid: uid }
      end
    end

    # Hex SHA1 of the JSON dump of the request restricted to HASHABLE_KEYS,
    # with keys kept in the order the request supplied them.
    #
    # NOTE: this shadows Object#hash with a one-argument signature, so
    # instances of this class cannot be used as Hash keys.
    def hash(obj)
      string = JSON.dump(obj.select { |key| HASHABLE_KEYS.include?(key) })
      Digest::SHA1.hexdigest(string)
    end

    def uid
      SecureRandom.uuid
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Shared code used throughout the application
  module Helpers
    # Returns a copy of obj in which every Hash key, at any nesting depth, is
    # a Symbol. Implemented as a JSON round trip, so obj must be
    # JSON-serialisable.
    def symbolize(obj)
      serialized = JSON.dump(obj)
      JSON.parse(serialized, symbolize_names: true)
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Convenience methods for WebFetch HTTP layer
  module HTTPHelpers
    # Sends a routed result straight away: result[:status] becomes the status
    # code and result[:payload] the gzipped JSON body.
    def respond_immediately(result, response)
      body = result[:payload].to_json
      response.status = result[:status]
      response.content = compress(body)
      response.send_response
    end

    # Gzips a string.
    def compress(string)
      ActiveSupport::Gzip.compress(string)
    end

    # Sets the headers shared by every response (gzipped JSON, not cacheable).
    def default_headers(response)
      headers = response.headers
      headers['Content-Type'] = 'application/json; charset=utf-8'
      headers['Cache-Control'] = 'max-age=0, private, must-revalidate'
      headers['Content-Encoding'] = 'gzip'
      headers['Vary'] = 'Accept-Encoding'
    end

    # Options describing the current HTTP request, including the receiver
    # itself under :server.
    def request_params
      parsed_body = post_data
      {
        method: @http_request_method,
        query_string: @http_query_string,
        post_data: parsed_body,
        server: self
      }
    end

    # Parsed JSON request body with symbolised keys, or nil when the request
    # carried no body. Raises JSON::ParserError for a malformed body.
    def post_data
      JSON.parse(@http_post_content, symbolize_names: true) if @http_post_content
    end

    # Responds with the outcome of a completed fetch.
    def succeed(deferred, response)
      payload = JSON.dump(success(deferred))
      response.status = 200
      response.content = compress(payload)
      response.send_response
    end

    # Payload describing a completed fetch.
    def success(deferred)
      http = deferred[:http]
      details = {
        success: true,
        body: http.response,
        headers: http.headers,
        status: http.response_header.status
      }
      { response: details, uid: deferred[:uid] }
    end

    # Responds with the outcome of a failed fetch. The status is still 200;
    # the failure is reported inside the payload.
    def fail_(deferred, response)
      payload = JSON.dump(failure(deferred))
      response.status = 200
      response.content = compress(payload)
      response.send_response
    end

    # Payload describing a failed fetch, including the inspected error when
    # one is present.
    def failure(deferred)
      http = deferred[:http]
      details = {
        success: false,
        body: http.response,
        headers: http.headers,
        status: http.response_header.status,
        error: http.error&.inspect
      }
      { response: details, uid: deferred[:uid] }
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+
5
module WebFetch
  # EventMachine-friendly Logger
  class Logger
    extend SingleForwardable

    # Class-level logging calls are forwarded to the memoised logger, so
    # .logger must have been called once before any of them is used.
    def_delegators :@logger, :debug, :info, :warn, :error, :fatal

    class << self
      # Builds the shared logger on first use and returns it on every call;
      # path is ignored once the logger exists.
      def logger(path)
        @logger ||= EM::Logger.new(::Logger.new(log_file(path)))
      end

      private

      # Chooses the log destination: the file at path (appended to,
      # unbuffered) when a path is given; otherwise STDOUT when it is a
      # terminal, or the null device when it is not.
      def log_file(path)
        if path.nil?
          return STDOUT if STDOUT.isatty

          return File.open(File::NULL, 'w')
        end

        File.open(path, 'a').tap { |log| log.sync = true } # Prevent buffering
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Glue between the router and the guts of the application; calls the relevant
  # code and builds responses
  class Resources
    # HTTP status codes by symbolic name
    STATUS_CODES = { ok: 200, unprocessable: 422, not_found: 404 }.freeze

    class << self
      # Identifies the application.
      def root(_server, _params)
        { status: status(:ok), payload: { application: 'WebFetch' } }
      end

      # Starts gathering the requested URLs. Returns 200 with the tagged
      # requests, or 422 with the validation errors.
      def gather(server, params)
        gatherer = Gatherer.new(server, params)
        return unprocessable(gatherer) unless gatherer.valid?

        { status: status(:ok), payload: gatherer.start }
      end

      # Looks up a previously gathered request. Returns { deferred: ... } when
      # it is found, 404 when it is not and 422 when the params are invalid.
      def retrieve(server, params)
        retriever = Retriever.new(server, params)
        return unprocessable(retriever) unless retriever.valid?

        defer_if_found(retriever)
      end

      private

      # Raises KeyError for an unknown name.
      def status(name)
        STATUS_CODES.fetch(name)
      end

      # 422 response carrying the validation errors of a Gatherer/Retriever.
      def unprocessable(validatable)
        { status: status(:unprocessable),
          payload: { error: validatable.errors } }
      end

      # 404 response carrying the retriever's explanation.
      def not_found(retriever)
        { status: status(:not_found),
          payload: { error: retriever.not_found_error } }
      end

      # retriever.find must run before not_found is built: it is what
      # populates retriever.not_found_error.
      def defer_if_found(retriever)
        found = retriever.find
        return not_found(retriever) if found.nil?

        { deferred: found }
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Retrieves a gathered HTTP request
  class Retriever
    include Validatable

    # Message explaining the most recent unsuccessful #find (nil before that)
    attr_reader :not_found_error

    # server - object exposing #storage
    # params - Hash that may carry :uid and/or :hash
    def initialize(server, params)
      @server = server
      @uid = params[:uid]
      @hash = params[:hash]
    end

    # Fetches the stored entry for the uid. Returns the entry, or nil after
    # recording #not_found_error when nothing is stored. Note that the lookup
    # always uses the uid; the hash is never used to query storage.
    def find
      stored = @server.storage.fetch(@uid)
      stored.nil? ? not_found : stored
    end

    private

    # Exactly one of uid / hash has to be supplied.
    def validate
      identifiers = [@uid, @hash]
      error(:hash_or_uid_but_not_both) if identifiers.none?(&:nil?)
      error(:missing_hash_and_uid) if identifiers.all?(&:nil?)
    end

    # Records the message matching whichever identifier was supplied (uid
    # takes precedence) and always returns nil.
    def not_found
      key = if !@uid.nil?
              :uid_not_found
            elsif !@hash.nil?
              :hash_not_found
            end
      @not_found_error = key.nil? ? nil : I18n.t(key)
      nil
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'hanami/router'
4
+ require 'rack/utils'
5
+
6
module WebFetch
  # Dispatches requests to correct resource
  class Router
    include Helpers

    def initialize
      @router = setup
    end

    # Routes a request to the matching resource and returns its result.
    #
    # url     - request path
    # options - Hash with :method (default 'GET'), :query_string, :post_data
    #           and :server; the caller's Hash is left untouched
    #
    # Returns the resource's result Hash, or a 400 result when the `json`
    # query parameter cannot be parsed.
    def route(url, options = {})
      # Work on a copy so removing :server does not alter the caller's Hash.
      options = { query_string: nil, method: 'GET' }.merge(options)
      @server = options.delete(:server)
      method = options[:method].downcase.to_sym
      Logger.info("#{url}: #{options}")
      begin
        params = build_params(options)
      rescue JSON::ParserError
        return { status: 400, payload: I18n.t(:bad_json) }
      end
      @router.recognize(url, method: method).call(params)
    end

    private

    # rubocop:disable Metrics/MethodLength
    def setup
      # Defined outside the router block so that @server resolves against
      # this Router instance when a route is called.
      resource_finder = lambda do |name, env|
        Resources.public_send(name, @server, env)
      end

      Hanami::Router.new do
        get '/', to: lambda { |params|
          resource_finder.call(:root, params)
        }

        post '/gather', to: lambda { |params|
          resource_finder.call(:gather, params)
        }

        get '/retrieve', to: lambda { |params|
          resource_finder.call(:retrieve, params)
        }
      end
    end
    # rubocop:enable Metrics/MethodLength

    # Combines query-string parameters, the decoded `json` query parameter and
    # the POST data (later sources win) into one Hash with Symbol keys.
    def build_params(options)
      params = Rack::Utils.parse_nested_query(options[:query_string])
      merge_json!(params)
      params = symbolize(params)
      params.merge!(options[:post_data] || {})
      params
    end

    # Returns a new Hash: params without its 'json' entry, merged with the
    # parsed content of that entry. params itself is not modified.
    def merge_json(params)
      decoded = JSON.parse(params['json'] || '{}', symbolize_names: true)
      params.reject { |key, _value| key == 'json' }.merge(decoded)
    end

    # In-place variant of #merge_json.
    def merge_json!(params)
      params.replace(merge_json(params))
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Web server that accepts requests to gather and retrieve external HTTP
  # requests
  class Server < EM::Connection
    attr_reader :storage

    include EM::HttpServer
    include HTTPHelpers
    include EventMachineHelpers

    # HTTP verbs a gather request may ask for
    SUPPORTED_METHODS = %i[get head post put patch delete options].freeze

    def post_init
      super
      @router = Router.new
      @storage = Storage
      no_environment_strings
    end

    # Entry point for every public API request: routes it, then either
    # responds at once or waits for the deferred fetch the route returned.
    def process_http_request
      response = EM::DelegatedHttpResponse.new(self)
      default_headers(response)

      result = route_request
      if result[:deferred].nil?
        respond_immediately(result, response)
      else
        wait_for_response(result[:deferred], response)
      end
    end

    # Note that #gather is called by WebFetch itself to asynchronously gather
    # the required HTTP objects. All public API requests go via
    # #process_http_request and subsequently WebFetch::Router#route
    #
    # targets - Array of Hashes carrying :request and :uid
    #
    # Raises ArgumentError when a request names an unsupported HTTP method.
    def gather(targets)
      targets.each do |target|
        request = target[:request]
        async_request = EM::HttpRequest.new(request[:url])
        http = async_request.public_send(http_method(request),
                                         head: request[:headers],
                                         query: request.fetch(:query, {}),
                                         body: request.fetch(:body, nil))
        @storage.store(target[:uid], uid: target[:uid], http: http)
      end
    end

    private

    # The POST body is parsed while the arguments for Router#route are built,
    # i.e. before the router's own error handling is active, so a malformed
    # body is turned into the router's bad-JSON result here.
    def route_request
      @router.route(@http_request_uri, request_params)
    rescue JSON::ParserError
      { status: 400, payload: I18n.t(:bad_json) }
    end

    # The method name comes from the client, so only known HTTP verbs may be
    # dispatched to the request object.
    def http_method(request)
      method = request.fetch(:method, 'GET').to_s.downcase.to_sym
      return method if SUPPORTED_METHODS.include?(method)

      raise ArgumentError, "Unsupported HTTP method: #{method}"
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Rudimentary global storage for responses
  class Storage
    @_storage = {}

    class << self
      # Saves obj under key, replacing any previous entry. Returns obj.
      def store(key, obj)
        @_storage[key] = obj
      end

      # Returns the entry saved under key and removes it, so each entry can
      # be fetched only once. Returns nil when there is no such entry.
      def fetch(key)
        @_storage.delete(key)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module WebFetch
  # Release version of the WebFetch gem
  VERSION = '0.1.0'
end
data/lib/web_fetch.rb ADDED
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'eventmachine'
4
+ require 'evma_httpserver'
5
+ require 'em-http'
6
+ require 'em-logger'
7
+ require 'i18n'
8
+ require 'logger'
9
+ require 'json'
10
+ require 'digest'
11
+ require 'securerandom'
12
+ require 'faraday'
13
+ require 'childprocess'
14
+ require 'active_support/gzip'
15
+
16
locales_path = File.expand_path('../config/locales/*.yml', __dir__)

# Register the bundled locale files exactly once, whatever the host
# application is.
I18n.load_path += Dir[locales_path]

# Avoid i18n conflicts when using as a gem in a Rails application: the
# translations are loaded and the available locales set only when Rails is
# not among the loaded gems.
unless Gem.loaded_specs.key?('rails')
  I18n.backend.load_translations
  I18n.config.available_locales = :en
end
26
+
27
+ require 'web_fetch/logger'
28
+ require 'web_fetch/helpers'
29
+ require 'web_fetch/event_machine_helpers'
30
+ require 'web_fetch/http_helpers'
31
+ require 'web_fetch/concerns/validatable'
32
+ require 'web_fetch/concerns/http_helpers'
33
+ require 'web_fetch/storage'
34
+ require 'web_fetch/server'
35
+ require 'web_fetch/router'
36
+ require 'web_fetch/resources'
37
+ require 'web_fetch/gatherer'
38
+ require 'web_fetch/retriever'
39
+ require 'web_fetch/client'
40
+ require 'web_fetch/version'
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
describe WebFetch::Client do
  # Client pointed at localhost:8089
  let(:client) { described_class.new('localhost', 8089, log: File::NULL) }
  let(:target) { { url: 'http://blah.blah/success' } }

  before do
    stub_request(:any, 'http://blah.blah/success')
      .to_return(body: 'hello, everybody')
  end

  it 'can be instantiated with host and port params' do
    client
  end

  describe '#alive?' do
    it 'confirms server is alive and accepting requests' do
      expect(client.alive?).to be true
    end
  end

  describe '#gather' do
    it 'makes `gather` requests to a running server' do
      gathered = client.gather([target])
      expect(gathered.first[:uid]).not_to be_nil
    end
  end

  describe '#retrieve_by_uid' do
    it 'retrieves a gathered item' do
      uid = client.gather([target]).first[:uid]

      retrieved = client.retrieve_by_uid(uid)
      response = retrieved[:response]
      expect(response[:status]).to eql 200
      expect(response[:body]).to eql 'hello, everybody'
      expect(retrieved[:uid]).to eql uid
    end

    it 'returns nil for non-requested items' do
      client.gather([target])

      expect(client.retrieve_by_uid('lalalala')).to be_nil
    end
  end

  describe '#create' do
    it 'spawns a server and returns a client able to connect' do
      spawned = described_class.create('localhost', 8077, log: File::NULL)
      expect(spawned.alive?).to be true
      spawned.stop
    end
  end

  describe '#stop' do
    it 'can spawn a server and stop the process when needed' do
      spawned = described_class.create('localhost', 8077, log: File::NULL)
      expect(spawned.alive?).to be true
      spawned.stop
      expect(spawned.alive?).to be false
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
describe WebFetch::Validatable do
  # Records three errors, the last one with supplementary text
  class ConcernedInvalid
    include ::WebFetch::Validatable

    def validate
      error(:bad_json)
      error(:missing_url)
      error(:requests_empty, 'Hello there')
    end
  end

  # Records no errors
  class ConcernedValid
    include ::WebFetch::Validatable

    def validate; end
  end

  # Leaves #validate undefined
  class ConcernedNotOverridden
    include ::WebFetch::Validatable
  end

  describe '#valid?' do
    context 'invalid' do
      subject(:model) { ConcernedInvalid.new }

      it 'runs validations and provides errors including supplementary text' do
        expect(model.valid?).to be false

        errors = model.errors
        expect(errors).to include I18n.t(:bad_json)
        expect(errors).to include I18n.t(:missing_url)
        expect(errors.last).to include 'Hello there'
      end
    end

    context 'valid' do
      subject(:model) { ConcernedValid.new }

      it 'runs validations and provides (empty) errors' do
        expect(model.valid?).to be true
        expect(model.errors).to be_empty
      end
    end

    context '#validate not overridden' do
      subject(:model) { ConcernedNotOverridden.new }

      it 'raises NotImplementedError when #valid? called' do
        expect { model.valid? }.to raise_error(NotImplementedError)
      end
    end
  end
end
File without changes