doc_repo 0.1.1 → 1.0.0.pre.beta.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module DocRepo
  # Error result produced when a request could not be completed at the
  # transport level. It carries the request URI, a gateway-style status code
  # and the low-level exception that triggered it.
  class GatewayError < Error
    include HttpResult

    # @param uri [#to_s] the URI that was being requested
    # @param code [#to_i] status code describing the failure
    # @param cause [Exception] the underlying exception
    def initialize(uri, code:, cause:)
      init_result_readers(uri, code)
      @cause = cause
      message = case code
                when NetHttpAdapter::BAD_GATEWAY
                  '502 "Bad Gateway"'
                when NetHttpAdapter::GATEWAY_TIMEOUT
                  '504 "Gateway Timeout"'
                else
                  # Use Rack's reason phrases when Rack is loaded. The lookup
                  # yields nil for codes Rack does not list, so the fallback
                  # below must cover that case as well as Rack being absent;
                  # otherwise `nil.dump` raises NoMethodError.
                  name = if defined?(::Rack::Utils::HTTP_STATUS_CODES)
                    ::Rack::Utils::HTTP_STATUS_CODES[code.to_i]
                  end
                  name ||= "Unknown Error"
                  "#{code} #{name.dump}"
                end
      super(message)
    end

    # Wrap exception as normal
    attr_reader :cause

    # @return [String] the message of the wrapped exception
    def details
      cause.message
    end

    # @return [true] a gateway failure is always an error result
    def error?
      true
    end
  end
end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
module DocRepo
  # Error result wrapping an HTTP response object. The exception message is
  # the response's status code, followed by its quoted reason phrase when the
  # response provides one.
  class HttpError < Error
    include HttpResult

    # @param uri [#to_s] the URI that was requested
    # @param http_response [#code, #message, #body] the response to wrap
    def initialize(uri, http_response)
      @http = http_response
      init_result_readers(uri, @http.code)
      status = @http.code
      reason = @http.message
      status_line = reason ? status + ' ' + reason.dump : status
      super(status_line)
    end

    # @return [Object] the body of the wrapped response
    def details
      # NOTE: The Github raw site does not respond with anything other than
      # `text/plain` for general HTTP errors.
      http.body
    end

    # @return [Boolean] whether the response status was 404
    def not_found?
      code == 404
    end

    # @return [true] an HTTP error is always an error result
    def error?
      true
    end

    private

    # The wrapped response is an implementation detail.
    attr_reader :http
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
module DocRepo
  # Mixin giving result objects `uri` and `code` readers plus a set of status
  # predicates that all default to `false`. Including classes override the
  # predicates that apply to them.
  module HttpResult
    # Stores the normalized URI and status code on the including object.
    #
    # @param uri [#to_s] the request URI
    # @param code [#to_i] the status code
    def init_result_readers(uri, code)
      # `to_s` returns the receiver itself for a String, so copy before
      # freezing; otherwise the caller's own string would become frozen.
      @uri = uri.to_s.dup.freeze
      @code = code.to_i
    end
    protected :init_result_readers

    attr_reader :code, :uri

    # @return [Boolean] false unless overridden
    def error?
      false
    end

    # @return [Boolean] false unless overridden
    def not_found?
      false
    end

    # @return [Boolean] false unless overridden
    def redirect?
      false
    end

    # @return [Boolean] false unless overridden
    def success?
      false
    end
  end
end
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+ require 'net/http'
3
+ require 'time'
4
+
5
module DocRepo
  # `Net::HTTP` based client which always connects over SSL, reads and writes
  # responses through the supplied cache, and wraps every outcome in a result
  # object rather than raising.
  #
  # @api private
  class NetHttpAdapter
    # Net::HTTP default timeouts of 60 seconds are too long for our purposes
    DEFAULT_OPTS = {
      open_timeout: 10,
      read_timeout: 10,
      ssl_timeout: 10,
    }.freeze

    # HTTP Status Codes
    BAD_GATEWAY = 502
    GATEWAY_TIMEOUT = 504
    UNKNOWN_ERROR = 520

    # @param host [String] host name to connect to
    # @param cache [#fetch, #write] store used for responses
    # @param cache_options [Hash] options passed on every cache call
    # @param opts [Hash] overrides for `DEFAULT_OPTS`, handed to `Net::HTTP.start`
    def initialize(host, cache: NullCache.instance, cache_options: {}, **opts)
      @host = host.dup.freeze
      settings = DEFAULT_OPTS.merge(opts)
      # Always force SSL
      settings[:use_ssl] = true
      @opts = settings.freeze
      @cache = cache
      @cache_options = cache_options.dup.freeze
    end

    attr_reader :host, :opts

    attr_reader :cache, :cache_options
    private :cache

    # Retrieves `uri` (through the cache) and wraps the outcome.
    #
    # @param uri [String] request path
    # @return [Redirect, Doc, HttpError, GatewayError] failures are returned
    #   as `GatewayError` results, not raised
    def retrieve(uri)
      build_result(uri, http_cache(uri))
    rescue Timeout::Error => timeout
      # Covers Net::OpenTimeout, Net::ReadTimeout, etc.
      GatewayError.new(uri, code: GATEWAY_TIMEOUT, cause: timeout)
    rescue Net::HTTPBadResponse,
           Net::ProtocolError,
           OpenSSL::SSL::SSLError => protocol_error
      # Docs state `Net::HTTPBadResponse` is raised when there is a protocol
      # error. It's unclear whether all protocol errors are wrapped so we
      # handle both here.
      GatewayError.new(uri, code: BAD_GATEWAY, cause: protocol_error)
    rescue => e
      # Covers IOError, Errno::*, and SocketError
      GatewayError.new(uri, code: UNKNOWN_ERROR, cause: e)
    end

    private

    # Maps a response onto the result class matching its status family.
    def build_result(uri, response)
      case response
      when Net::HTTPRedirection
        Redirect.new(
          response['Location'],
          code: response.code,
          headers: response.to_hash,
        )
      when Net::HTTPSuccess
        Doc.new(uri, response)
      else
        HttpError.new(uri, response)
      end
    end

    # Cache entries are namespaced by host.
    def cache_key(uri)
      "#{host}:#{uri}"
    end

    # Builds the validator headers for revalidating an expired response.
    #
    # RFC 7232 section 6 (https://tools.ietf.org/html/rfc7232#section-6)
    # requires preconditions to be evaluated in a fixed order: `If-Match`,
    # then `If-Unmodified-Since` (only when `If-Match` is absent), then
    # `If-None-Match`, and finally `If-Modified-Since`, which is only
    # considered for GET/HEAD when `If-None-Match` is absent. Origin servers
    # are therefore supposed to give `If-None-Match` higher precedence than
    # `If-Modified-Since`.
    #
    # That ordering gives clients and caches some freedom in how they
    # generate `If-Modified-Since`. RFC 7232 section 3.3
    # (https://tools.ietf.org/html/rfc7232#section-3.3) notes that a cache
    # typically uses the cached `Last-Modified` value, which is the most
    # interoperable choice, but that caches occasionally derive it from other
    # data such as the cached `Date` header or the local time the message was
    # received, particularly when no `Last-Modified` field was sent.
    #
    # However, the Github raw content server (GRC) does not respect this.
    # This may be due to the fact that the GRC does not send a
    # `Last-Modified` header in replies. The behavior _may_ make sense if we
    # assume the GRC follows the now obsolete HTTP/1.1 RFC 2616, section
    # 13.3.4 (https://tools.ietf.org/html/rfc2616#section-13.3.4): a server
    # receiving both a date validator and entity tags MUST NOT answer 304
    # (Not Modified) unless that is consistent with all of the conditional
    # header fields in the request.
    #
    # So to actually receive `304 Not Modified` replies from GRC, but also
    # try to be compatible with more current servers, this only sets
    # `If-Modified-Since` based on the `Last-Modified` value (i.e. we no
    # longer fall back to the `Date` value).
    def conditional_headers(expired)
      {
        "If-None-Match" => expired["ETag"],
        "If-Modified-Since" => expired["Last-Modified"],
      }.compact
    end

    # Truthy when the response carries an `Expires` value in the past; falsy
    # (nil) when it carries no `Expires` value at all.
    def expired?(resp)
      # TODO: Use `Cache-Control` header when available
      deadline = resp['Expires']
      deadline && Time.httpdate(deadline) < Time.now
    rescue ArgumentError
      # Raised when `Time.httpdate` cannot parse the value
      #
      # Per the HTTP 1.1 RFC regarding the `Expires` header:
      #
      # > A cache recipient MUST interpret invalid date formats, especially the
      # > value "0", as representing a time in the past (i.e., "already
      # > expired").
      # >
      # > -- https://tools.ietf.org/html/rfc7234#section-5.3
      true
    end

    # Returns the cached response for `uri`, fetching it on a cache miss and
    # revalidating (then re-storing) it when it has expired.
    def http_cache(uri)
      key = cache_key(uri)
      cached = cache.fetch(key, cache_options) do
        Net::HTTP.start(host, opts) { |http| http.get(uri) }
      end
      return cached unless expired?(cached)

      renewed = refresh(uri, cached)
      cache.write(key, renewed, cache_options)
      renewed
    end

    # Issues a conditional GET for an expired response. On `304 Not Modified`
    # the headers of the reply are copied onto the expired response, which is
    # then returned; any other reply is returned as is.
    def refresh(uri, expired)
      reply = Net::HTTP.start(host, opts) do |http|
        http.get(uri, conditional_headers(expired))
      end
      return reply unless Net::HTTPNotModified === reply

      reply.each_header { |name, value| expired[name] = value }
      expired
    end
  end
end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Monkey patches to work within Rails conventions
4
module DocRepo
  module Rails
    # Prior to ActiveSupport 5.2 it is assumed the `cache_key` value contains
    # version information, so this appends `cache_version` to whatever the
    # next `cache_key` implementation in the ancestor chain returns.
    #
    # NOTE(review): a module that is `include`d sits behind the including
    # class in method lookup. If the including class defines its own
    # `cache_key`, that definition wins over this one - confirm against `Doc`.
    module VersionedCacheKey
      # @return [String] "<inherited cache key>-<cache_version>"
      def cache_key
        unversioned = super
        "#{unversioned}-#{cache_version}"
      end
      alias cache_key_with_version cache_key
    end
  end
end
16
+
17
# Mix the versioned cache key into documents.
DocRepo::Doc.include(DocRepo::Rails::VersionedCacheKey)
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Monkey patches to work within Rails conventions
4
module DocRepo
  module Rails
    # Helpers that let a document be handed to Rails rendering and caching
    # code as if it were a model.
    module Modelish
      # For some reason Rails _only_ calls `to_text` for the following:
      #
      #     render html: doc
      #     render body: doc
      #     render plain: doc
      #
      # There's no way for us to know which of these is being called so we
      # can't conditionally provide the raw markdown for `plain`. And without
      # this `to_s` will be called then HTML escaped:
      #
      #     "#&lt;DocRepo::Doc:0x007fabefe8c360&gt;"
      #
      # @return [Object] the rendered HTML after calling `html_safe` on it
      def to_text
        rendered = to_html
        rendered.html_safe
      end

      # @return [Object] whatever `last_modified` returns
      def updated_at
        last_modified
      end
    end
  end
end
28
+
29
+ # Prior to ActiveSupport 5.2 it is assumed the `cache_key` value contains
30
+ # version information.
31
# Load the legacy patch only when the running Rails is older than 5.2.0.
require_relative 'rails/legacy_versioned_cache' if Rails.gem_version < Gem::Version.new("5.2.0")

# Mix the Rails-facing helpers into documents.
DocRepo::Doc.include(DocRepo::Rails::Modelish)
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module DocRepo
  # Result object describing a redirect to another location.
  class Redirect
    include HttpResult

    # @param url [#to_s] the redirect target
    # @param code [#to_i] the redirect status code
    # @param headers [Hash] headers that accompanied the redirect
    def initialize(url, code: 302, headers: {})
      init_result_readers(url, code)
      # Freeze a copy so the caller's hash stays mutable.
      @headers = headers.dup.freeze
    end

    alias_method :url, :uri
    alias_method :location, :url

    # @return [true] this result is always a redirect
    def redirect?
      true
    end
  end
end
@@ -1,31 +1,83 @@
1
+ # frozen_string_literal: true
2
+ require 'forwardable'
3
+
1
4
  module DocRepo
2
5
  class Repository
3
- REDIRECT_FORMATS = %w[
4
- .jpg
5
- .png
6
- .jpeg
7
- .svg
8
- .css
9
- .txt
10
- ]
11
-
12
- def respond(slug, &block)
13
- if REDIRECT_FORMATS.include?(File.extname(slug).downcase)
14
- yield DocRepo::Response.redirect(get_redirect_url(slug))
15
- else
16
- yield DocRepo::Response.html(render_page(slug))
17
- end
6
+ extend Forwardable
7
+
8
# @param config [Object] repository settings; a frozen copy is kept
# @param http_adapter [#retrieve, nil] client used to retrieve documents;
#   when omitted a `NetHttpAdapter` for `GITHUB_HOST` is built using the
#   configured cache store and cache options
def initialize(config, http_adapter: nil)
  @config = config.dup.freeze
  @http = http_adapter || NetHttpAdapter.new(
    GITHUB_HOST,
    cache: cache_store,
    cache_options: cache_options,
  )
end
17
+
18
+ attr_reader :config
19
+ def_delegators :config, :branch, :doc_root, :fallback_ext, :org, :repo
20
+
21
# Resolves `slug` and invokes the handler registered for the outcome.
#
# @param slug [String] document slug to look up
# @param result_handler [#fetch] handler registry; yielded to the block so
#   callers can register their handlers
# @yieldparam result_handler the handler registry to configure
# @return [Object] whatever the selected handler returns
def request(slug, result_handler: ResultHandler.new)
  # The block is optional so that a handler configured ahead of time can be
  # passed in without triggering a LocalJumpError.
  yield result_handler if block_given?
  result = detect(uri_for(slug))
  action = handler_for(result, result_handler)
  action.call result
end
27
+
28
# Builds the request path for `slug` from the configured org, repo, branch
# and doc root, collapsing any repeated slashes.
#
# @param slug [String] document slug
# @return [String] the request path
def uri_for(slug)
  path = "/#{org}/#{repo}/#{branch}/#{doc_root}/#{ensure_ext(slug)}"
  path.squeeze("/")
end
19
31
 
20
32
  private
21
33
 
22
- def render_page(slug)
23
- DocRepo::Page.new(slug).to_html
34
+ GITHUB_HOST = "raw.githubusercontent.com"
35
+
36
+ attr_reader :http
37
+ def_delegators :config, :doc_formats, :cache_store, :cache_options
38
+
39
# @param ext [String] a file extension
# @return [Boolean] true when `ext` is not one of the configured doc formats
def redirect_type?(ext)
  return false if doc_formats.include?(ext)

  true
end
25
42
 
26
- def get_redirect_url(slug)
27
- GithubFile.new(slug).redirect_url
43
# Produces the result for `uri`: documents are retrieved through the HTTP
# adapter, anything else becomes a redirect to the same path on
# `GITHUB_HOST`.
#
# @param uri [String] request path
# @return [Object] a `Redirect`, or whatever the adapter's `retrieve` returns
def detect(uri)
  return http.retrieve(uri) unless redirect_type?(File.extname(uri))

  Redirect.new("https://#{GITHUB_HOST}#{uri}")
end
50
+
51
# Appends the configured fallback extension to slugs that have none.
#
# @param slug [String] document slug
# @return [String] the slug, with an extension guaranteed
def ensure_ext(slug)
  File.extname(slug).empty? ? "#{slug}#{fallback_ext}" : slug
end
58
+
59
# Picks the registered handler that matches the kind of `result`.
#
# Redirect and success results require a handler and raise
# `UnhandledAction` without one. Not-found results fall back to the
# `:error` handler. When no error handler is registered the result itself
# is raised.
#
# @param result [#redirect?, #success?, #not_found?] outcome of the request
# @param result_handler [#fetch, #inspect] handler registry
# @return [Object] the handler registered for the matching action
def handler_for(result, result_handler)
  if result.redirect?
    result_handler.fetch(:redirect) do
      raise UnhandledAction.new(:redirect, <<~MSG.chomp)
        no result redirect handler defined for #{result_handler.inspect}
      MSG
    end
  elsif result.success?
    result_handler.fetch(:complete) do
      raise UnhandledAction.new(:complete, <<~MSG.chomp)
        no result completion handler defined for #{result_handler.inspect}
      MSG
    end
  elsif result.not_found?
    result_handler.fetch(:not_found) do
      result_handler.fetch(:error) { raise result }
    end
  else
    # TODO: Are we missing other cases?
    result_handler.fetch(:error) { raise result }
  end
end
29
82
  end
30
83
  end
31
-
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+ require 'forwardable'
3
+
4
module DocRepo
  # Registry of callbacks keyed by outcome type. Each of `complete`, `error`,
  # `not_found` and `redirect` stores the block it is given; stored blocks are
  # looked up through `[]`, `fetch` and `each`.
  class ResultHandler
    extend Forwardable

    class << self
      private

      # Defines one registration method per type. Each generated method
      # requires a block and stores it under the type's name.
      def handler(*types)
        types.each do |type|
          define_method(type) do |&action|
            raise ArgumentError, "Result handler block required" unless action
            @actions[type] = action
          end
        end
      end
    end

    handler :complete, :error, :not_found, :redirect

    # @yieldparam handler [ResultHandler] the new instance, for registering
    #   callbacks inline
    def initialize
      @actions = {}
      yield self if block_given?
    end

    def_delegators :actions, :[], :each, :fetch

    private

    # The backing hash is only reachable through the delegated readers.
    attr_reader :actions
  end
end
@@ -1,3 +1,3 @@
1
1
  module DocRepo
2
- VERSION = "0.1.1"
2
+ VERSION = "1.0.0-beta.1"
3
3
  end
data/lib/doc_repo.rb CHANGED
@@ -1,23 +1,27 @@
1
+ # frozen_string_literal: true
1
2
  require "doc_repo/version"
2
3
 
3
4
  module DocRepo
4
- autoload :Configuration, "doc_repo/configuration"
5
- autoload :GithubFile, "doc_repo/github_file"
6
- autoload :Page, "doc_repo/page"
5
+ require_relative "doc_repo/configuration"
6
+ require_relative "doc_repo/error"
7
+
8
+ # HTTP Adapter and Results
9
+ autoload :NetHttpAdapter, "doc_repo/net_http_adapter"
10
+ autoload :HttpResult, "doc_repo/http_result"
11
+ autoload :Doc, "doc_repo/doc"
12
+ autoload :Redirect, "doc_repo/redirect"
13
+ autoload :HttpError, "doc_repo/http_error"
14
+ autoload :GatewayError, "doc_repo/gateway_error"
15
+
7
16
  autoload :Repository, "doc_repo/repository"
8
- autoload :Response, "doc_repo/response"
9
-
10
- BadPageFormat = Class.new(StandardError)
11
- class NotFound < StandardError
12
- attr_reader :base
13
- def initialize(*args, base: $!)
14
- @base = base
15
- super(*args)
16
- end
17
+ autoload :ResultHandler, "doc_repo/result_handler"
18
+
19
+ if defined?(Rails)
20
+ require_relative "doc_repo/rails"
17
21
  end
18
22
 
19
23
  class << self
20
- attr_reader :configuration
24
+ attr_writer :configuration
21
25
 
22
26
  def configuration
23
27
  @configuration ||= Configuration.new
@@ -25,11 +29,10 @@ module DocRepo
25
29
  end
26
30
 
27
31
  def self.configure
28
- yield(configuration) if block_given?
32
+ yield(configuration)
29
33
  end
30
34
 
31
- def self.respond_with(slug, &block)
32
- Repository.new.respond(slug, &block)
35
+ def self.request(slug, &block)
36
+ Repository.new(configuration).request(slug, &block)
33
37
  end
34
38
  end
35
-