kataba 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/kataba/fetcher.rb +88 -0
- data/lib/kataba.rb +14 -15
- metadata +14 -13
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1849ebd6214469739d5a16981525a9c5ab23d0ca7ddb91ad028064a43e63dad0
|
|
4
|
+
data.tar.gz: 46da3306e49890c4801362c7c5af2c8e7cbf68cf2d3a8b9604e4d69f9d3f7887
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7d26265f3e4b30fbc11c82f12bec40860b56f3910d8a7aefccc9150bf6cd24d61a543da9fb33eb11111a8410386e1c632df6c02f56e2f91a8681515e4f2e5548
|
|
7
|
+
data.tar.gz: e35b99af877e7b660838d9b58943806c35084861c8295ea93e4a456d66e20943fb987261ee5adf795a0c858c26fa8a7c4cd41212d9012d26d5ba14cab80d51bc
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
require 'net/http'
|
|
2
|
+
require 'uri'
|
|
3
|
+
|
|
4
|
+
module Kataba
  # Fetches a schema body, recovering from LoC-shaped delivery quirks
  # that a verbatim URI.open would surface as cache-poisoning errors:
  #
  #   - 5xx (Cloudflare bot-management 503/529, origin overload):
  #     retry once on the alternate scheme.
  #   - same-origin HTTPS->HTTP 3xx: follow. open-uri refuses all
  #     scheme downgrades; we relax to same-origin because the
  #     consumer already trusted this host by putting it in their
  #     schemaLocation. Cross-origin downgrades stay refused — that's
  #     the actual DNS-redirect attack vector.
  #
  # mirror_list remains the consumer's backstop for URI-identity
  # changes (path renames, host moves) that no delivery heuristic
  # can rescue.
  #
  # Only http:// and https:// URIs are fetchable. Any other scheme,
  # whether passed in directly or reached through a redirect, raises
  # FetchError before a connection is attempted.
  class Fetcher
    # Maximum number of 3xx hops followed for a single attempt.
    MAX_REDIRECTS = 5

    # Raised for every failure this class detects itself: non-2xx final
    # status, redirect problems, or a URI that is not HTTP(S).
    class FetchError < StandardError; end

    # @param uri [String] absolute http:// or https:// URI of the schema
    def initialize(uri)
      @original_uri = uri
    end

    # Fetches the schema, following redirects and retrying once on the
    # alternate scheme after a 5xx response.
    #
    # @return [String] the response body of the final 2xx response
    # @raise [FetchError] on a non-2xx final status, too many redirects,
    #   a refused or malformed redirect, or a non-HTTP(S) URI
    def fetch
      attempt(@original_uri, alt_scheme_retry: true)
    end

    private

    # Runs one full request (including redirects) against +uri+.
    # When the final response is a 5xx and +alt_scheme_retry+ is true,
    # the same URI is retried exactly once with http/https swapped.
    def attempt(uri, alt_scheme_retry:)
      response = request_with_redirects(uri, redirect_depth: 0)

      case response
      when Net::HTTPSuccess
        response.body
      when Net::HTTPServerError
        if alt_scheme_retry && (alt = swap_scheme(uri))
          # The retry passes alt_scheme_retry: false, so it cannot bounce
          # back and forth between schemes.
          attempt(alt, alt_scheme_retry: false)
        else
          raise FetchError, "#{response.code} #{response.message} fetching #{uri}"
        end
      else
        raise FetchError, "#{response.code} #{response.message} fetching #{uri}"
      end
    end

    # Issues a GET for +uri+ and recursively follows 3xx responses.
    # +redirect_depth+ counts the hops already taken; the request is
    # abandoned once it exceeds MAX_REDIRECTS.
    #
    # @return [Net::HTTPResponse] the first non-redirect response
    def request_with_redirects(uri, redirect_depth:)
      raise FetchError, "too many redirects fetching #{@original_uri}" if redirect_depth > MAX_REDIRECTS

      parsed = URI.parse(uri)
      # URI::HTTPS subclasses URI::HTTP, so this accepts both. Anything else
      # (ftp:, file:, a bare path) has no #request_uri and would otherwise
      # fail below with a NoMethodError.
      unless parsed.is_a?(URI::HTTP) && !parsed.host.to_s.empty?
        raise FetchError, "unsupported URI (only http/https can be fetched): #{uri}"
      end

      http = Net::HTTP.new(parsed.host, parsed.port)
      http.use_ssl = (parsed.scheme == 'https')
      response = http.get(parsed.request_uri)

      if response.is_a?(Net::HTTPRedirection)
        target = resolve_redirect(parsed, response['location'])
        request_with_redirects(target, redirect_depth: redirect_depth + 1)
      else
        response
      end
    end

    # Turns a Location header into an absolute URI string, resolving
    # relative targets against +from+ and refusing HTTPS->HTTP
    # downgrades that also change host.
    #
    # @param from [URI::Generic] the URI that produced the redirect
    # @param location [String, nil] raw Location header value
    # @return [String] absolute redirect target
    # @raise [FetchError] if the header is missing, unparseable, or a
    #   cross-origin scheme downgrade
    def resolve_redirect(from, location)
      raise FetchError, "redirect with no Location header from #{from}" if location.nil? || location.empty?

      begin
        target = URI.parse(location)
      rescue URI::InvalidURIError => e
        # The header is server-supplied data, so report it as a fetch failure.
        raise FetchError, "unparseable redirect Location from #{from}: #{e.message}"
      end
      target = from + target if target.relative?

      if from.scheme == 'https' && target.scheme == 'http' && !same_host?(from, target)
        raise FetchError, "cross-origin HTTPS->HTTP redirect refused: #{from} -> #{target}"
      end

      target.to_s
    end

    # Hostnames are case-insensitive, so WWW.LOC.GOV and www.loc.gov are
    # the same host. A nil result from casecmp? (incompatible encodings)
    # is falsy and therefore treated as "different host".
    def same_host?(left, right)
      left.host.to_s.casecmp?(right.host.to_s)
    end

    # Returns +uri+ with http and https exchanged, or nil when the
    # string starts with neither scheme.
    def swap_scheme(uri)
      if uri.start_with?('https://')
        uri.sub(/\Ahttps:/, 'http:')
      elsif uri.start_with?('http://')
        uri.sub(/\Ahttp:/, 'https:')
      end
    end
  end
end
|
data/lib/kataba.rb
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
require 'nokogiri'
|
|
2
2
|
require 'tmpdir'
|
|
3
3
|
require 'digest/md5'
|
|
4
|
-
require 'open-uri'
|
|
5
4
|
require 'fileutils'
|
|
6
5
|
require 'yaml'
|
|
6
|
+
require 'kataba/fetcher'
|
|
7
7
|
|
|
8
8
|
module Kataba
|
|
9
9
|
|
|
@@ -110,25 +110,24 @@ module Kataba
|
|
|
110
110
|
file_path = "#{dir_name}/#{uri_md5}.xsd"
|
|
111
111
|
tmp_path = "#{file_path}.part"
|
|
112
112
|
|
|
113
|
+
# Resolve mirror first, if configured, then fetch. Fetching BEFORE
|
|
114
|
+
# opening tmp_path means a failed network fetch can't leave an
|
|
115
|
+
# orphaned 0-byte .part on disk — the file simply isn't created.
|
|
116
|
+
fetch_uri = xsd_uri
|
|
117
|
+
if !self.configuration.mirror_list.to_s.empty?
|
|
118
|
+
mirror_list = YAML.load_file(self.configuration.mirror_list)
|
|
119
|
+
mirror = mirror_list[xsd_uri]
|
|
120
|
+
fetch_uri = mirror unless mirror.to_s.empty?
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
body = Kataba::Fetcher.new(fetch_uri).fetch
|
|
124
|
+
|
|
113
125
|
# Write to a .part file first; only rename to the final cache path
|
|
114
126
|
# after we've confirmed the bytes parse as XML. Without this, a
|
|
115
127
|
# malformed response (HTML error page, truncated TCP stream, captive
|
|
116
128
|
# portal stub) would land at the canonical cache path and poison
|
|
117
129
|
# every subsequent fetch.
|
|
118
|
-
File.open(tmp_path, "wb+")
|
|
119
|
-
if !self.configuration.mirror_list.to_s.empty?
|
|
120
|
-
mirror_list = YAML.load_file(self.configuration.mirror_list)
|
|
121
|
-
mirror = mirror_list[xsd_uri]
|
|
122
|
-
if mirror.to_s.empty?
|
|
123
|
-
# No mirror for that uri
|
|
124
|
-
file.write(URI.open(xsd_uri).read)
|
|
125
|
-
else
|
|
126
|
-
file.write(URI.open(mirror).read)
|
|
127
|
-
end
|
|
128
|
-
else
|
|
129
|
-
file.write(URI.open(xsd_uri).read)
|
|
130
|
-
end
|
|
131
|
-
end
|
|
130
|
+
File.open(tmp_path, "wb+") { |file| file.write(body) }
|
|
132
131
|
|
|
133
132
|
begin
|
|
134
133
|
Nokogiri::XML(File.read(tmp_path)) { |c| c.strict }
|
metadata
CHANGED
|
@@ -1,69 +1,69 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: kataba
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0
|
|
4
|
+
version: 1.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- David Cliff
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-18 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
|
-
- -
|
|
17
|
+
- - '>='
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
19
|
version: '1.19'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
|
-
- -
|
|
24
|
+
- - '>='
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
26
|
version: '1.19'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: rake
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
|
-
- -
|
|
31
|
+
- - '>='
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
33
|
version: '0'
|
|
34
34
|
type: :development
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
|
-
- -
|
|
38
|
+
- - '>='
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '0'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: minitest
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
|
-
- -
|
|
45
|
+
- - '>='
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
47
|
version: '0'
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
|
-
- -
|
|
52
|
+
- - '>='
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
54
|
version: '0'
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
56
|
name: webmock
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
|
-
- -
|
|
59
|
+
- - '>='
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
61
|
version: '0'
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
|
-
- -
|
|
66
|
+
- - '>='
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
68
|
version: '0'
|
|
69
69
|
description: Kataba allows for mirroring and offline storage of XSD files, to enhance
|
|
@@ -74,6 +74,7 @@ extensions: []
|
|
|
74
74
|
extra_rdoc_files: []
|
|
75
75
|
files:
|
|
76
76
|
- lib/kataba.rb
|
|
77
|
+
- lib/kataba/fetcher.rb
|
|
77
78
|
homepage: http://rubygems.org/gems/kataba
|
|
78
79
|
licenses:
|
|
79
80
|
- MIT
|
|
@@ -84,16 +85,16 @@ require_paths:
|
|
|
84
85
|
- lib
|
|
85
86
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
86
87
|
requirements:
|
|
87
|
-
- -
|
|
88
|
+
- - '>='
|
|
88
89
|
- !ruby/object:Gem::Version
|
|
89
90
|
version: '3.2'
|
|
90
91
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
92
|
requirements:
|
|
92
|
-
- -
|
|
93
|
+
- - '>='
|
|
93
94
|
- !ruby/object:Gem::Version
|
|
94
95
|
version: '0'
|
|
95
96
|
requirements: []
|
|
96
|
-
rubygems_version: 3.
|
|
97
|
+
rubygems_version: 3.0.9
|
|
97
98
|
signing_key:
|
|
98
99
|
specification_version: 4
|
|
99
100
|
summary: XML Schema Definition (XSD) mirroring and offline validation for Nokogiri
|