kataba 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/kataba/fetcher.rb +88 -0
  3. data/lib/kataba.rb +14 -15
  4. metadata +14 -13
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 97107593bef37bc5a20d934c683ca45c5c1142d301bc705067a20b3e94a07d58
4
- data.tar.gz: fee7e5c92557a7f85170a395a97de0055b447d978d822575aadf8a6fe301b391
3
+ metadata.gz: 1849ebd6214469739d5a16981525a9c5ab23d0ca7ddb91ad028064a43e63dad0
4
+ data.tar.gz: 46da3306e49890c4801362c7c5af2c8e7cbf68cf2d3a8b9604e4d69f9d3f7887
5
5
  SHA512:
6
- metadata.gz: 28c18ab1818719194b5f9ae1a512bfad8716caf430fac3ce4509dad455e4796cf9634bf33fec4b0e51501cea3dc70b1908945dfd4c80d71316d61bac2fa68293
7
- data.tar.gz: d7d05f52acc72e4f6ed031d255304b7d37984f07e89d24bf76c74e9e6be40bf06e1dab08a07f9eb8e7b2d8633c39b6d93ea10df503752189672ea0689e397dc0
6
+ metadata.gz: 7d26265f3e4b30fbc11c82f12bec40860b56f3910d8a7aefccc9150bf6cd24d61a543da9fb33eb11111a8410386e1c632df6c02f56e2f91a8681515e4f2e5548
7
+ data.tar.gz: e35b99af877e7b660838d9b58943806c35084861c8295ea93e4a456d66e20943fb987261ee5adf795a0c858c26fa8a7c4cd41212d9012d26d5ba14cab80d51bc
data/lib/kataba/fetcher.rb ADDED
@@ -0,0 +1,88 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+
4
+ module Kataba
5
+ # Fetches a schema body, recovering from LoC-shaped delivery quirks
6
+ # that a verbatim URI.open would surface as cache-poisoning errors:
7
+ #
8
+ # - 5xx (Cloudflare bot-management 503/529, origin overload):
9
+ # retry once on the alternate scheme.
10
+ # - same-origin HTTPS->HTTP 3xx: follow. open-uri refuses all
11
+ # scheme downgrades; we relax to same-origin because the
12
+ # consumer already trusted this host by putting it in their
13
+ # schemaLocation. Cross-origin downgrades stay refused — that's
14
+ # the actual DNS-redirect attack vector.
15
+ #
16
+ # mirror_list remains the consumer's backstop for URI-identity
17
+ # changes (path renames, host moves) that no delivery heuristic
18
+ # can rescue.
19
+ class Fetcher
20
+ MAX_REDIRECTS = 5
21
+
22
+ class FetchError < StandardError; end
23
+
24
+ def initialize(uri)
25
+ @original_uri = uri
26
+ end
27
+
28
+ def fetch
29
+ attempt(@original_uri, alt_scheme_retry: true)
30
+ end
31
+
32
+ private
33
+
34
+ def attempt(uri, alt_scheme_retry:)
35
+ response = request_with_redirects(uri, redirect_depth: 0)
36
+
37
+ case response
38
+ when Net::HTTPSuccess
39
+ response.body
40
+ when Net::HTTPServerError
41
+ if alt_scheme_retry && (alt = swap_scheme(uri))
42
+ attempt(alt, alt_scheme_retry: false)
43
+ else
44
+ raise FetchError, "#{response.code} #{response.message} fetching #{uri}"
45
+ end
46
+ else
47
+ raise FetchError, "#{response.code} #{response.message} fetching #{uri}"
48
+ end
49
+ end
50
+
51
+ def request_with_redirects(uri, redirect_depth:)
52
+ raise FetchError, "too many redirects fetching #{@original_uri}" if redirect_depth > MAX_REDIRECTS
53
+
54
+ parsed = URI.parse(uri)
55
+ http = Net::HTTP.new(parsed.host, parsed.port)
56
+ http.use_ssl = (parsed.scheme == 'https')
57
+ response = http.get(parsed.request_uri)
58
+
59
+ if response.is_a?(Net::HTTPRedirection)
60
+ target = resolve_redirect(parsed, response['location'])
61
+ request_with_redirects(target, redirect_depth: redirect_depth + 1)
62
+ else
63
+ response
64
+ end
65
+ end
66
+
67
+ def resolve_redirect(from, location)
68
+ raise FetchError, "redirect with no Location header from #{from}" if location.nil? || location.empty?
69
+
70
+ target = URI.parse(location)
71
+ target = from + target if target.relative?
72
+
73
+ if from.scheme == 'https' && target.scheme == 'http' && from.host != target.host
74
+ raise FetchError, "cross-origin HTTPS->HTTP redirect refused: #{from} -> #{target}"
75
+ end
76
+
77
+ target.to_s
78
+ end
79
+
80
+ def swap_scheme(uri)
81
+ if uri.start_with?('https://')
82
+ uri.sub(/\Ahttps:/, 'http:')
83
+ elsif uri.start_with?('http://')
84
+ uri.sub(/\Ahttp:/, 'https:')
85
+ end
86
+ end
87
+ end
88
+ end
data/lib/kataba.rb CHANGED
@@ -1,9 +1,9 @@
1
1
  require 'nokogiri'
2
2
  require 'tmpdir'
3
3
  require 'digest/md5'
4
- require 'open-uri'
5
4
  require 'fileutils'
6
5
  require 'yaml'
6
+ require 'kataba/fetcher'
7
7
 
8
8
  module Kataba
9
9
 
@@ -110,25 +110,24 @@ module Kataba
110
110
  file_path = "#{dir_name}/#{uri_md5}.xsd"
111
111
  tmp_path = "#{file_path}.part"
112
112
 
113
+ # Resolve mirror first, if configured, then fetch. Fetching BEFORE
114
+ # opening tmp_path means a failed network fetch can't leave an
115
+ # orphaned 0-byte .part on disk — the file simply isn't created.
116
+ fetch_uri = xsd_uri
117
+ if !self.configuration.mirror_list.to_s.empty?
118
+ mirror_list = YAML.load_file(self.configuration.mirror_list)
119
+ mirror = mirror_list[xsd_uri]
120
+ fetch_uri = mirror unless mirror.to_s.empty?
121
+ end
122
+
123
+ body = Kataba::Fetcher.new(fetch_uri).fetch
124
+
113
125
  # Write to a .part file first; only rename to the final cache path
114
126
  # after we've confirmed the bytes parse as XML. Without this, a
115
127
  # malformed response (HTML error page, truncated TCP stream, captive
116
128
  # portal stub) would land at the canonical cache path and poison
117
129
  # every subsequent fetch.
118
- File.open(tmp_path, "wb+") do |file|
119
- if !self.configuration.mirror_list.to_s.empty?
120
- mirror_list = YAML.load_file(self.configuration.mirror_list)
121
- mirror = mirror_list[xsd_uri]
122
- if mirror.to_s.empty?
123
- # No mirror for that uri
124
- file.write(URI.open(xsd_uri).read)
125
- else
126
- file.write(URI.open(mirror).read)
127
- end
128
- else
129
- file.write(URI.open(xsd_uri).read)
130
- end
131
- end
130
+ File.open(tmp_path, "wb+") { |file| file.write(body) }
132
131
 
133
132
  begin
134
133
  Nokogiri::XML(File.read(tmp_path)) { |c| c.strict }
metadata CHANGED
@@ -1,69 +1,69 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kataba
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Cliff
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-05-15 00:00:00.000000000 Z
11
+ date: 2026-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - '>='
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.19'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - '>='
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.19'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - '>='
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - '>='
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - '>='
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: webmock
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ">="
59
+ - - '>='
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ">="
66
+ - - '>='
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  description: Kataba allows for mirroring and offline storage of XSD files, to enhance
@@ -74,6 +74,7 @@ extensions: []
74
74
  extra_rdoc_files: []
75
75
  files:
76
76
  - lib/kataba.rb
77
+ - lib/kataba/fetcher.rb
77
78
  homepage: http://rubygems.org/gems/kataba
78
79
  licenses:
79
80
  - MIT
@@ -84,16 +85,16 @@ require_paths:
84
85
  - lib
85
86
  required_ruby_version: !ruby/object:Gem::Requirement
86
87
  requirements:
87
- - - ">="
88
+ - - '>='
88
89
  - !ruby/object:Gem::Version
89
90
  version: '3.2'
90
91
  required_rubygems_version: !ruby/object:Gem::Requirement
91
92
  requirements:
92
- - - ">="
93
+ - - '>='
93
94
  - !ruby/object:Gem::Version
94
95
  version: '0'
95
96
  requirements: []
96
- rubygems_version: 3.4.10
97
+ rubygems_version: 3.0.9
97
98
  signing_key:
98
99
  specification_version: 4
99
100
  summary: XML Schema Definition (XSD) mirroring and offline validation for Nokogiri