lenex-parser 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +21 -0
  3. data/.yardopts +2 -0
  4. data/LICENSE +21 -0
  5. data/README.md +796 -0
  6. data/Rakefile +43 -0
  7. data/bin/console +8 -0
  8. data/bin/setup +5 -0
  9. data/lenex-parser.gemspec +35 -0
  10. data/lib/lenex/document/serializer.rb +191 -0
  11. data/lib/lenex/document.rb +163 -0
  12. data/lib/lenex/parser/objects/age_date.rb +53 -0
  13. data/lib/lenex/parser/objects/age_group.rb +86 -0
  14. data/lib/lenex/parser/objects/athlete.rb +93 -0
  15. data/lib/lenex/parser/objects/bank.rb +56 -0
  16. data/lib/lenex/parser/objects/club.rb +101 -0
  17. data/lib/lenex/parser/objects/constructor.rb +51 -0
  18. data/lib/lenex/parser/objects/contact.rb +55 -0
  19. data/lib/lenex/parser/objects/entry.rb +70 -0
  20. data/lib/lenex/parser/objects/entry_schedule.rb +40 -0
  21. data/lib/lenex/parser/objects/event.rb +114 -0
  22. data/lib/lenex/parser/objects/facility.rb +58 -0
  23. data/lib/lenex/parser/objects/fee.rb +54 -0
  24. data/lib/lenex/parser/objects/fee_schedule.rb +26 -0
  25. data/lib/lenex/parser/objects/handicap.rb +86 -0
  26. data/lib/lenex/parser/objects/heat.rb +58 -0
  27. data/lib/lenex/parser/objects/host_club.rb +34 -0
  28. data/lib/lenex/parser/objects/judge.rb +55 -0
  29. data/lib/lenex/parser/objects/lenex.rb +72 -0
  30. data/lib/lenex/parser/objects/meet.rb +175 -0
  31. data/lib/lenex/parser/objects/meet_info.rb +60 -0
  32. data/lib/lenex/parser/objects/official.rb +70 -0
  33. data/lib/lenex/parser/objects/organizer.rb +34 -0
  34. data/lib/lenex/parser/objects/point_table.rb +54 -0
  35. data/lib/lenex/parser/objects/pool.rb +44 -0
  36. data/lib/lenex/parser/objects/qualify.rb +55 -0
  37. data/lib/lenex/parser/objects/ranking.rb +54 -0
  38. data/lib/lenex/parser/objects/record.rb +107 -0
  39. data/lib/lenex/parser/objects/record_athlete.rb +92 -0
  40. data/lib/lenex/parser/objects/record_list.rb +106 -0
  41. data/lib/lenex/parser/objects/record_relay.rb +62 -0
  42. data/lib/lenex/parser/objects/record_relay_position.rb +62 -0
  43. data/lib/lenex/parser/objects/relay.rb +93 -0
  44. data/lib/lenex/parser/objects/relay_entry.rb +81 -0
  45. data/lib/lenex/parser/objects/relay_position.rb +74 -0
  46. data/lib/lenex/parser/objects/relay_result.rb +85 -0
  47. data/lib/lenex/parser/objects/result.rb +76 -0
  48. data/lib/lenex/parser/objects/session.rb +107 -0
  49. data/lib/lenex/parser/objects/split.rb +53 -0
  50. data/lib/lenex/parser/objects/swim_style.rb +58 -0
  51. data/lib/lenex/parser/objects/time_standard.rb +55 -0
  52. data/lib/lenex/parser/objects/time_standard_list.rb +98 -0
  53. data/lib/lenex/parser/objects/time_standard_ref.rb +63 -0
  54. data/lib/lenex/parser/objects.rb +52 -0
  55. data/lib/lenex/parser/sax/document_handler.rb +184 -0
  56. data/lib/lenex/parser/version.rb +8 -0
  57. data/lib/lenex/parser/zip_source.rb +111 -0
  58. data/lib/lenex/parser.rb +184 -0
  59. data/lib/lenex-parser.rb +16 -0
  60. metadata +132 -0
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module Lenex
  module Parser
    module Objects
      # Value object representing a TIMESTANDARDLIST element.
      #
      # Exposes one reader per +:key+ listed in ATTRIBUTES, plus the optional
      # AGEGROUP child (+age_group+) and the TIMESTANDARD children found under
      # the mandatory TIMESTANDARDS collection (+time_standards+).
      class TimeStandardList
        # XML attribute name => reader key and whether the attribute is mandatory.
        ATTRIBUTES = {
          'course' => { key: :course, required: true },
          'gender' => { key: :gender, required: true },
          'handicap' => { key: :handicap, required: false },
          'name' => { key: :name, required: true },
          'timestandardlistid' => { key: :time_standard_list_id, required: true },
          'type' => { key: :type, required: false }
        }.freeze

        ATTRIBUTE_KEYS = ATTRIBUTES.values.map { |definition| definition[:key] }.freeze
        private_constant :ATTRIBUTE_KEYS

        # Derived from ATTRIBUTES so the +required:+ flags are the single source
        # of truth and cannot drift from a separately maintained list.
        REQUIRED_ATTRIBUTE_KEYS = ATTRIBUTES.each_value.filter_map do |definition|
          definition[:key] if definition[:required]
        end.freeze
        private_constant :REQUIRED_ATTRIBUTE_KEYS

        # Reverse lookup (reader key => XML attribute name) used in error messages.
        ATTRIBUTE_NAME_FOR = ATTRIBUTES.to_h do |attribute_name, definition|
          [definition[:key], attribute_name]
        end.freeze
        private_constant :ATTRIBUTE_NAME_FOR

        ATTRIBUTE_KEYS.each { |attribute| attr_reader attribute }
        attr_reader :age_group, :time_standards

        # @param age_group [Object, nil] parsed AGEGROUP child, if any
        # @param time_standards [Array, Object, nil] parsed TIMESTANDARD children;
        #   coerced with Kernel#Array
        # @param attributes [Hash{Symbol => String}] values keyed by the +:key+
        #   entries of ATTRIBUTES; keys that are not listed there are ignored
        def initialize(age_group: nil, time_standards: [], **attributes)
          ATTRIBUTES.each_value do |definition|
            key = definition[:key]
            instance_variable_set(:"@#{key}", attributes[key])
          end
          @age_group = age_group
          @time_standards = Array(time_standards)
        end

        # Builds a TimeStandardList from a parsed TIMESTANDARDLIST element.
        #
        # @param element [#attribute, #at_xpath, nil] the element node
        # @return [TimeStandardList]
        # @raise [Lenex::Parser::ParseError] when the element is nil, a required
        #   attribute is missing or blank, or the TIMESTANDARDS child is absent
        def self.from_xml(element)
          raise ::Lenex::Parser::ParseError, 'TIMESTANDARDLIST element is required' unless element

          attributes = extract_attributes(element)
          ensure_required_attributes!(attributes)

          age_group = age_group_from(element.at_xpath('AGEGROUP'))
          time_standards = extract_time_standards(element.at_xpath('TIMESTANDARDS'))

          new(**attributes, age_group:, time_standards:)
        end

        # Collects the attributes present on the element, keyed by reader key.
        # Attributes that are absent are left out of the result.
        def self.extract_attributes(element)
          ATTRIBUTES.each_with_object({}) do |(attribute_name, definition), collected|
            value = element.attribute(attribute_name)&.value
            collected[definition[:key]] = value if value
          end
        end
        private_class_method :extract_attributes

        # Raises for the first required attribute that is missing or whitespace-only.
        def self.ensure_required_attributes!(attributes)
          REQUIRED_ATTRIBUTE_KEYS.each do |key|
            value = attributes[key]
            next unless value.nil? || value.strip.empty?

            message = "TIMESTANDARDLIST #{ATTRIBUTE_NAME_FOR.fetch(key)} attribute is required"
            raise ::Lenex::Parser::ParseError, message
          end
        end
        private_class_method :ensure_required_attributes!

        # AGEGROUP is optional: returns nil when the child element is absent.
        def self.age_group_from(element)
          return unless element

          AgeGroup.from_xml(element)
        end
        private_class_method :age_group_from

        # Maps every TIMESTANDARD child of the TIMESTANDARDS collection; the
        # collection element itself is mandatory.
        def self.extract_time_standards(collection_element)
          unless collection_element
            message = 'TIMESTANDARDLIST TIMESTANDARDS element is required'
            raise ::Lenex::Parser::ParseError, message
          end

          collection_element.xpath('TIMESTANDARD').map do |time_standard_element|
            TimeStandard.from_xml(time_standard_element)
          end
        end
        private_class_method :extract_time_standards
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
module Lenex
  module Parser
    module Objects
      # Value object representing a TIMESTANDARDREF element: its XML
      # attributes plus the optional FEE child.
      class TimeStandardRef
        # XML attribute name => reader key and whether the attribute is mandatory.
        ATTRIBUTES = {
          'timestandardlistid' => { key: :time_standard_list_id, required: true },
          'marker' => { key: :marker, required: false }
        }.freeze

        ATTRIBUTE_KEYS = ATTRIBUTES.each_value.map { |spec| spec[:key] }.freeze
        private_constant :ATTRIBUTE_KEYS

        attr_reader(*ATTRIBUTE_KEYS)
        attr_reader :fee

        # @param fee [Object, nil] parsed FEE child, if any
        # @param attributes [Hash{Symbol => String}] values keyed by reader key
        def initialize(fee: nil, **attributes)
          ATTRIBUTE_KEYS.each do |reader_key|
            instance_variable_set(:"@#{reader_key}", attributes[reader_key])
          end
          @fee = fee
        end

        class << self
          # Builds a TimeStandardRef from a parsed TIMESTANDARDREF element.
          #
          # @raise [Lenex::Parser::ParseError] when the element is nil or a
          #   required attribute is missing or blank
          def from_xml(element)
            if element.nil? || element == false
              raise ::Lenex::Parser::ParseError, 'TIMESTANDARDREF element is required'
            end

            collected = extract_attributes(element)
            fee_child = element.at_xpath('FEE')

            new(**collected, fee: fee_from(fee_child))
          end

          private

          # Reads every known attribute, validating required ones as it goes.
          def extract_attributes(element)
            result = {}
            ATTRIBUTES.each_pair do |xml_name, spec|
              raw = element.attribute(xml_name)&.value
              ensure_required_attribute!(xml_name, spec, raw)
              next unless raw

              result[spec[:key]] = raw
            end
            result
          end

          # Raises when a required attribute is absent or whitespace-only.
          def ensure_required_attribute!(attribute_name, definition, value)
            return nil unless definition[:required] && (value.nil? || value.strip.empty?)

            raise ::Lenex::Parser::ParseError, "TIMESTANDARDREF #{attribute_name} attribute is required"
          end

          # FEE is optional: nil when the child element is absent.
          def fee_from(element)
            element ? Fee.from_xml(element) : nil
          end
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lenex
4
+ module Parser
5
+ # Namespace for the value objects the parser builds from Lenex XML elements.
6
+ module Objects
7
+ end
8
+ end
9
+ end
10
+
11
+ require_relative 'objects/age_date'
12
+ require_relative 'objects/age_group'
13
+ require_relative 'objects/athlete'
14
+ require_relative 'objects/bank'
15
+ require_relative 'objects/club'
16
+ require_relative 'objects/contact'
17
+ require_relative 'objects/constructor'
18
+ require_relative 'objects/entry'
19
+ require_relative 'objects/entry_schedule'
20
+ require_relative 'objects/event'
21
+ require_relative 'objects/facility'
22
+ require_relative 'objects/fee'
23
+ require_relative 'objects/fee_schedule'
24
+ require_relative 'objects/handicap'
25
+ require_relative 'objects/heat'
26
+ require_relative 'objects/host_club'
27
+ require_relative 'objects/judge'
28
+ require_relative 'objects/lenex'
29
+ require_relative 'objects/meet'
30
+ require_relative 'objects/meet_info'
31
+ require_relative 'objects/official'
32
+ require_relative 'objects/organizer'
33
+ require_relative 'objects/point_table'
34
+ require_relative 'objects/pool'
35
+ require_relative 'objects/qualify'
36
+ require_relative 'objects/ranking'
37
+ require_relative 'objects/record'
38
+ require_relative 'objects/record_athlete'
39
+ require_relative 'objects/record_list'
40
+ require_relative 'objects/record_relay'
41
+ require_relative 'objects/record_relay_position'
42
+ require_relative 'objects/relay'
43
+ require_relative 'objects/relay_entry'
44
+ require_relative 'objects/relay_position'
45
+ require_relative 'objects/relay_result'
46
+ require_relative 'objects/result'
47
+ require_relative 'objects/session'
48
+ require_relative 'objects/split'
49
+ require_relative 'objects/swim_style'
50
+ require_relative 'objects/time_standard'
51
+ require_relative 'objects/time_standard_list'
52
+ require_relative 'objects/time_standard_ref'
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require 'nokogiri'
5
+
6
module Lenex
  module Parser
    module Sax
      # SAX document handler that streams Lenex XML into a {Lenex::Document}.
      #
      # The first element seen must be LENEX and must carry a non-blank
      # +version+ attribute. Every element named in CAPTURED_ELEMENTS is
      # re-serialised into an in-memory buffer while its SAX events arrive,
      # re-parsed into a DOM subtree once its end tag is seen, and passed to
      # the matching lambda, which stores the resulting value object on the
      # document.
      class DocumentHandler < Nokogiri::XML::SAX::Document
        # Element name => lambda(element, document) that converts the re-parsed
        # subtree and records it on the document.
        CAPTURED_ELEMENTS = {
          'CONSTRUCTOR' => lambda do |element, document|
            document.constructor = Objects::Constructor.from_xml(element)
          end,
          'MEET' => lambda do |element, document|
            document.add_meet(Objects::Meet.from_xml(element))
          end,
          'RECORDLIST' => lambda do |element, document|
            document.add_record_list(Objects::RecordList.from_xml(element))
          end,
          'TIMESTANDARDLIST' => lambda do |element, document|
            document.add_time_standard_list(Objects::TimeStandardList.from_xml(element))
          end
        }.freeze

        # @param document [Lenex::Document] receiver of the parsed data
        def initialize(document)
          super()
          @document = document
          @capture = nil
          @root_encountered = false
        end

        # Resets per-document state so the handler starts each parse clean.
        def start_document
          @capture = nil
          @root_encountered = false
        end

        # @param name [String] element name
        # @param attrs [Array<Array(String, String)>] attribute name/value pairs
        # @raise [Lenex::Parser::ParseError] when the root element is invalid
        def start_element(name, attrs = [])
          handle_root(name, attrs)
          # When a capture is already running this records the nested tag;
          # otherwise it is a no-op and start_capture may open a new capture.
          append_start_tag(name, attrs)
          start_capture(name, attrs) if CAPTURED_ELEMENTS.key?(name)
        end

        def characters(string)
          append_text(string)
        end

        def cdata_block(string)
          append_cdata(string)
        end

        def end_element(name)
          finalize_capture(name)
        end

        # @raise [Lenex::Parser::ParseError] when no root element was seen
        def end_document
          ensure_root_present!
        end

        private

        attr_reader :document

        # Validates the first element of the document and copies the LENEX
        # version/revision attributes onto the document.
        def handle_root(name, attrs)
          return if @root_encountered

          raise ParseError, 'Root element must be LENEX' unless name == 'LENEX'

          @root_encountered = true
          attributes = attributes_from(attrs)
          version = attributes['version']

          if version.nil? || version.strip.empty?
            raise ParseError, 'LENEX version attribute is required'
          end

          document.version = version
          document.revision = attributes['revision'] if attributes.key?('revision')
        end

        def append_start_tag(name, attrs)
          @capture&.start_tag(name, attrs)
        end

        def append_text(string)
          return if string.nil? || string.empty?

          @capture&.append_text(string)
        end

        def append_cdata(string)
          return if string.nil?

          @capture&.append_cdata(string)
        end

        # Opens a capture for +name+ unless one is already in progress (a
        # captured element nested inside another stays part of the outer one).
        def start_capture(name, attrs)
          return if @capture

          @capture = Capture.new(name)
          @capture.start_tag(name, attrs)
        end

        # Records the end tag and, once the captured element is balanced,
        # emits it and clears the capture.
        def finalize_capture(name)
          return unless @capture

          @capture.end_tag(name)
          return unless @capture.complete?

          emit_capture(@capture)
          @capture = nil
        end

        def ensure_root_present!
          return if @root_encountered

          raise ParseError, 'Root element must be LENEX'
        end

        # Re-parses the buffered subtree (strict, blank text nodes dropped) and
        # dispatches its root element to the handler registered for its name.
        def emit_capture(capture)
          element = Nokogiri::XML::Document.parse(capture.to_xml) do |config|
            config.strict.noblanks
          end.root

          handler = CAPTURED_ELEMENTS.fetch(capture.name)
          handler.call(element, document)
        end

        # Converts the SAX attribute pair list into a Hash.
        def attributes_from(attrs)
          attrs.each_with_object({}) do |(key, value), collected|
            collected[key] = value
          end
        end

        # Simple builder for captured subtrees. Serialises SAX events back to
        # XML text and tracks nesting depth to know when the subtree is closed.
        class Capture
          # Whitespace that must be written as character references inside an
          # attribute value: written literally, XML attribute-value
          # normalisation would turn each of them into a space on re-parse.
          ATTRIBUTE_WHITESPACE_REFERENCES = {
            "\t" => '&#9;',
            "\n" => '&#10;',
            "\r" => '&#13;'
          }.freeze
          private_constant :ATTRIBUTE_WHITESPACE_REFERENCES

          attr_reader :name

          # @param name [String] name of the element being captured
          def initialize(name)
            @name = name
            @buffer = +''
            @depth = 0
          end

          def start_tag(name, attrs)
            @buffer << '<' << name
            attrs.each do |attr_name, attr_value|
              @buffer << ' ' << attr_name << '="' << escape_attribute(attr_value) << '"'
            end
            @buffer << '>'
            @depth += 1
          end

          def end_tag(name)
            @buffer << '</' << name << '>'
            @depth -= 1
          end

          def append_text(string)
            # A literal carriage return would be normalised to a line feed by
            # XML end-of-line handling when the buffer is parsed again.
            @buffer << CGI.escapeHTML(string).gsub("\r", '&#13;')
          end

          def append_cdata(string)
            @buffer << '<![CDATA[' << string << ']]>'
          end

          # @return [Boolean] true once every opened tag has been closed
          def complete?
            @depth.zero?
          end

          # @return [String] the XML text accumulated so far
          def to_xml
            @buffer
          end

          private

          def escape_attribute(value)
            CGI.escapeHTML(value.to_s).gsub(/[\t\n\r]/, ATTRIBUTE_WHITESPACE_REFERENCES)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lenex
4
+ module Parser
5
+ # Current version of the lenex-parser gem, exposed as a String constant.
6
+ VERSION = '3.0.0'
7
+ end
8
+ end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+
5
module Lenex
  module Parser
    # Utility helpers for reading XML payloads embedded in ZIP archives.
    #
    # rubyzip is loaded lazily the first time an archive is extracted.
    module ZipSource
      extend self

      # Leading bytes of a ZIP local file header.
      SIGNATURE = "PK\x03\x04".b
      INSTALL_MESSAGE = 'ZIP archives require the rubyzip gem. ' \
                        'Install it with `gem install rubyzip` and try again.'
      MISSING_XML_MESSAGE = 'Lenex archive does not contain a .lef or .xml payload'

      # Extracts the first .lef/.xml entry of the archive.
      #
      # @param io [#read] IO positioned anywhere inside a ZIP archive; it is
      #   rewound (when it supports #rewind) before returning or raising
      # @return [StringIO] binary IO over the extracted XML payload
      # @raise [Lenex::Parser::Error] when rubyzip is unavailable, the archive
      #   cannot be read, or it holds no XML payload
      def extract(io)
        # Must run outside any `rescue Zip::Error` clause: Ruby evaluates the
        # rescue class list when an exception passes through, so with rubyzip
        # missing the undefined `Zip` constant would raise NameError and hide
        # the install hint.
        ensure_rubyzip!
        extract_payload_io(io)
      ensure
        reset_io(io)
      end

      private

      # Raises a parser error with an install hint when rubyzip cannot be loaded.
      def ensure_rubyzip!
        require 'zip'
      rescue LoadError
        raise Lenex::Parser::Error, INSTALL_MESSAGE
      end

      # Reads the payload and wraps it, translating rubyzip failures into
      # parser errors. Only called once rubyzip has been loaded.
      def extract_payload_io(io)
        payload = xml_payload_from(io)
        raise Lenex::Parser::Error, MISSING_XML_MESSAGE unless payload

        build_io(payload)
      rescue Zip::Error => e
        raise Lenex::Parser::Error, "Unable to read Lenex archive: #{e.message}"
      end

      # Tries the streaming reader first and falls back to the buffered
      # Zip::File reader when streaming found no XML entry.
      def xml_payload_from(io)
        reset_io(io)
        payload = read_with_input_stream(io)
        return payload if payload

        reset_io(io)
        read_with_file(io)
      end

      # @return [String, nil] contents of the first XML entry, or nil
      def read_with_input_stream(io)
        Zip::InputStream.open(io) do |zip|
          while (entry = zip.get_next_entry)
            next unless xml_entry?(entry)

            return zip.read
          end
        end
        nil
      end

      # @return [String, nil] contents of the first XML entry, or nil
      def read_with_file(io)
        data = buffered_zip_data(io)
        return if data.empty?

        Zip::File.open_buffer(data) do |zip|
          zip.each do |entry|
            next unless xml_entry?(entry)

            return entry.get_input_stream.read
          end
        end
        nil
      end

      # Reads the remaining bytes of +io+ into a binary-encoded copy.
      def buffered_zip_data(io)
        raw_data = io.read
        return '' if raw_data.nil? || raw_data.empty?

        raw_data.dup.tap { |buffer| buffer.force_encoding(Encoding::BINARY) }
      end

      # True for non-directory entries whose name ends in .lef or .xml
      # (case-insensitive).
      def xml_entry?(entry)
        return false if entry.nil?
        return false if entry.respond_to?(:directory?) && entry.directory?

        name = entry.name
        return false if name.nil? || name.empty?

        name.downcase.end_with?('.lef', '.xml')
      end

      # Wraps the payload in a rewound, binary StringIO.
      #
      # @raise [Lenex::Parser::Error] when the payload is nil or empty
      def build_io(payload)
        if payload.nil? || payload.empty?
          raise Lenex::Parser::Error, 'Lenex archive is missing XML payload'
        end

        binary_payload = payload.dup
        binary_payload.force_encoding(Encoding::BINARY)

        StringIO.new(binary_payload).tap do |xml_io|
          xml_io.binmode if xml_io.respond_to?(:binmode)
          xml_io.rewind if xml_io.respond_to?(:rewind)
        end
      end

      # Best-effort rewind; an IOError (e.g. a closed stream) is deliberately ignored.
      def reset_io(io)
        return unless io.respond_to?(:rewind)

        io.rewind
      rescue IOError
        nil
      end
    end
  end
end
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'stringio'
5
+
6
+ require_relative 'parser/version'
7
+ require_relative 'parser/objects'
8
+ require_relative 'parser/zip_source'
9
+ require_relative 'parser/sax/document_handler'
10
+
11
# Namespace for Lenex parsing functionality and data structures.
module Lenex
  # Lenex namespace for parser functionality.
  module Parser
    # Base error class for all parser-specific failures.
    class Error < StandardError; end

    # Error raised when the parser encounters invalid Lenex XML input.
    class ParseError < Error; end

    module_function

    # Parses a Lenex XML document and returns an object model representing
    # the LENEX root node. Accepts an IO-like object, a filesystem path
    # (String or Pathname), or a string containing the XML payload. ZIP
    # archives are detected by signature and unpacked first.
    #
    # A file opened here for a path argument is closed before returning;
    # IO objects supplied by the caller are left open.
    #
    # @param source [#read, String, Pathname] XML source to parse
    # @return [Lenex::Parser::Objects::Lenex]
    # @raise [Lenex::Parser::ParseError] when the payload is invalid
    def parse(source)
      owned_file = owned_file_for(source)
      io = ensure_io(owned_file || source)
      document = ::Lenex::Document.new
      handler = Sax::DocumentHandler.new(document)
      parser = Nokogiri::XML::SAX::Parser.new(handler)

      parser.parse(io)
      document.build_lenex
    rescue ParseError
      raise
    rescue Nokogiri::XML::SyntaxError => e
      raise ParseError, e.message
    ensure
      # Only the handle this method opened is closed.
      owned_file&.close
    end

    # Opens the file behind a path argument so that parse can close it again.
    #
    # @return [File, nil] nil when the source is a stream or inline payload
    def owned_file_for(source)
      return if stream_source?(source)

      open_path(source) if path_argument?(source)
    end
    private_class_method :owned_file_for

    # Normalizes the provided source so Nokogiri can consume it as an IO.
    #
    # @param source [#read, String]
    # @return [#read] an IO-like object ready for Nokogiri
    def ensure_io(source)
      io = normalize_source(source)

      return ZipSource.extract(io) if zip_archive?(io)

      io
    end
    private_class_method :ensure_io

    # Turns a stream, path or inline payload into a rewindable IO.
    def normalize_source(source)
      io = if stream_source?(source)
             ensure_binmode(source)
           elsif path_argument?(source)
             open_path(source)
           else
             string_io_for(source)
           end

      ensure_rewindable_io(io)
    end
    private_class_method :normalize_source

    # Whether the source should be consumed as a stream. Pathname responds
    # to #read, but Pathname#read(length) re-reads the file from its start
    # on every call, so treating it as a stream would never reach EOF; it is
    # handled as a path instead.
    def stream_source?(source)
      return false if defined?(::Pathname) && source.is_a?(::Pathname)

      source.respond_to?(:read)
    end
    private_class_method :stream_source?

    # True when the source names a readable file and does not itself look
    # like inline XML or ZIP data.
    def path_argument?(source)
      return false unless path_like?(source)
      return false if SourceClassifier.xml_payload?(source)
      return false if SourceClassifier.zip_payload?(source)

      true
    end
    private_class_method :path_argument?

    def ensure_binmode(stream)
      stream.tap { |io| io.binmode if io.respond_to?(:binmode) }
    end
    private_class_method :ensure_binmode

    def path_like?(source)
      path = extract_path(source)
      return false unless path

      ::File.file?(path) && ::File.readable?(path)
    rescue TypeError
      false
    end
    private_class_method :path_like?

    def open_path(source)
      path = extract_path(source)
      ::File.open(path, 'rb')
    end
    private_class_method :open_path

    def string_io_for(source)
      StringIO.new(String(source)).tap do |string_io|
        string_io.binmode if string_io.respond_to?(:binmode)
      end
    end
    private_class_method :string_io_for

    # @return [String, nil] the candidate path, or nil when the source is not
    #   path-like or contains a NUL byte
    def extract_path(source)
      path = if source.respond_to?(:to_path)
               source.to_path
             elsif source.is_a?(String)
               source
             end

      return unless path
      return if path.include?("\0")

      path
    end
    private_class_method :extract_path

    def zip_archive?(io)
      read_signature(io) == ZipSource::SIGNATURE
    end
    private_class_method :zip_archive?

    # Peeks at the leading bytes and rewinds so the caller sees the full stream.
    def read_signature(io)
      signature = (io.read(ZipSource::SIGNATURE.length) || '').b
      io.rewind
      signature
    end
    private_class_method :read_signature

    # Streams that cannot rewind are drained into a StringIO.
    def ensure_rewindable_io(io)
      return io if io.respond_to?(:rewind)

      buffered = +''
      while (chunk = io.read(4_096))
        buffered << chunk
      end

      StringIO.new(buffered).tap do |buffered_io|
        buffered_io.binmode if buffered_io.respond_to?(:binmode)
      end
    end
    private_class_method :ensure_rewindable_io
  end
end
148
+
149
module Lenex
  module Parser
    # Internal heuristics for telling a filesystem path apart from an inline
    # XML or ZIP payload.
    module SourceClassifier
      module_function

      # True when the value, after an optional UTF-8 BOM and leading
      # whitespace, starts with '<'.
      def xml_payload?(source)
        content = strip_utf8_bom(String(source).b)
        content.lstrip.start_with?('<')
      rescue Encoding::CompatibilityError, TypeError
        false
      end

      # True when the value starts with the ZIP local-file-header signature.
      def zip_payload?(source)
        String(source).b.start_with?(ZipSource::SIGNATURE)
      rescue Encoding::CompatibilityError, TypeError
        false
      end

      # Drops a leading UTF-8 byte order mark from a binary string.
      def strip_utf8_bom(bytes)
        marker = "\xEF\xBB\xBF".b
        if bytes.start_with?(marker)
          bytes.byteslice(marker.bytesize..) || ''.b
        else
          bytes
        end
      end
      private_class_method :strip_utf8_bom
    end
  end
end