lenex-parser 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +21 -0
- data/.yardopts +2 -0
- data/LICENSE +21 -0
- data/README.md +796 -0
- data/Rakefile +43 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/lenex-parser.gemspec +35 -0
- data/lib/lenex/document/serializer.rb +191 -0
- data/lib/lenex/document.rb +163 -0
- data/lib/lenex/parser/objects/age_date.rb +53 -0
- data/lib/lenex/parser/objects/age_group.rb +86 -0
- data/lib/lenex/parser/objects/athlete.rb +93 -0
- data/lib/lenex/parser/objects/bank.rb +56 -0
- data/lib/lenex/parser/objects/club.rb +101 -0
- data/lib/lenex/parser/objects/constructor.rb +51 -0
- data/lib/lenex/parser/objects/contact.rb +55 -0
- data/lib/lenex/parser/objects/entry.rb +70 -0
- data/lib/lenex/parser/objects/entry_schedule.rb +40 -0
- data/lib/lenex/parser/objects/event.rb +114 -0
- data/lib/lenex/parser/objects/facility.rb +58 -0
- data/lib/lenex/parser/objects/fee.rb +54 -0
- data/lib/lenex/parser/objects/fee_schedule.rb +26 -0
- data/lib/lenex/parser/objects/handicap.rb +86 -0
- data/lib/lenex/parser/objects/heat.rb +58 -0
- data/lib/lenex/parser/objects/host_club.rb +34 -0
- data/lib/lenex/parser/objects/judge.rb +55 -0
- data/lib/lenex/parser/objects/lenex.rb +72 -0
- data/lib/lenex/parser/objects/meet.rb +175 -0
- data/lib/lenex/parser/objects/meet_info.rb +60 -0
- data/lib/lenex/parser/objects/official.rb +70 -0
- data/lib/lenex/parser/objects/organizer.rb +34 -0
- data/lib/lenex/parser/objects/point_table.rb +54 -0
- data/lib/lenex/parser/objects/pool.rb +44 -0
- data/lib/lenex/parser/objects/qualify.rb +55 -0
- data/lib/lenex/parser/objects/ranking.rb +54 -0
- data/lib/lenex/parser/objects/record.rb +107 -0
- data/lib/lenex/parser/objects/record_athlete.rb +92 -0
- data/lib/lenex/parser/objects/record_list.rb +106 -0
- data/lib/lenex/parser/objects/record_relay.rb +62 -0
- data/lib/lenex/parser/objects/record_relay_position.rb +62 -0
- data/lib/lenex/parser/objects/relay.rb +93 -0
- data/lib/lenex/parser/objects/relay_entry.rb +81 -0
- data/lib/lenex/parser/objects/relay_position.rb +74 -0
- data/lib/lenex/parser/objects/relay_result.rb +85 -0
- data/lib/lenex/parser/objects/result.rb +76 -0
- data/lib/lenex/parser/objects/session.rb +107 -0
- data/lib/lenex/parser/objects/split.rb +53 -0
- data/lib/lenex/parser/objects/swim_style.rb +58 -0
- data/lib/lenex/parser/objects/time_standard.rb +55 -0
- data/lib/lenex/parser/objects/time_standard_list.rb +98 -0
- data/lib/lenex/parser/objects/time_standard_ref.rb +63 -0
- data/lib/lenex/parser/objects.rb +52 -0
- data/lib/lenex/parser/sax/document_handler.rb +184 -0
- data/lib/lenex/parser/version.rb +8 -0
- data/lib/lenex/parser/zip_source.rb +111 -0
- data/lib/lenex/parser.rb +184 -0
- data/lib/lenex-parser.rb +16 -0
- metadata +132 -0

data/lib/lenex/parser/objects/time_standard_list.rb
ADDED

@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+
+module Lenex
+  module Parser
+    module Objects
+      # Value object representing a TIMESTANDARDLIST element.
+      class TimeStandardList
+        ATTRIBUTES = {
+          'course' => { key: :course, required: true },
+          'gender' => { key: :gender, required: true },
+          'handicap' => { key: :handicap, required: false },
+          'name' => { key: :name, required: true },
+          'timestandardlistid' => { key: :time_standard_list_id, required: true },
+          'type' => { key: :type, required: false }
+        }.freeze
+
+        ATTRIBUTE_KEYS = ATTRIBUTES.values.map { |definition| definition[:key] }.freeze
+        private_constant :ATTRIBUTE_KEYS
+
+        ATTRIBUTE_KEYS.each { |attribute| attr_reader attribute }
+        attr_reader :age_group, :time_standards
+
+        def initialize(age_group: nil, time_standards: [], **attributes)
+          ATTRIBUTES.each_value do |definition|
+            key = definition[:key]
+            instance_variable_set(:"@#{key}", attributes[key])
+          end
+          @age_group = age_group
+          @time_standards = Array(time_standards)
+        end
+
+        def self.from_xml(element)
+          raise ::Lenex::Parser::ParseError, 'TIMESTANDARDLIST element is required' unless element
+
+          attributes = extract_attributes(element)
+          ensure_required_attributes!(attributes)
+
+          age_group = age_group_from(element.at_xpath('AGEGROUP'))
+          time_standards = extract_time_standards(element.at_xpath('TIMESTANDARDS'))
+
+          new(**attributes, age_group:, time_standards:)
+        end
+
+        def self.extract_attributes(element)
+          ATTRIBUTES.each_with_object({}) do |(attribute_name, definition), collected|
+            value = element.attribute(attribute_name)&.value
+            collected[definition[:key]] = value if value
+          end
+        end
+        private_class_method :extract_attributes
+
+        def self.ensure_required_attributes!(attributes)
+          REQUIRED_ATTRIBUTE_KEYS.each do |key|
+            value = attributes[key]
+            next unless value.nil? || value.strip.empty?
+
+            message = "TIMESTANDARDLIST #{ATTRIBUTE_NAME_FOR.fetch(key)} attribute is required"
+            raise ::Lenex::Parser::ParseError, message
+          end
+        end
+        private_class_method :ensure_required_attributes!
+
+        REQUIRED_ATTRIBUTE_KEYS = %i[
+          course
+          gender
+          name
+          time_standard_list_id
+        ].freeze
+        private_constant :REQUIRED_ATTRIBUTE_KEYS
+
+        ATTRIBUTE_NAME_FOR = ATTRIBUTES.each_with_object({}) do |attribute, mapping|
+          attribute_name, definition = attribute
+          mapping[definition[:key]] = attribute_name
+        end.freeze
+        private_constant :ATTRIBUTE_NAME_FOR
+
+        def self.age_group_from(element)
+          return unless element
+
+          AgeGroup.from_xml(element)
+        end
+        private_class_method :age_group_from
+
+        def self.extract_time_standards(collection_element)
+          unless collection_element
+            message = 'TIMESTANDARDLIST TIMESTANDARDS element is required'
+            raise ::Lenex::Parser::ParseError, message
+          end
+
+          collection_element.xpath('TIMESTANDARD').map do |time_standard_element|
+            TimeStandard.from_xml(time_standard_element)
+          end
+        end
+        private_class_method :extract_time_standards
+      end
+    end
+  end
+end
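
For orientation, a minimal usage sketch of the value object above, assuming the gem and its dependencies are loaded (e.g. via lib/lenex-parser.rb); the attribute values are illustrative and the nested TIMESTANDARDS collection is left empty so that only the code in this hunk is exercised:

  require 'nokogiri'

  xml = <<~XML
    <TIMESTANDARDLIST timestandardlistid="11" name="National qualifying times"
                      course="LCM" gender="F">
      <TIMESTANDARDS/>
    </TIMESTANDARDLIST>
  XML

  element = Nokogiri::XML(xml).root
  list = Lenex::Parser::Objects::TimeStandardList.from_xml(element)
  list.name                   # => "National qualifying times"
  list.time_standard_list_id  # => "11"
  list.time_standards         # => [] (no TIMESTANDARD children in this sketch)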

data/lib/lenex/parser/objects/time_standard_ref.rb
ADDED

@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+module Lenex
+  module Parser
+    module Objects
+      # Value object representing a TIMESTANDARDREF element.
+      class TimeStandardRef
+        ATTRIBUTES = {
+          'timestandardlistid' => { key: :time_standard_list_id, required: true },
+          'marker' => { key: :marker, required: false }
+        }.freeze
+
+        ATTRIBUTE_KEYS = ATTRIBUTES.values.map { |definition| definition[:key] }.freeze
+        private_constant :ATTRIBUTE_KEYS
+
+        ATTRIBUTE_KEYS.each { |attribute| attr_reader attribute }
+        attr_reader :fee
+
+        def initialize(fee: nil, **attributes)
+          ATTRIBUTES.each_value do |definition|
+            key = definition[:key]
+            instance_variable_set(:"@#{key}", attributes[key])
+          end
+          @fee = fee
+        end
+
+        def self.from_xml(element)
+          raise ::Lenex::Parser::ParseError, 'TIMESTANDARDREF element is required' unless element
+
+          attributes = extract_attributes(element)
+          fee = fee_from(element.at_xpath('FEE'))
+
+          new(**attributes, fee:)
+        end
+
+        def self.extract_attributes(element)
+          ATTRIBUTES.each_with_object({}) do |(attribute_name, definition), collected|
+            value = element.attribute(attribute_name)&.value
+            ensure_required_attribute!(attribute_name, definition, value)
+            collected[definition[:key]] = value if value
+          end
+        end
+        private_class_method :extract_attributes
+
+        def self.ensure_required_attribute!(attribute_name, definition, value)
+          return unless definition[:required]
+          return unless value.nil? || value.strip.empty?
+
+          message = "TIMESTANDARDREF #{attribute_name} attribute is required"
+          raise ::Lenex::Parser::ParseError, message
+        end
+        private_class_method :ensure_required_attribute!
+
+        def self.fee_from(element)
+          return unless element
+
+          Fee.from_xml(element)
+        end
+        private_class_method :fee_from
+      end
+    end
+  end
+end

data/lib/lenex/parser/objects.rb
ADDED

@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module Lenex
+  module Parser
+    # Namespace for parser value objects.
+    module Objects
+    end
+  end
+end
+
+require_relative 'objects/age_date'
+require_relative 'objects/age_group'
+require_relative 'objects/athlete'
+require_relative 'objects/bank'
+require_relative 'objects/club'
+require_relative 'objects/contact'
+require_relative 'objects/constructor'
+require_relative 'objects/entry'
+require_relative 'objects/entry_schedule'
+require_relative 'objects/event'
+require_relative 'objects/facility'
+require_relative 'objects/fee'
+require_relative 'objects/fee_schedule'
+require_relative 'objects/handicap'
+require_relative 'objects/heat'
+require_relative 'objects/host_club'
+require_relative 'objects/judge'
+require_relative 'objects/lenex'
+require_relative 'objects/meet'
+require_relative 'objects/meet_info'
+require_relative 'objects/official'
+require_relative 'objects/organizer'
+require_relative 'objects/point_table'
+require_relative 'objects/pool'
+require_relative 'objects/qualify'
+require_relative 'objects/ranking'
+require_relative 'objects/record'
+require_relative 'objects/record_athlete'
+require_relative 'objects/record_list'
+require_relative 'objects/record_relay'
+require_relative 'objects/record_relay_position'
+require_relative 'objects/relay'
+require_relative 'objects/relay_entry'
+require_relative 'objects/relay_position'
+require_relative 'objects/relay_result'
+require_relative 'objects/result'
+require_relative 'objects/session'
+require_relative 'objects/split'
+require_relative 'objects/swim_style'
+require_relative 'objects/time_standard'
+require_relative 'objects/time_standard_list'
+require_relative 'objects/time_standard_ref'

data/lib/lenex/parser/sax/document_handler.rb
ADDED

@@ -0,0 +1,184 @@
+# frozen_string_literal: true
+
+require 'cgi'
+require 'nokogiri'
+
+module Lenex
+  module Parser
+    module Sax
+      # SAX document handler that streams Lenex XML into a {Lenex::Document}.
+      class DocumentHandler < Nokogiri::XML::SAX::Document
+        CAPTURED_ELEMENTS = {
+          'CONSTRUCTOR' => lambda do |element, document|
+            document.constructor = Objects::Constructor.from_xml(element)
+          end,
+          'MEET' => lambda do |element, document|
+            document.add_meet(Objects::Meet.from_xml(element))
+          end,
+          'RECORDLIST' => lambda do |element, document|
+            document.add_record_list(Objects::RecordList.from_xml(element))
+          end,
+          'TIMESTANDARDLIST' => lambda do |element, document|
+            document.add_time_standard_list(Objects::TimeStandardList.from_xml(element))
+          end
+        }.freeze
+
+        def initialize(document)
+          super()
+          @document = document
+          @capture = nil
+          @root_encountered = false
+        end
+
+        def start_document
+          @capture = nil
+          @root_encountered = false
+        end
+
+        def start_element(name, attrs = [])
+          handle_root(name, attrs)
+          append_start_tag(name, attrs)
+          start_capture(name, attrs) if CAPTURED_ELEMENTS.key?(name)
+        end
+
+        def characters(string)
+          append_text(string)
+        end
+
+        def cdata_block(string)
+          append_cdata(string)
+        end
+
+        def end_element(name)
+          finalize_capture(name)
+        end
+
+        def end_document
+          ensure_root_present!
+        end
+
+        private
+
+        attr_reader :document
+
+        def handle_root(name, attrs)
+          return if @root_encountered
+
+          raise ParseError, 'Root element must be LENEX' unless name == 'LENEX'
+
+          @root_encountered = true
+          attributes = attributes_from(attrs)
+          version = attributes['version']
+
+          if version.nil? || version.strip.empty?
+            raise ParseError, 'LENEX version attribute is required'
+          end
+
+          document.version = version
+          document.revision = attributes['revision'] if attributes.key?('revision')
+        end
+
+        def append_start_tag(name, attrs)
+          @capture&.start_tag(name, attrs)
+        end
+
+        def append_text(string)
+          return if string.nil? || string.empty?
+
+          @capture&.append_text(string)
+        end
+
+        def append_cdata(string)
+          return if string.nil?
+
+          @capture&.append_cdata(string)
+        end
+
+        def start_capture(name, attrs)
+          return if @capture
+
+          @capture = Capture.new(name)
+          @capture.start_tag(name, attrs)
+        end
+
+        def finalize_capture(name)
+          return unless @capture
+
+          @capture.end_tag(name)
+          return unless @capture.complete?
+
+          emit_capture(@capture)
+          @capture = nil
+        end
+
+        def ensure_root_present!
+          return if @root_encountered
+
+          raise ParseError, 'Root element must be LENEX'
+        end
+
+        def emit_capture(capture)
+          element = Nokogiri::XML::Document.parse(capture.to_xml) do |config|
+            config.strict.noblanks
+          end.root
+
+          handler = CAPTURED_ELEMENTS.fetch(capture.name)
+          handler.call(element, document)
+        end
+
+        def attributes_from(attrs)
+          attrs.each_with_object({}) do |(key, value), collected|
+            collected[key] = value
+          end
+        end
+
+        # Simple builder for captured subtrees.
+        class Capture
+          attr_reader :name
+
+          def initialize(name)
+            @name = name
+            @buffer = +''
+            @depth = 0
+          end
+
+          def start_tag(name, attrs)
+            @buffer << '<' << name
+            attrs.each do |attr_name, attr_value|
+              @buffer << ' ' << attr_name << '="' << escape_attribute(attr_value) << '"'
+            end
+            @buffer << '>'
+            @depth += 1
+          end
+
+          def end_tag(name)
+            @buffer << '</' << name << '>'
+            @depth -= 1
+          end
+
+          def append_text(string)
+            @buffer << CGI.escapeHTML(string)
+          end
+
+          def append_cdata(string)
+            @buffer << '<![CDATA[' << string << ']]>'
+          end
+
+          def complete?
+            @depth.zero?
+          end
+
+          def to_xml
+            @buffer
+          end
+
+          private
+
+          def escape_attribute(value)
+            CGI.escapeHTML(value.to_s)
+          end
+        end
+      end
+    end
+  end
+end
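
A rough sketch of the streaming flow above, mirroring the wiring that data/lib/lenex/parser.rb (below) performs: the handler validates the LENEX root, copies its version and revision onto the document, and buffers each captured subtree until it is complete before re-parsing it and handing it to the matching lambda. Lenex::Document is defined in data/lib/lenex/document.rb, which is not shown in this excerpt.

  require 'nokogiri'

  document = Lenex::Document.new
  handler  = Lenex::Parser::Sax::DocumentHandler.new(document)

  Nokogiri::XML::SAX::Parser.new(handler).parse('<LENEX version="3.0" revision="1"/>')
  # The handler has called document.version = "3.0" and document.revision = "1";
  # MEET, RECORDLIST and TIMESTANDARDLIST children would be captured and added the same way.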

data/lib/lenex/parser/zip_source.rb
ADDED

@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+require 'stringio'
+
+module Lenex
+  module Parser
+    # Utility helpers for reading XML payloads embedded in ZIP archives.
+    module ZipSource
+      extend self
+
+      SIGNATURE = "PK\x03\x04".b
+      INSTALL_MESSAGE = 'ZIP archives require the rubyzip gem. ' \
+                        'Install it with `gem install rubyzip` and try again.'
+      MISSING_XML_MESSAGE = 'Lenex archive does not contain a .lef or .xml payload'
+
+      def extract(io)
+        ensure_rubyzip!
+
+        payload = xml_payload_from(io)
+        return build_io(payload) if payload
+
+        raise Lenex::Parser::Error, MISSING_XML_MESSAGE
+      rescue Zip::Error => e
+        raise Lenex::Parser::Error, "Unable to read Lenex archive: #{e.message}"
+      ensure
+        reset_io(io)
+      end
+
+      private
+
+      def ensure_rubyzip!
+        require 'zip'
+      rescue LoadError
+        raise Lenex::Parser::Error, INSTALL_MESSAGE
+      end
+
+      def xml_payload_from(io)
+        reset_io(io)
+        payload = read_with_input_stream(io)
+        return payload if payload
+
+        reset_io(io)
+        read_with_file(io)
+      end
+
+      def read_with_input_stream(io)
+        Zip::InputStream.open(io) do |zip|
+          while (entry = zip.get_next_entry)
+            next unless xml_entry?(entry)
+
+            return zip.read
+          end
+        end
+        nil
+      end
+
+      def read_with_file(io)
+        data = buffered_zip_data(io)
+        return if data.empty?
+
+        Zip::File.open_buffer(data) do |zip|
+          zip.each do |entry|
+            next unless xml_entry?(entry)
+
+            return entry.get_input_stream.read
+          end
+        end
+        nil
+      end
+
+      def buffered_zip_data(io)
+        raw_data = io.read
+        return '' if raw_data.nil? || raw_data.empty?
+
+        raw_data.dup.tap { |buffer| buffer.force_encoding(Encoding::BINARY) }
+      end
+
+      def xml_entry?(entry)
+        return false if entry.nil?
+        return false if entry.respond_to?(:directory?) && entry.directory?
+
+        name = entry.name
+        return false if name.nil? || name.empty?
+
+        name.downcase.end_with?('.lef', '.xml')
+      end
+
+      def build_io(payload)
+        if payload.nil? || payload.empty?
+          raise Lenex::Parser::Error, 'Lenex archive is missing XML payload'
+        end
+
+        binary_payload = payload.dup
+        binary_payload.force_encoding(Encoding::BINARY)
+
+        StringIO.new(binary_payload).tap do |xml_io|
+          xml_io.binmode if xml_io.respond_to?(:binmode)
+          xml_io.rewind if xml_io.respond_to?(:rewind)
+        end
+      end
+
+      def reset_io(io)
+        return unless io.respond_to?(:rewind)
+
+        io.rewind
+      rescue IOError
+        nil
+      end
+    end
+  end
+end
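
A hedged sketch of calling this helper directly; Lenex::Parser.parse (below) already does this automatically when it detects the PK signature. The archive name is illustrative and the rubyzip gem must be installed:

  File.open('results.lxf', 'rb') do |archive|
    xml_io = Lenex::Parser::ZipSource.extract(archive)
    xml_io.read(64)  # rewound binary StringIO holding the embedded .lef/.xml payload
  end
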
data/lib/lenex/parser.rb
ADDED

@@ -0,0 +1,184 @@
+# frozen_string_literal: true
+
+require 'nokogiri'
+require 'stringio'
+
+require_relative 'parser/version'
+require_relative 'parser/objects'
+require_relative 'parser/zip_source'
+require_relative 'parser/sax/document_handler'
+
+# Namespace for Lenex parsing functionality and data structures.
+module Lenex
+  # Lenex namespace for parser functionality.
+  module Parser
+    # Base error class for all parser-specific failures.
+    class Error < StandardError; end
+
+    # Error raised when the parser encounters invalid Lenex XML input.
+    class ParseError < Error; end
+
+    module_function
+
+    # Parses a Lenex XML document and returns an object model representing
+    # the LENEX root node. Accepts an IO-like object or a string containing
+    # the XML payload.
+    #
+    # @param source [#read, String] XML source to parse
+    # @return [Lenex::Parser::Objects::Lenex]
+    # @raise [Lenex::Parser::ParseError] when the payload is invalid
+    def parse(source)
+      io = ensure_io(source)
+      document = ::Lenex::Document.new
+      handler = Sax::DocumentHandler.new(document)
+      parser = Nokogiri::XML::SAX::Parser.new(handler)
+
+      parser.parse(io)
+      document.build_lenex
+    rescue ParseError
+      raise
+    rescue Nokogiri::XML::SyntaxError => e
+      raise ParseError, e.message
+    end
+
+    # Normalizes the provided source so Nokogiri can consume it as an IO.
+    #
+    # @param source [#read, String]
+    # @return [#read] an IO-like object ready for Nokogiri
+    def ensure_io(source)
+      io = normalize_source(source)
+
+      return ZipSource.extract(io) if zip_archive?(io)
+
+      io
+    end
+    private_class_method :ensure_io
+
+    def normalize_source(source)
+      io = if source.respond_to?(:read)
+             ensure_binmode(source)
+           elsif path_argument?(source)
+             open_path(source)
+           else
+             string_io_for(source)
+           end
+
+      ensure_rewindable_io(io)
+    end
+    private_class_method :normalize_source
+
+    def path_argument?(source)
+      return false unless path_like?(source)
+      return false if SourceClassifier.xml_payload?(source)
+      return false if SourceClassifier.zip_payload?(source)
+
+      true
+    end
+    private_class_method :path_argument?
+
+    def ensure_binmode(stream)
+      stream.tap { |io| io.binmode if io.respond_to?(:binmode) }
+    end
+    private_class_method :ensure_binmode
+
+    def path_like?(source)
+      path = extract_path(source)
+      return false unless path
+
+      ::File.file?(path) && ::File.readable?(path)
+    rescue TypeError
+      false
+    end
+    private_class_method :path_like?
+
+    def open_path(source)
+      path = extract_path(source)
+      ::File.open(path, 'rb')
+    end
+    private_class_method :open_path
+
+    def string_io_for(source)
+      StringIO.new(String(source)).tap do |string_io|
+        string_io.binmode if string_io.respond_to?(:binmode)
+      end
+    end
+    private_class_method :string_io_for
+
+    def extract_path(source)
+      path = if source.respond_to?(:to_path)
+               source.to_path
+             elsif source.is_a?(String)
+               source
+             end
+
+      return unless path
+      return if path.include?("\0")
+
+      path
+    end
+    private_class_method :extract_path
+
+    def zip_archive?(io)
+      read_signature(io) == ZipSource::SIGNATURE
+    end
+    private_class_method :zip_archive?
+
+    def read_signature(io)
+      signature = (io.read(ZipSource::SIGNATURE.length) || '').b
+      io.rewind
+      signature
+    end
+    private_class_method :read_signature
+
+    def ensure_rewindable_io(io)
+      return io if io.respond_to?(:rewind)
+
+      buffered = +''
+      while (chunk = io.read(4_096))
+        buffered << chunk
+      end
+
+      StringIO.new(buffered).tap do |buffered_io|
+        buffered_io.binmode if buffered_io.respond_to?(:binmode)
+      end
+    end
+    private_class_method :ensure_rewindable_io
+  end
+end
+
+module Lenex
+  module Parser
+    # Internal heuristics for deciding whether a value is a filesystem path or
+    # inline XML/ZIP payload.
+    module SourceClassifier
+      module_function
+
+      def xml_payload?(source)
+        payload = String(source)
+        bytes = payload.b
+        bytes = strip_utf8_bom(bytes)
+        stripped = bytes.lstrip
+
+        stripped.start_with?('<')
+      rescue Encoding::CompatibilityError, TypeError
+        false
+      end
+
+      def zip_payload?(source)
+        bytes = String(source).b
+
+        bytes.start_with?(ZipSource::SIGNATURE)
+      rescue Encoding::CompatibilityError, TypeError
+        false
+      end
+
+      def strip_utf8_bom(bytes)
+        return bytes unless bytes.start_with?("\xEF\xBB\xBF".b)
+
+        bytes.byteslice(3, bytes.bytesize - 3) || ''.b
+      end
+      module_function :strip_utf8_bom
+      private_class_method :strip_utf8_bom
+    end
+  end
+end
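
Taken together, a minimal end-to-end sketch of the public API added in this release; the require name follows data/lib/lenex-parser.rb and the file path is illustrative:

  require 'lenex-parser'

  begin
    lenex = Lenex::Parser.parse('meet-entries.lef')  # also accepts zipped archives, raw XML strings, or IO objects
    # lenex is a Lenex::Parser::Objects::Lenex value object assembled from the streamed document
  rescue Lenex::Parser::ParseError => e
    warn "Invalid Lenex payload: #{e.message}"
  end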