relaton-oasis 1.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/rake.yml +36 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yml +12 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +225 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/grammars/basicdoc.rng +1150 -0
- data/grammars/biblio.rng +1238 -0
- data/grammars/isodoc.rng +2634 -0
- data/grammars/reqt.rng +223 -0
- data/lib/relaton_oasis/data_fetcher.rb +91 -0
- data/lib/relaton_oasis/data_parser.rb +204 -0
- data/lib/relaton_oasis/data_parser_utils.rb +89 -0
- data/lib/relaton_oasis/data_part_parser.rb +131 -0
- data/lib/relaton_oasis/hash_converter.rb +15 -0
- data/lib/relaton_oasis/oasis_bibliographic_item.rb +81 -0
- data/lib/relaton_oasis/oasis_bibliography.rb +43 -0
- data/lib/relaton_oasis/processor.rb +51 -0
- data/lib/relaton_oasis/version.rb +5 -0
- data/lib/relaton_oasis/xml_parser.rb +25 -0
- data/lib/relaton_oasis.rb +27 -0
- data/relaton_oasis.gemspec +48 -0
- metadata +214 -0
data/grammars/reqt.rng
ADDED
@@ -0,0 +1,223 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
3
|
+
<!--
|
4
|
+
Presupposes isodoc.rnc, is included in it
|
5
|
+
include "isodoc.rnc" { }
|
6
|
+
-->
|
7
|
+
<define name="requirement">
|
8
|
+
<element name="requirement">
|
9
|
+
<ref name="RequirementType"/>
|
10
|
+
</element>
|
11
|
+
</define>
|
12
|
+
<define name="recommendation">
|
13
|
+
<element name="recommendation">
|
14
|
+
<ref name="RequirementType"/>
|
15
|
+
</element>
|
16
|
+
</define>
|
17
|
+
<define name="permission">
|
18
|
+
<element name="permission">
|
19
|
+
<ref name="RequirementType"/>
|
20
|
+
</element>
|
21
|
+
</define>
|
22
|
+
<define name="RequirementType">
|
23
|
+
<optional>
|
24
|
+
<attribute name="obligation">
|
25
|
+
<ref name="ObligationType"/>
|
26
|
+
</attribute>
|
27
|
+
</optional>
|
28
|
+
<optional>
|
29
|
+
<attribute name="unnumbered">
|
30
|
+
<data type="boolean"/>
|
31
|
+
</attribute>
|
32
|
+
</optional>
|
33
|
+
<optional>
|
34
|
+
<attribute name="number"/>
|
35
|
+
</optional>
|
36
|
+
<optional>
|
37
|
+
<attribute name="subsequence"/>
|
38
|
+
</optional>
|
39
|
+
<optional>
|
40
|
+
<attribute name="keep-with-next">
|
41
|
+
<data type="boolean"/>
|
42
|
+
</attribute>
|
43
|
+
</optional>
|
44
|
+
<optional>
|
45
|
+
<attribute name="keep-lines-together">
|
46
|
+
<data type="boolean"/>
|
47
|
+
</attribute>
|
48
|
+
</optional>
|
49
|
+
<attribute name="id">
|
50
|
+
<data type="ID"/>
|
51
|
+
</attribute>
|
52
|
+
<optional>
|
53
|
+
<attribute name="filename"/>
|
54
|
+
</optional>
|
55
|
+
<optional>
|
56
|
+
<attribute name="model"/>
|
57
|
+
</optional>
|
58
|
+
<optional>
|
59
|
+
<attribute name="type"/>
|
60
|
+
</optional>
|
61
|
+
<optional>
|
62
|
+
<attribute name="tag"/>
|
63
|
+
</optional>
|
64
|
+
<optional>
|
65
|
+
<attribute name="multilingual-rendering">
|
66
|
+
<ref name="MultilingualRenderingType"/>
|
67
|
+
</attribute>
|
68
|
+
</optional>
|
69
|
+
<optional>
|
70
|
+
<ref name="reqtitle"/>
|
71
|
+
</optional>
|
72
|
+
<optional>
|
73
|
+
<ref name="label"/>
|
74
|
+
</optional>
|
75
|
+
<zeroOrMore>
|
76
|
+
<ref name="subject"/>
|
77
|
+
</zeroOrMore>
|
78
|
+
<zeroOrMore>
|
79
|
+
<ref name="reqinherit"/>
|
80
|
+
</zeroOrMore>
|
81
|
+
<zeroOrMore>
|
82
|
+
<ref name="classification"/>
|
83
|
+
</zeroOrMore>
|
84
|
+
<zeroOrMore>
|
85
|
+
<choice>
|
86
|
+
<ref name="measurementtarget"/>
|
87
|
+
<ref name="specification"/>
|
88
|
+
<ref name="verification"/>
|
89
|
+
<ref name="import"/>
|
90
|
+
<ref name="description"/>
|
91
|
+
<ref name="component"/>
|
92
|
+
</choice>
|
93
|
+
</zeroOrMore>
|
94
|
+
<optional>
|
95
|
+
<ref name="reqt_references"/>
|
96
|
+
</optional>
|
97
|
+
<zeroOrMore>
|
98
|
+
<choice>
|
99
|
+
<ref name="requirement"/>
|
100
|
+
<ref name="recommendation"/>
|
101
|
+
<ref name="permission"/>
|
102
|
+
</choice>
|
103
|
+
</zeroOrMore>
|
104
|
+
</define>
|
105
|
+
<define name="reqtitle">
|
106
|
+
<element name="title">
|
107
|
+
<ref name="FormattedString"/>
|
108
|
+
</element>
|
109
|
+
</define>
|
110
|
+
<define name="label">
|
111
|
+
<element name="label">
|
112
|
+
<oneOrMore>
|
113
|
+
<ref name="TextElement"/>
|
114
|
+
</oneOrMore>
|
115
|
+
</element>
|
116
|
+
</define>
|
117
|
+
<define name="subject">
|
118
|
+
<element name="subject">
|
119
|
+
<oneOrMore>
|
120
|
+
<ref name="TextElement"/>
|
121
|
+
</oneOrMore>
|
122
|
+
</element>
|
123
|
+
</define>
|
124
|
+
<define name="reqinherit">
|
125
|
+
<element name="inherit">
|
126
|
+
<oneOrMore>
|
127
|
+
<ref name="TextElement"/>
|
128
|
+
</oneOrMore>
|
129
|
+
</element>
|
130
|
+
</define>
|
131
|
+
<define name="measurementtarget">
|
132
|
+
<element name="measurement-target">
|
133
|
+
<ref name="RequirementSubpart"/>
|
134
|
+
</element>
|
135
|
+
</define>
|
136
|
+
<define name="specification">
|
137
|
+
<element name="specification">
|
138
|
+
<ref name="RequirementSubpart"/>
|
139
|
+
</element>
|
140
|
+
</define>
|
141
|
+
<define name="verification">
|
142
|
+
<element name="verification">
|
143
|
+
<ref name="RequirementSubpart"/>
|
144
|
+
</element>
|
145
|
+
</define>
|
146
|
+
<define name="import">
|
147
|
+
<element name="import">
|
148
|
+
<ref name="RequirementSubpart"/>
|
149
|
+
</element>
|
150
|
+
</define>
|
151
|
+
<define name="description">
|
152
|
+
<element name="description">
|
153
|
+
<ref name="RequirementSubpart"/>
|
154
|
+
</element>
|
155
|
+
</define>
|
156
|
+
<define name="component">
|
157
|
+
<element name="component">
|
158
|
+
<attribute name="class"/>
|
159
|
+
<ref name="RequirementSubpart"/>
|
160
|
+
</element>
|
161
|
+
</define>
|
162
|
+
<define name="reqt_references">
|
163
|
+
<element name="references">
|
164
|
+
<oneOrMore>
|
165
|
+
<ref name="bibitem"/>
|
166
|
+
</oneOrMore>
|
167
|
+
</element>
|
168
|
+
</define>
|
169
|
+
<define name="RequirementSubpart">
|
170
|
+
<optional>
|
171
|
+
<attribute name="type"/>
|
172
|
+
</optional>
|
173
|
+
<optional>
|
174
|
+
<attribute name="exclude">
|
175
|
+
<data type="boolean"/>
|
176
|
+
</attribute>
|
177
|
+
</optional>
|
178
|
+
<optional>
|
179
|
+
<attribute name="keep-with-next">
|
180
|
+
<data type="boolean"/>
|
181
|
+
</attribute>
|
182
|
+
</optional>
|
183
|
+
<optional>
|
184
|
+
<attribute name="keep-lines-together">
|
185
|
+
<data type="boolean"/>
|
186
|
+
</attribute>
|
187
|
+
</optional>
|
188
|
+
<optional>
|
189
|
+
<attribute name="tag"/>
|
190
|
+
</optional>
|
191
|
+
<optional>
|
192
|
+
<attribute name="multilingual-rendering">
|
193
|
+
<ref name="MultilingualRenderingType"/>
|
194
|
+
</attribute>
|
195
|
+
</optional>
|
196
|
+
<oneOrMore>
|
197
|
+
<ref name="BasicBlock"/>
|
198
|
+
</oneOrMore>
|
199
|
+
</define>
|
200
|
+
<define name="ObligationType">
|
201
|
+
<choice>
|
202
|
+
<value>requirement</value>
|
203
|
+
<value>recommendation</value>
|
204
|
+
<value>permission</value>
|
205
|
+
</choice>
|
206
|
+
</define>
|
207
|
+
<define name="classification">
|
208
|
+
<element name="classification">
|
209
|
+
<ref name="classification_tag"/>
|
210
|
+
<ref name="classification_value"/>
|
211
|
+
</element>
|
212
|
+
</define>
|
213
|
+
<define name="classification_tag">
|
214
|
+
<element name="tag">
|
215
|
+
<text/>
|
216
|
+
</element>
|
217
|
+
</define>
|
218
|
+
<define name="classification_value">
|
219
|
+
<element name="value">
|
220
|
+
<text/>
|
221
|
+
</element>
|
222
|
+
</define>
|
223
|
+
</grammar>
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module RelatonOasis
  #
  # Downloads the OASIS standards index page, converts every listed document
  # (and each of its parts) into a bibliographic item and writes one file per
  # item into the output directory.
  #
  class DataFetcher
    #
    # Initialize a new DataFetcher
    #
    # @param [String] output directory to save files
    # @param [String] format format of output files (xml, yaml, bibxml)
    #
    def initialize(output, format)
      @output = output
      @format = format
      # "bibxml" files get the plain "xml" extension
      @ext = @format.sub(/^bib|^rfc/, "")
      # names of the files written so far, used to report collisions
      @files = []
    end

    #
    # Initialize fetcher and run fetch
    #
    # @param [String] output directory to save files, default: "data"
    # @param [String] format format of output files (xml, yaml, bibxml); default: yaml
    #
    def self.fetch(output: "data", format: "yaml")
      t1 = Time.now
      puts "Started at: #{t1}"
      # mkdir_p does nothing when the directory already exists
      FileUtils.mkdir_p output
      new(output, format).fetch
      t2 = Time.now
      puts "Stopped at: #{t2}"
      puts "Done in: #{(t2 - t1).round} sec."
    end

    #
    # Fetch and save all the documents from OASIS
    #
    def fetch
      agent = Mechanize.new
      resp = agent.get "https://www.oasis-open.org/standards/"
      doc = Nokogiri::HTML resp.body
      # every <details> element is handed to DataParser as one document
      doc.xpath("//details").each do |item|
        save_doc DataParser.new(item).parse
        fetch_parts item
      end
    end

    #
    # Fetch and save parts of document
    #
    # @param [Nokogiri::HTML::Element] item document node
    #
    def fetch_parts(item)
      parts = item.xpath("./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[strong or span/strong]")
      # parts are saved only when more than one matching entry is found
      return unless parts.size > 1

      parts.each do |part|
        save_doc DataPartParser.new(part).parse
      end
    end

    #
    # Save document to file
    #
    # A warning is printed when the file name was already used in this run;
    # the file is written (overwritten) in either case.
    #
    # @param [RelatonOasis::OasisBibliographicItem] doc
    #
    def save_doc(doc) # rubocop:disable Metrics/MethodLength
      c = case @format
          when "xml" then doc.to_xml(bibdata: true)
          when "yaml" then doc.to_hash.to_yaml
          else doc.send("to_#{@format}")
          end
      file = file_name doc
      if @files.include? file
        warn "File #{file} already exists. Document: #{doc.docnumber}"
      else
        @files << file
      end
      File.write file, c, encoding: "UTF-8"
    end

    #
    # Generate file name
    #
    # Whitespace, commas, colons and slashes in the document number are
    # replaced with underscores, runs of underscores are collapsed and the
    # result is upcased.
    #
    # @param [RelatonOasis::OasisBibliographicItem] doc
    #
    # @return [String] file name
    #
    def file_name(doc)
      name = doc.docnumber.gsub(/[\s,:\/]/, "_").squeeze("_").upcase
      File.join @output, "#{name}.#{@ext}"
    end
  end
end
|
@@ -0,0 +1,204 @@
|
|
1
|
+
module RelatonOasis
  #
  # Converts one document node of the OASIS standards page into a
  # RelatonOasis::OasisBibliographicItem.
  #
  class DataParser
    include RelatonOasis::DataParserUtils

    #
    # Initialize parser.
    #
    # @param [Nokogiri::HTML::Element] node document node
    #
    def initialize(node)
      @node = node
      @title = @node.at("./summary/div/h2").text
      # text of the "cite as" paragraph; nil when the node has no such paragraph
      @text = @node.at(
        "./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[em or i or a or span]",
      )&.text
    end

    #
    # Parse document.
    #
    # @return [RelatonOasis::OasisBibliographicItem] bibliographic item
    #
    def parse # rubocop:disable Metrics/MethodLength
      RelatonOasis::OasisBibliographicItem.new(
        fetched: Date.today.to_s,
        type: "standard",
        doctype: parse_doctype,
        title: parse_title,
        docid: parse_docid,
        docnumber: parse_docnumber,
        date: parse_date,
        abstract: parse_abstract,
        language: ["en"],
        script: ["Latn"],
        editorialgroup: parse_editorialgroup,
        relation: parse_relation,
        technology_area: parse_technology_area,
      )
    end

    #
    # Parse title.
    #
    # @return [Array<RelatonBib::TypedTitleString>] single "main" title
    #
    def parse_title
      [RelatonBib::TypedTitleString.new(type: "main", content: @title, language: "en", script: "Latn")]
    end

    #
    # Parse date.
    #
    # @return [Array<RelatonBib::BibliographicDate>] date
    #
    def parse_date
      @node.xpath("./summary/div/time[@class='standard__date']").map do |d|
        # expects text such as "01 January 2020" somewhere in the element
        date_str = d.text.match(/\d{2}\s\w+\s\d{4}/).to_s
        date = Date.parse(date_str).to_s
        RelatonBib::BibliographicDate.new(on: date, type: "issued")
      end
    end

    #
    # Parse abstract.
    #
    # @return [Array<RelatonBib::FormattedString>] abstract, empty when the
    #   node has no description text
    #
    def parse_abstract
      c = @node.xpath(
        "./summary/div/div[@class='standard__description']/p",
      ).map { |a| a.text.gsub(/[\n\t]+/, " ").strip }.join("\n")
      return [] if c.empty?

      [RelatonBib::FormattedString.new(content: c, language: "en", script: "Latn")]
    end

    #
    # Parse technical committee.
    #
    # @return [RelatonBib::EditorialGroup] technical committee
    #
    def parse_editorialgroup
      tc = @node.xpath("./div[@class='standard__details']/a").map do |a|
        wg = RelatonBib::WorkGroup.new name: a.text.strip
        RelatonBib::TechnicalCommittee.new wg
      end
      RelatonBib::EditorialGroup.new tc
    end

    #
    # Parse relation.
    #
    # @return [Array<RelatonBib::DocumentRelation>] "hasPart" relations, empty
    #   unless more than one part entry is found
    #
    def parse_relation
      rels = @node.xpath(
        "./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[strong or span/strong or b/span]",
      )
      return [] unless rels.size > 1

      rels.map do |r|
        docid = DataPartParser.new(r).parse_docid
        fref = RelatonBib::FormattedRef.new content: docid[0].id
        bib = RelatonOasis::OasisBibliographicItem.new formattedref: fref
        RelatonBib::DocumentRelation.new type: "hasPart", bibitem: bib
      end
    end

    #
    # Parse document part references.
    #
    # @return [Array<String>] document part references with the leading
    #   bracket(s) and one trailing bracket removed
    #
    def document_part_refs
      @node.css(
        ".standard__grid--cite-as > p > strong",
        "span.Refterm", "span.abbrev", "span.citationLabel > strong"
      ).map { |p| p.text.gsub(/^\[{1,2}|\]$/, "").strip }
    end

    #
    # Parse document number.
    #
    # With no part reference the number is derived from the title, with one
    # reference that reference is used, otherwise the common prefix of all
    # references is used.
    #
    # @return [String] document number
    #
    def parse_docnumber
      parts = document_part_refs
      case parts.size
      when 0 then title_to_docid @title
      when 1 then parse_spec(parts[0])
      else parts_to_docid parts
      end
    end

    #
    # Create document identifier from parts references.
    #
    # Keeps the leading dash-separated chunks shared (case-insensitively) by
    # all references.
    #
    # @param [Array<String>] parts parts references
    #
    # @return [String] document identifier
    #
    def parts_to_docid(parts)
      id = parts[1..-1].each_with_object(parts[0].split("-")) do |part, acc|
        chunks = part.split "-"
        chunks.each.with_index do |chunk, idx|
          # casecmp returns nil when acc has no chunk at idx, hence the &.
          unless chunk.casecmp(acc[idx])&.zero?
            acc.slice!(idx..-1)
            break
          end
        end
      end.join("-")
      parse_spec(id)
    end

    #
    # Create document identifier from title.
    #
    # When the title contains parenthesised abbreviations they form the
    # identifier; otherwise an identifier is assembled word by word from
    # initials, upper-case words, numbers and version tokens.
    #
    # @param [String] title title
    #
    # @return [String] document identifier
    #
    def title_to_docid(title) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
      abbrs = title.scan(/(?<=\()[^)]+(?=\))/)
      if abbrs.any?
        id = abbrs.map { |abbr| abbr.split.join("-") }.join "-"
        /(?:Version\s|v)(?<ver>[\d.]+)/ =~ title
        id += "-v#{ver}" if ver
        /(?<eb>ebXML|ebMS)/ =~ title
        id = "#{eb}-#{id}" if eb
        id
      else
        series_end = false
        title.sub(/\s\[OASIS\s\d+\]$/, "").split(/[,:]?\s|-|(?<=[a-z])(?=[A-Z][a-z])/)
          .each_with_object([""]) do |word, acc|
          if word =~ /^v[\d.]+/
            # Regexp.last_match works without the English library, which
            # the $MATCH alias depends on
            acc << Regexp.last_match(0)
            series_end = true
          elsif word.match?(/^Version/)
            acc << "v"
            series_end = false
          elsif word.match?(/^\d|ebXML|ebMS/)
            series_end ? acc << word : acc[-1] += word
            series_end = true
          elsif word.match?(/^\w+$/) && word == word.upcase
            series_end ? acc << word : acc[-1] = word
            series_end = true
          elsif word.match?(/[A-Z]+[a-z]+/)
            series_end ? acc << word[0] : acc[-1] += word[0]
            series_end = false
          end
        end.join "-"
      end
    end

    #
    # Parse technology areas.
    #
    # @return [Array<String>] technology areas
    #
    def parse_technology_area
      super @node
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module RelatonOasis
  #
  # Helpers for building document identifiers and document types.
  #
  # The methods read the @text and @title instance variables, and
  # parse_docid calls parse_docnumber, so the including class has to
  # provide them.
  #
  module DataParserUtils
    #
    # Parse document identifier specification.
    #
    # @param [String] num document number
    #
    # @return [String] document identifier with specification if needed
    #
    def parse_spec(num)
      id = case @text
           when /OASIS Project Specification (\d+)/ then "#{num}-PS#{Regexp.last_match(1)}"
           when /Committee Specification (\d+)/ then "#{num}-CS#{Regexp.last_match(1)}"
           else num
           end
      parse_part(id)
    end

    #
    # Parse document identifier part.
    #
    # An identifier that already names a part ("PartN" / "PtN", any case) is
    # returned unchanged, without the errata step.
    #
    # @param [String] docid document identifier
    #
    # @return [String] document identifier with part and errata if needed
    #
    def parse_part(docid)
      return docid if docid.match?(/(?:Part|Pt)\d+/i)

      id = case @title
           when /Part\s(\d+)/ then "#{docid}-Pt#{Regexp.last_match(1)}"
           else docid
           end
      parse_errata(id)
    end

    #
    # Parse document identifier errata.
    #
    # @param [String] id document identifier
    #
    # @return [String] document identifier with errata if needed
    #
    def parse_errata(id)
      return id if id.match?(/errata\d+/i)

      case @title
      # "Plus Errata" must be tested first, it also matches the plain pattern
      when /Plus\sErrata\s(\d+)/ then "#{id}-plus-errata#{Regexp.last_match(1)}"
      when /Errata\s(\d+)/ then "#{id}-errata#{Regexp.last_match(1)}"
      else id
      end
    end

    #
    # Parse document identifier.
    #
    # @return [Array<RelatonBib::DocumentIdentifier>] document identifier
    #
    def parse_docid
      id = "OASIS #{parse_docnumber}"
      [RelatonBib::DocumentIdentifier.new(type: "OASIS", id: id, primary: true)]
    end

    #
    # Parse document type.
    #
    # @return [String] document type ("specification", "memorandum",
    #   "resolution" or "standard")
    #
    def parse_doctype
      case @text
      when /OASIS Project Specification/, /Committee Specification/
        "specification"
      when /Technical Memorandum/ then "memorandum"
      when /Technical Resolution/ then "resolution"
      else "standard"
      end
    end

    #
    # Parse technology area.
    #
    # @param [Nokogiri::HTML::Element] node document node
    #
    # @return [Array<String>] technology areas with whitespace replaced by "-"
    #
    def parse_technology_area(node)
      node.xpath(
        "./summary/div/div/ul[@class='technology-areas__list']/li/a",
      ).map { |ta| ta.text.strip.gsub(/\s/, "-") }
    end
  end
end
|