relaton-oasis 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/rake.yml +36 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.rubocop.yml +12 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +21 -0
- data/README.adoc +225 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/grammars/basicdoc.rng +1150 -0
- data/grammars/biblio.rng +1238 -0
- data/grammars/isodoc.rng +2634 -0
- data/grammars/reqt.rng +223 -0
- data/lib/relaton_oasis/data_fetcher.rb +91 -0
- data/lib/relaton_oasis/data_parser.rb +204 -0
- data/lib/relaton_oasis/data_parser_utils.rb +89 -0
- data/lib/relaton_oasis/data_part_parser.rb +131 -0
- data/lib/relaton_oasis/hash_converter.rb +15 -0
- data/lib/relaton_oasis/oasis_bibliographic_item.rb +81 -0
- data/lib/relaton_oasis/oasis_bibliography.rb +43 -0
- data/lib/relaton_oasis/processor.rb +51 -0
- data/lib/relaton_oasis/version.rb +5 -0
- data/lib/relaton_oasis/xml_parser.rb +25 -0
- data/lib/relaton_oasis.rb +27 -0
- data/relaton_oasis.gemspec +48 -0
- metadata +214 -0
data/grammars/reqt.rng
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
|
3
|
+
<!--
|
|
4
|
+
Presupposes isodoc.rnc, is included in it
|
|
5
|
+
include "isodoc.rnc" { }
|
|
6
|
+
-->
|
|
7
|
+
<define name="requirement">
|
|
8
|
+
<element name="requirement">
|
|
9
|
+
<ref name="RequirementType"/>
|
|
10
|
+
</element>
|
|
11
|
+
</define>
|
|
12
|
+
<define name="recommendation">
|
|
13
|
+
<element name="recommendation">
|
|
14
|
+
<ref name="RequirementType"/>
|
|
15
|
+
</element>
|
|
16
|
+
</define>
|
|
17
|
+
<define name="permission">
|
|
18
|
+
<element name="permission">
|
|
19
|
+
<ref name="RequirementType"/>
|
|
20
|
+
</element>
|
|
21
|
+
</define>
|
|
22
|
+
<define name="RequirementType">
|
|
23
|
+
<optional>
|
|
24
|
+
<attribute name="obligation">
|
|
25
|
+
<ref name="ObligationType"/>
|
|
26
|
+
</attribute>
|
|
27
|
+
</optional>
|
|
28
|
+
<optional>
|
|
29
|
+
<attribute name="unnumbered">
|
|
30
|
+
<data type="boolean"/>
|
|
31
|
+
</attribute>
|
|
32
|
+
</optional>
|
|
33
|
+
<optional>
|
|
34
|
+
<attribute name="number"/>
|
|
35
|
+
</optional>
|
|
36
|
+
<optional>
|
|
37
|
+
<attribute name="subsequence"/>
|
|
38
|
+
</optional>
|
|
39
|
+
<optional>
|
|
40
|
+
<attribute name="keep-with-next">
|
|
41
|
+
<data type="boolean"/>
|
|
42
|
+
</attribute>
|
|
43
|
+
</optional>
|
|
44
|
+
<optional>
|
|
45
|
+
<attribute name="keep-lines-together">
|
|
46
|
+
<data type="boolean"/>
|
|
47
|
+
</attribute>
|
|
48
|
+
</optional>
|
|
49
|
+
<attribute name="id">
|
|
50
|
+
<data type="ID"/>
|
|
51
|
+
</attribute>
|
|
52
|
+
<optional>
|
|
53
|
+
<attribute name="filename"/>
|
|
54
|
+
</optional>
|
|
55
|
+
<optional>
|
|
56
|
+
<attribute name="model"/>
|
|
57
|
+
</optional>
|
|
58
|
+
<optional>
|
|
59
|
+
<attribute name="type"/>
|
|
60
|
+
</optional>
|
|
61
|
+
<optional>
|
|
62
|
+
<attribute name="tag"/>
|
|
63
|
+
</optional>
|
|
64
|
+
<optional>
|
|
65
|
+
<attribute name="multilingual-rendering">
|
|
66
|
+
<ref name="MultilingualRenderingType"/>
|
|
67
|
+
</attribute>
|
|
68
|
+
</optional>
|
|
69
|
+
<optional>
|
|
70
|
+
<ref name="reqtitle"/>
|
|
71
|
+
</optional>
|
|
72
|
+
<optional>
|
|
73
|
+
<ref name="label"/>
|
|
74
|
+
</optional>
|
|
75
|
+
<zeroOrMore>
|
|
76
|
+
<ref name="subject"/>
|
|
77
|
+
</zeroOrMore>
|
|
78
|
+
<zeroOrMore>
|
|
79
|
+
<ref name="reqinherit"/>
|
|
80
|
+
</zeroOrMore>
|
|
81
|
+
<zeroOrMore>
|
|
82
|
+
<ref name="classification"/>
|
|
83
|
+
</zeroOrMore>
|
|
84
|
+
<zeroOrMore>
|
|
85
|
+
<choice>
|
|
86
|
+
<ref name="measurementtarget"/>
|
|
87
|
+
<ref name="specification"/>
|
|
88
|
+
<ref name="verification"/>
|
|
89
|
+
<ref name="import"/>
|
|
90
|
+
<ref name="description"/>
|
|
91
|
+
<ref name="component"/>
|
|
92
|
+
</choice>
|
|
93
|
+
</zeroOrMore>
|
|
94
|
+
<optional>
|
|
95
|
+
<ref name="reqt_references"/>
|
|
96
|
+
</optional>
|
|
97
|
+
<zeroOrMore>
|
|
98
|
+
<choice>
|
|
99
|
+
<ref name="requirement"/>
|
|
100
|
+
<ref name="recommendation"/>
|
|
101
|
+
<ref name="permission"/>
|
|
102
|
+
</choice>
|
|
103
|
+
</zeroOrMore>
|
|
104
|
+
</define>
|
|
105
|
+
<define name="reqtitle">
|
|
106
|
+
<element name="title">
|
|
107
|
+
<ref name="FormattedString"/>
|
|
108
|
+
</element>
|
|
109
|
+
</define>
|
|
110
|
+
<define name="label">
|
|
111
|
+
<element name="label">
|
|
112
|
+
<oneOrMore>
|
|
113
|
+
<ref name="TextElement"/>
|
|
114
|
+
</oneOrMore>
|
|
115
|
+
</element>
|
|
116
|
+
</define>
|
|
117
|
+
<define name="subject">
|
|
118
|
+
<element name="subject">
|
|
119
|
+
<oneOrMore>
|
|
120
|
+
<ref name="TextElement"/>
|
|
121
|
+
</oneOrMore>
|
|
122
|
+
</element>
|
|
123
|
+
</define>
|
|
124
|
+
<define name="reqinherit">
|
|
125
|
+
<element name="inherit">
|
|
126
|
+
<oneOrMore>
|
|
127
|
+
<ref name="TextElement"/>
|
|
128
|
+
</oneOrMore>
|
|
129
|
+
</element>
|
|
130
|
+
</define>
|
|
131
|
+
<define name="measurementtarget">
|
|
132
|
+
<element name="measurement-target">
|
|
133
|
+
<ref name="RequirementSubpart"/>
|
|
134
|
+
</element>
|
|
135
|
+
</define>
|
|
136
|
+
<define name="specification">
|
|
137
|
+
<element name="specification">
|
|
138
|
+
<ref name="RequirementSubpart"/>
|
|
139
|
+
</element>
|
|
140
|
+
</define>
|
|
141
|
+
<define name="verification">
|
|
142
|
+
<element name="verification">
|
|
143
|
+
<ref name="RequirementSubpart"/>
|
|
144
|
+
</element>
|
|
145
|
+
</define>
|
|
146
|
+
<define name="import">
|
|
147
|
+
<element name="import">
|
|
148
|
+
<ref name="RequirementSubpart"/>
|
|
149
|
+
</element>
|
|
150
|
+
</define>
|
|
151
|
+
<define name="description">
|
|
152
|
+
<element name="description">
|
|
153
|
+
<ref name="RequirementSubpart"/>
|
|
154
|
+
</element>
|
|
155
|
+
</define>
|
|
156
|
+
<define name="component">
|
|
157
|
+
<element name="component">
|
|
158
|
+
<attribute name="class"/>
|
|
159
|
+
<ref name="RequirementSubpart"/>
|
|
160
|
+
</element>
|
|
161
|
+
</define>
|
|
162
|
+
<define name="reqt_references">
|
|
163
|
+
<element name="references">
|
|
164
|
+
<oneOrMore>
|
|
165
|
+
<ref name="bibitem"/>
|
|
166
|
+
</oneOrMore>
|
|
167
|
+
</element>
|
|
168
|
+
</define>
|
|
169
|
+
<define name="RequirementSubpart">
|
|
170
|
+
<optional>
|
|
171
|
+
<attribute name="type"/>
|
|
172
|
+
</optional>
|
|
173
|
+
<optional>
|
|
174
|
+
<attribute name="exclude">
|
|
175
|
+
<data type="boolean"/>
|
|
176
|
+
</attribute>
|
|
177
|
+
</optional>
|
|
178
|
+
<optional>
|
|
179
|
+
<attribute name="keep-with-next">
|
|
180
|
+
<data type="boolean"/>
|
|
181
|
+
</attribute>
|
|
182
|
+
</optional>
|
|
183
|
+
<optional>
|
|
184
|
+
<attribute name="keep-lines-together">
|
|
185
|
+
<data type="boolean"/>
|
|
186
|
+
</attribute>
|
|
187
|
+
</optional>
|
|
188
|
+
<optional>
|
|
189
|
+
<attribute name="tag"/>
|
|
190
|
+
</optional>
|
|
191
|
+
<optional>
|
|
192
|
+
<attribute name="multilingual-rendering">
|
|
193
|
+
<ref name="MultilingualRenderingType"/>
|
|
194
|
+
</attribute>
|
|
195
|
+
</optional>
|
|
196
|
+
<oneOrMore>
|
|
197
|
+
<ref name="BasicBlock"/>
|
|
198
|
+
</oneOrMore>
|
|
199
|
+
</define>
|
|
200
|
+
<define name="ObligationType">
|
|
201
|
+
<choice>
|
|
202
|
+
<value>requirement</value>
|
|
203
|
+
<value>recommendation</value>
|
|
204
|
+
<value>permission</value>
|
|
205
|
+
</choice>
|
|
206
|
+
</define>
|
|
207
|
+
<define name="classification">
|
|
208
|
+
<element name="classification">
|
|
209
|
+
<ref name="classification_tag"/>
|
|
210
|
+
<ref name="classification_value"/>
|
|
211
|
+
</element>
|
|
212
|
+
</define>
|
|
213
|
+
<define name="classification_tag">
|
|
214
|
+
<element name="tag">
|
|
215
|
+
<text/>
|
|
216
|
+
</element>
|
|
217
|
+
</define>
|
|
218
|
+
<define name="classification_value">
|
|
219
|
+
<element name="value">
|
|
220
|
+
<text/>
|
|
221
|
+
</element>
|
|
222
|
+
</define>
|
|
223
|
+
</grammar>
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
module RelatonOasis
|
|
2
|
+
class DataFetcher
|
|
3
|
+
#
# Set up a fetcher for the given output directory and output format.
#
# @param [String] output directory to save files
# @param [String] format format of output files (xml, yaml, bibxml)
#
def initialize(output, format)
  # List of file paths; starts out empty.
  @files = []
  @output, @format = output, format
  # The file extension is the format name without a leading "bib" or "rfc".
  @ext = format.sub(/^bib|^rfc/, "")
end
|
|
15
|
+
|
|
16
|
+
#
# Build a fetcher, run it, and print start time, stop time and duration.
# The output directory is created first when it does not exist yet.
#
# @param [String] output directory to save files, default: "data"
# @param [String] format format of output files (xml, yaml, bibxml); default: yaml
#
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  Dir.exist?(output) || FileUtils.mkdir_p(output)
  new(output, format).fetch
  stopped_at = Time.now
  puts "Stopped at: #{stopped_at}"
  elapsed = (stopped_at - started_at).round
  puts "Done in: #{elapsed} sec."
end
|
|
31
|
+
|
|
32
|
+
#
# Download the OASIS standards page and, for every `details` element in it,
# save the parsed document and then its parts.
#
def fetch
  page = Mechanize.new.get("https://www.oasis-open.org/standards/")
  html = Nokogiri::HTML(page.body)
  html.xpath("//details").map do |details|
    save_doc(DataParser.new(details).parse)
    fetch_parts(details)
  end
end
|
|
44
|
+
|
|
45
|
+
#
# Save every part listed in the "cite as" section of a document node.
# Nothing is saved unless the section lists more than one part.
#
# @param [Nokogiri::HTML::Element] item document node
#
def fetch_parts(item)
  part_nodes = item.xpath("./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[strong or span/strong]")
  return if part_nodes.size <= 1

  part_nodes.each { |part_node| save_doc(DataPartParser.new(part_node).parse) }
end
|
|
58
|
+
|
|
59
|
+
#
# Serialize a document in the configured format and write it to its file.
# A warning is printed when the target path was already written by this
# fetcher; the file is written in either case.
#
# @param [RelatonOasis::OasisBibliographicItem] doc
#
def save_doc(doc)
  content =
    if @format == "xml" then doc.to_xml(bibdata: true)
    elsif @format == "yaml" then doc.to_hash.to_yaml
    else doc.send("to_#{@format}")
    end
  path = file_name(doc)
  @files.include?(path) ? warn("File #{path} already exists. Document: #{doc.docnumber}") : @files.push(path)
  File.write(path, content, encoding: "UTF-8")
end
|
|
78
|
+
|
|
79
|
+
#
# Build the output path for a document from its document number.
# Whitespace, commas, colons and slashes become underscores, runs of
# underscores are collapsed, and the result is upper-cased.
#
# @param [RelatonOasis::OasisBibliographicItem] doc
#
# @return [String] file name
#
def file_name(doc)
  sanitized = doc.docnumber.gsub(/[\s,:\/]/, "_")
  basename = sanitized.squeeze("_").upcase
  File.join(@output, "#{basename}.#{@ext}")
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
module RelatonOasis
|
|
2
|
+
class DataParser
|
|
3
|
+
include RelatonOasis::DataParserUtils
|
|
4
|
+
|
|
5
|
+
#
# Initialize parser: keep the node and read the title and the
# "cite as" text from it.
#
# @param [Nokogiri::HTML::Element] node document node
#
def initialize(node)
  @node = node
  heading = node.at("./summary/div/h2")
  @title = heading.text
  cite_as = node.at(
    "./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[em or i or a or span]",
  )
  # The "cite as" paragraph may be missing, in which case @text is nil.
  @text = cite_as&.text
end
|
|
17
|
+
|
|
18
|
+
#
# Parse document: collect every parsed attribute and build the item.
#
# @return [RelatonOasis::OasisBibliographicItem] bibliographic item
#
def parse # rubocop:disable Metrics/MethodLength
  attrs = {
    fetched: Date.today.to_s,
    type: "standard",
    doctype: parse_doctype,
    title: parse_title,
    docid: parse_docid,
    docnumber: parse_docnumber,
    date: parse_date,
    abstract: parse_abstract,
    language: ["en"],
    script: ["Latn"],
    editorialgroup: parse_editorialgroup,
    relation: parse_relation,
    technology_area: parse_technology_area,
  }
  RelatonOasis::OasisBibliographicItem.new(**attrs)
end
|
|
40
|
+
|
|
41
|
+
#
# Parse title: wrap the title read at initialization as the single
# "main" title (English, Latin script).
#
# @return [Array<RelatonBib::TypedTitleString>] one-element array with the main title
#
def parse_title
  main_title = RelatonBib::TypedTitleString.new(
    type: "main", content: @title, language: "en", script: "Latn",
  )
  [main_title]
end
|
|
49
|
+
|
|
50
|
+
#
# Parse date: one "issued" date per `time.standard__date` element found
# in the summary.
#
# @return [Array<RelatonBib::BibliographicDate>] date
#
def parse_date
  time_nodes = @node.xpath("./summary/div/time[@class='standard__date']")
  time_nodes.map do |time_node|
    # Expect "<2-digit day> <month word> <4-digit year>" somewhere in the text.
    matched = time_node.text.match(/\d{2}\s\w+\s\d{4}/)
    issued_on = Date.parse(matched.to_s).to_s
    RelatonBib::BibliographicDate.new(on: issued_on, type: "issued")
  end
end
|
|
62
|
+
|
|
63
|
+
#
# Parse abstract: join the description paragraphs with newlines, after
# collapsing newline/tab runs inside each paragraph to a single space.
#
# @return [Array<RelatonBib::FormattedString>] abstract, empty when there is no text
#
def parse_abstract
  paragraphs = @node.xpath(
    "./summary/div/div[@class='standard__description']/p",
  )
  content = paragraphs.map { |para| para.text.gsub(/[\n\t]+/, " ").strip }.join("\n")
  if content.empty?
    []
  else
    [RelatonBib::FormattedString.new(content: content, language: "en", script: "Latn")]
  end
end
|
|
76
|
+
|
|
77
|
+
#
# Parse technical committee: one committee per link in the
# `standard__details` block, named by the link's stripped text.
#
# @return [RelatonBib::EditorialGroup] technical committee
#
def parse_editorialgroup
  links = @node.xpath("./div[@class='standard__details']/a")
  committees = links.map do |link|
    workgroup = RelatonBib::WorkGroup.new(name: link.text.strip)
    RelatonBib::TechnicalCommittee.new(workgroup)
  end
  RelatonBib::EditorialGroup.new(committees)
end
|
|
89
|
+
|
|
90
|
+
#
# Parse relation: a "hasPart" relation for every part listed in the
# "cite as" section. No relations are produced unless more than one
# part is listed.
#
# @return [Array<RelatonBib::DocumentRelation>] relation
#
def parse_relation
  part_nodes = @node.xpath(
    "./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[strong or span/strong or b/span]",
  )
  return [] if part_nodes.size <= 1

  part_nodes.map do |part_node|
    # The related item carries only the part's first identifier as its formatted reference.
    part_id = DataPartParser.new(part_node).parse_docid.first.id
    ref = RelatonBib::FormattedRef.new(content: part_id)
    item = RelatonOasis::OasisBibliographicItem.new(formattedref: ref)
    RelatonBib::DocumentRelation.new(type: "hasPart", bibitem: item)
  end
end
|
|
108
|
+
|
|
109
|
+
#
# Parse document part references: the text of every matching reference
# label, with a leading "[" or "[[" and one trailing "]" removed and
# surrounding whitespace stripped.
#
# @return [Array<String>] document part references
#
def document_part_refs
  selectors = [
    ".standard__grid--cite-as > p > strong",
    "span.Refterm", "span.abbrev", "span.citationLabel > strong"
  ]
  @node.css(*selectors).map do |ref_node|
    ref_node.text.gsub(/^\[{1,2}|\]$/, "").strip
  end
end
|
|
120
|
+
|
|
121
|
+
#
# Parse document number. With no part references it is derived from the
# title, with exactly one it is that reference, otherwise it is derived
# from all references together.
#
# @return [String] document number
#
def parse_docnumber
  refs = document_part_refs
  return title_to_docid(@node.at("./summary/div/h2").text) if refs.empty?
  return parse_spec(refs.first) if refs.size == 1

  parts_to_docid(refs)
end
|
|
134
|
+
|
|
135
|
+
#
# Create document identifier from parts references: the "-"-separated
# segments the references share from the start (compared ignoring case),
# passed through parse_spec.
#
# NOTE(review): a later reference with fewer segments than the current
# prefix does not shorten it, so the result can depend on the order of
# the references — confirm this is intended.
#
# @param [Array<String>] parts parts references
#
# @return [String] document identifier
#
def parts_to_docid(parts)
  first_ref, *other_refs = parts
  prefix = first_ref.split("-")
  other_refs.each do |ref|
    segments = ref.split "-"
    # Index of the first segment that differs from (or runs past) the prefix.
    diverge_at = segments.each_index.find { |idx| !segments[idx].casecmp(prefix[idx])&.zero? }
    prefix = prefix.first(diverge_at) if diverge_at
  end
  parse_spec(prefix.join("-"))
end
|
|
154
|
+
|
|
155
|
+
#
# Create document identifier from title.
#
# When the title contains parenthesized abbreviations, they are joined
# with "-" (inner whitespace also becomes "-"); a version found in the
# title is appended as "-v<version>" and an "ebXML"/"ebMS" marker found in
# the title is prepended.
#
# Otherwise the identifier is built word by word: capitalized words
# contribute their first letter, all-uppercase words and words starting
# with a digit (or containing "ebXML"/"ebMS") are taken whole, "Version"
# becomes a "v" that the following number is attached to, and a "vN.N"
# token is taken as is. Other words are skipped. A trailing " [OASIS <n>]"
# is ignored.
#
# @param [String] title title
#
# @return [String] document identifier
#
def title_to_docid(title) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  abbrs = title.scan(/(?<=\()[^)]+(?=\))/)
  if abbrs.any?
    id = abbrs.map { |abbr| abbr.split.join("-") }.join "-"
    # A regexp literal on the left of =~ assigns its named groups to locals.
    /(?:Version\s|v)(?<ver>[\d.]+)/ =~ title
    id += "-v#{ver}" if ver
    /(?<eb>ebXML|ebMS)/ =~ title
    id = "#{eb}-#{id}" if eb
    id
  else
    # True when the last chunk is complete and the next word starts a new one.
    series_end = false
    title.sub(/\s\[OASIS\s\d+\]$/, "").split(/[,:]?\s|-|(?<=[a-z])(?=[A-Z][a-z])/)
      .each_with_object([""]) do |word, acc|
        # String#[] returns the matched text directly, so this does not rely
        # on the $MATCH alias that exists only after `require "English"`.
        if (version = word[/^v[\d.]+/])
          acc << version
          series_end = true
        elsif word.match?(/^Version/)
          acc << "v"
          series_end = false
        elsif word.match?(/^\d|ebXML|ebMS/)
          if series_end
            acc << word
          else
            acc[-1] += word
          end
          series_end = true
        elsif word.match?(/^\w+$/) && word == word.upcase
          if series_end
            acc << word
          else
            # An all-uppercase word replaces the chunk collected so far.
            acc[-1] = word
          end
          series_end = true
        elsif word.match?(/[A-Z]+[a-z]+/)
          if series_end
            acc << word[0]
          else
            acc[-1] += word[0]
          end
          series_end = false
        end
      end.join "-"
  end
end
|
|
194
|
+
|
|
195
|
+
#
# Parse technology areas by passing this parser's node to the
# inherited implementation.
#
# @return [Array<String>] technology areas
#
def parse_technology_area
  super(@node)
end
|
|
203
|
+
end
|
|
204
|
+
end
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
module RelatonOasis
|
|
2
|
+
module DataParserUtils
|
|
3
|
+
#
# Parse document identifier specification. When the "cite as" text names
# an OASIS Project Specification or a Committee Specification number,
# "-PS<n>" or "-CS<n>" is appended; the result then goes through
# parse_part.
#
# @param [String] num document number
#
# @return [String] document identifier with specification if needed
#
def parse_spec(num)
  if (project = /OASIS Project Specification (\d+)/.match(@text))
    parse_part("#{num}-PS#{project[1]}")
  elsif (committee = /Committee Specification (\d+)/.match(@text))
    parse_part("#{num}-CS#{committee[1]}")
  else
    parse_part(num)
  end
end
|
|
18
|
+
|
|
19
|
+
#
# Parse document identifier part. An identifier that already contains
# "Part<n>" or "Pt<n>" (any case) is returned unchanged. Otherwise
# "-Pt<n>" is appended when the title contains "Part <n>", and the result
# goes through parse_errata.
#
# @param [String] docid document identifier
#
# @return [String] document identifier with part if needed
#
def parse_part(docid)
  return docid if docid.match?(/(?:Part|Pt)\d+/i)

  part = /Part\s(\d+)/.match(@title)
  parse_errata(part ? "#{docid}-Pt#{part[1]}" : docid)
end
|
|
35
|
+
|
|
36
|
+
#
# Parse document identifier errata. An identifier that already contains
# "errata<n>" (any case) is returned unchanged. Otherwise
# "-plus-errata<n>" or "-errata<n>" is appended when the title contains
# "Plus Errata <n>" or "Errata <n>".
#
# @param [String] id document identifier
#
# @return [String] document identifier with errata if needed
#
def parse_errata(id)
  return id if id.match?(/errata\d+/i)

  if (plus = /Plus\sErrata\s(\d+)/.match(@title))
    "#{id}-plus-errata#{plus[1]}"
  elsif (errata = /Errata\s(\d+)/.match(@title))
    "#{id}-errata#{errata[1]}"
  else
    id
  end
end
|
|
52
|
+
|
|
53
|
+
#
# Parse document identifier: the document number prefixed with "OASIS ",
# as a single primary identifier of type "OASIS".
#
# @return [Array<RelatonBib::DocumentIdentifier>] document identifier
#
def parse_docid
  identifier = RelatonBib::DocumentIdentifier.new(
    type: "OASIS", id: "OASIS #{parse_docnumber}", primary: true,
  )
  [identifier]
end
|
|
62
|
+
|
|
63
|
+
#
# Parse document type from the "cite as" text. The first matching rule
# wins; "standard" is used when none matches.
#
# @return [String] document type
#
def parse_doctype
  rules = [
    [/OASIS Project Specification/, "specification"],
    [/Committee Specification/, "specification"],
    [/Technical Memorandum/, "memorandum"],
    [/Technical Resolution/, "resolution"],
  ]
  _pattern, doctype = rules.find { |pattern, _type| pattern === @text }
  doctype || "standard"
end
|
|
77
|
+
|
|
78
|
+
#
# Parse technology area: the stripped text of every link in the
# technology-areas list, with each whitespace character replaced by "-".
#
# @param [Nokogiri::HTML::Element] node document node
#
# @return [Array<String>] technology areas
#
def parse_technology_area(node)
  links = node.xpath(
    "./summary/div/div/ul[@class='technology-areas__list']/li/a",
  )
  links.map do |link|
    link.text.strip.gsub(/\s/, "-")
  end
end
|
|
88
|
+
end
|
|
89
|
+
end
|