relaton-oasis 2.0.0.pre.alpha.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/grammars/basicdoc.rng +14 -1
- data/grammars/biblio.rng +8 -8
- data/grammars/relaton-oasis.rng +5 -19
- data/lib/relaton/oasis/data_fetcher.rb +7 -2
- data/lib/relaton/oasis/data_parser.rb +81 -50
- data/lib/relaton/oasis/data_parser_utils.rb +70 -30
- data/lib/relaton/oasis/data_part_parser.rb +98 -53
- data/lib/relaton/oasis/version.rb +1 -1
- data/relaton_oasis.gemspec +2 -2
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ab1cfb1ce7f59b50d782f32e4605b34cbf89045a645f6e41e64ec9347156f7db
|
|
4
|
+
data.tar.gz: ee82099958e7c5f24e3b13624e18e119641d8ce7e7da8d0334bb890bb633ff7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 70195e0c65741f05572f2f7094d808af2b4e5daa7f3104768c36390eb8f1a4635215f0305523e57c23d641c1601fa839db5eb609b29076ed7534943cf902bb1a
|
|
7
|
+
data.tar.gz: d6475eb7f0561b319970c742f639c8462d061578307a203a4331377a158f412332db20a684adbccff82a143a630167b5677e52ecfd281dcc7348cdd70548e0df
|
data/grammars/basicdoc.rng
CHANGED
|
@@ -187,6 +187,15 @@ Applicable to modify and delete</a:documentation>
|
|
|
187
187
|
<a:documentation>Optional caption of this block</a:documentation>
|
|
188
188
|
</attribute>
|
|
189
189
|
</optional>
|
|
190
|
+
<optional>
|
|
191
|
+
<attribute name="position">
|
|
192
|
+
<a:documentation>For an "add" change, whether the change is added before or after the location</a:documentation>
|
|
193
|
+
<choice>
|
|
194
|
+
<value>before</value>
|
|
195
|
+
<value>after</value>
|
|
196
|
+
</choice>
|
|
197
|
+
</attribute>
|
|
198
|
+
</optional>
|
|
190
199
|
<optional>
|
|
191
200
|
<element name="location">
|
|
192
201
|
<a:documentation>The location(s) in the original document which have undergone the change described in this block</a:documentation>
|
|
@@ -208,11 +217,15 @@ Applicable to modify and delete</a:documentation>
|
|
|
208
217
|
</zeroOrMore>
|
|
209
218
|
<optional>
|
|
210
219
|
<element name="newcontent">
|
|
211
|
-
<a:documentation>New content to be added to the document; applicable to add and modify
|
|
220
|
+
<a:documentation>New content to be added to the document; applicable to add and modify.
|
|
221
|
+
Can be blocks and/or sections</a:documentation>
|
|
212
222
|
<ref name="OptionalId"/>
|
|
213
223
|
<zeroOrMore>
|
|
214
224
|
<ref name="BasicBlock"/>
|
|
215
225
|
</zeroOrMore>
|
|
226
|
+
<zeroOrMore>
|
|
227
|
+
<ref name="section"/>
|
|
228
|
+
</zeroOrMore>
|
|
216
229
|
</element>
|
|
217
230
|
</optional>
|
|
218
231
|
<zeroOrMore>
|
data/grammars/biblio.rng
CHANGED
|
@@ -1142,11 +1142,11 @@ NOTE: This should preferably be encoded as a URI or short identifier, rather th
|
|
|
1142
1142
|
<a:documentation>Information about how long the current description of the bibliographic item is valid for</a:documentation>
|
|
1143
1143
|
</ref>
|
|
1144
1144
|
</optional>
|
|
1145
|
-
<
|
|
1145
|
+
<zeroOrMore>
|
|
1146
1146
|
<ref name="depiction">
|
|
1147
1147
|
<a:documentation>Depiction of the bibliographic item, typically an image</a:documentation>
|
|
1148
1148
|
</ref>
|
|
1149
|
-
</
|
|
1149
|
+
</zeroOrMore>
|
|
1150
1150
|
</define>
|
|
1151
1151
|
<define name="ReducedBibliographicItem">
|
|
1152
1152
|
<a:documentation>Reduced description of a bibliographic resource, without mandatory title and docidentifier, used for document relations
|
|
@@ -1939,10 +1939,10 @@ Detailed in https://www.relaton.org/model/relations/</a:documentation>
|
|
|
1939
1939
|
<value>hasAnnotation</value>
|
|
1940
1940
|
<value>draftOf</value>
|
|
1941
1941
|
<value>hasDraft</value>
|
|
1942
|
-
<value>
|
|
1943
|
-
<value>
|
|
1944
|
-
<value>
|
|
1945
|
-
<value>
|
|
1942
|
+
<value>predecessorDraftOf</value>
|
|
1943
|
+
<value>hasPredecessorDraft</value>
|
|
1944
|
+
<value>successorDraftOf</value>
|
|
1945
|
+
<value>hasSuccessorDraft</value>
|
|
1946
1946
|
<value>editionOf</value>
|
|
1947
1947
|
<value>hasEdition</value>
|
|
1948
1948
|
<value>updates</value>
|
|
@@ -2063,13 +2063,13 @@ provided that it is not the entire bibliographic item that is so related</a:docu
|
|
|
2063
2063
|
<ref name="LocalizedString"/>
|
|
2064
2064
|
</element>
|
|
2065
2065
|
</optional>
|
|
2066
|
-
<
|
|
2066
|
+
<zeroOrMore>
|
|
2067
2067
|
<element name="taxon">
|
|
2068
2068
|
<a:documentation>The keywords as a hierarchical taxonomy. For example, the sequence of `taxon` elements
|
|
2069
2069
|
`pump`, `centrifugal pump`, `line shaft pump` represents a taxonomic classification</a:documentation>
|
|
2070
2070
|
<ref name="LocalizedString"/>
|
|
2071
2071
|
</element>
|
|
2072
|
-
</
|
|
2072
|
+
</zeroOrMore>
|
|
2073
2073
|
<zeroOrMore>
|
|
2074
2074
|
<ref name="vocabid">
|
|
2075
2075
|
<a:documentation>Identifiers for the keyword as a controlled vocabulary</a:documentation>
|
data/grammars/relaton-oasis.rng
CHANGED
|
@@ -1,25 +1,6 @@
|
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
2
|
<grammar xmlns="http://relaxng.org/ns/structure/1.0">
|
|
3
3
|
<include href="biblio-standoc.rng">
|
|
4
|
-
<define name="BibDataExtensionType">
|
|
5
|
-
<optional>
|
|
6
|
-
<attribute name="schema-version"/>
|
|
7
|
-
</optional>
|
|
8
|
-
<ref name="doctype"/>
|
|
9
|
-
<optional>
|
|
10
|
-
<ref name="docsubtype"/>
|
|
11
|
-
</optional>
|
|
12
|
-
<ref name="flavor"/>
|
|
13
|
-
<zeroOrMore>
|
|
14
|
-
<ref name="ics"/>
|
|
15
|
-
</zeroOrMore>
|
|
16
|
-
<zeroOrMore>
|
|
17
|
-
<ref name="structuredidentifier"/>
|
|
18
|
-
</zeroOrMore>
|
|
19
|
-
<zeroOrMore>
|
|
20
|
-
<ref name="technology-area"/>
|
|
21
|
-
</zeroOrMore>
|
|
22
|
-
</define>
|
|
23
4
|
<define name="DocumentType">
|
|
24
5
|
<choice>
|
|
25
6
|
<value>specification</value>
|
|
@@ -29,6 +10,11 @@
|
|
|
29
10
|
</choice>
|
|
30
11
|
</define>
|
|
31
12
|
</include>
|
|
13
|
+
<define name="BibDataExtensionType" combine="interleave">
|
|
14
|
+
<zeroOrMore>
|
|
15
|
+
<ref name="technology-area"/>
|
|
16
|
+
</zeroOrMore>
|
|
17
|
+
</define>
|
|
32
18
|
<define name="technology-area">
|
|
33
19
|
<element name="technology-area">
|
|
34
20
|
<ref name="TechnologyArea"/>
|
data/lib/relaton/oasis/data_fetcher.rb
CHANGED
|
@@ -7,15 +7,20 @@ require_relative "data_part_parser"
|
|
|
7
7
|
module Relaton
|
|
8
8
|
module Oasis
|
|
9
9
|
class DataFetcher < Core::DataFetcher
|
|
10
|
+
def log_error(msg)
|
|
11
|
+
Util.error msg
|
|
12
|
+
end
|
|
13
|
+
|
|
10
14
|
def fetch(_source = nil)
|
|
11
15
|
agent = Mechanize.new
|
|
12
16
|
resp = agent.get "https://www.oasis-open.org/standards/"
|
|
13
17
|
doc = Nokogiri::HTML resp.body
|
|
14
18
|
doc.xpath("//details").map do |item|
|
|
15
|
-
save_doc DataParser.new(item).parse
|
|
19
|
+
save_doc DataParser.new(item, @errors).parse
|
|
16
20
|
fetch_parts item
|
|
17
21
|
end
|
|
18
22
|
index.save
|
|
23
|
+
report_errors
|
|
19
24
|
end
|
|
20
25
|
|
|
21
26
|
private
|
|
@@ -33,7 +38,7 @@ module Relaton
|
|
|
33
38
|
parts = item.xpath(xpath)
|
|
34
39
|
return unless parts.size > 1
|
|
35
40
|
|
|
36
|
-
parts.each { |part| save_doc DataPartParser.new(part).parse }
|
|
41
|
+
parts.each { |part| save_doc DataPartParser.new(part, @errors).parse }
|
|
37
42
|
end
|
|
38
43
|
|
|
39
44
|
def save_doc(doc) # rubocop:disable Metrics/AbcSize
|
data/lib/relaton/oasis/data_parser.rb
CHANGED
|
@@ -9,8 +9,9 @@ module Relaton
|
|
|
9
9
|
#
|
|
10
10
|
# @param [Nokogiri::HTML::Element] node document node
|
|
11
11
|
#
|
|
12
|
-
def initialize(node)
|
|
12
|
+
def initialize(node, errors = {})
|
|
13
13
|
@node = node
|
|
14
|
+
@errors = errors
|
|
14
15
|
end
|
|
15
16
|
|
|
16
17
|
def title
|
|
@@ -52,8 +53,10 @@ module Relaton
|
|
|
52
53
|
# @return [Array<Bib::Title>] title
|
|
53
54
|
#
|
|
54
55
|
def parse_title
|
|
55
|
-
[Bib::Title.new(type: "main", content: title, language: "en",
|
|
56
|
-
|
|
56
|
+
result = [Bib::Title.new(type: "main", content: title, language: "en",
|
|
57
|
+
script: "Latn")]
|
|
58
|
+
@errors[:title] &&= result.empty?
|
|
59
|
+
result
|
|
57
60
|
end
|
|
58
61
|
|
|
59
62
|
#
|
|
@@ -62,11 +65,14 @@ module Relaton
|
|
|
62
65
|
# @return [Array<Bib::Date>] date
|
|
63
66
|
#
|
|
64
67
|
def parse_date
|
|
65
|
-
|
|
68
|
+
xpath = "./summary/div/time[@class='standard__date']"
|
|
69
|
+
result = @node.xpath(xpath).map do |d|
|
|
66
70
|
date_str = d.text.match(/\d{2}\s\w+\s\d{4}/).to_s
|
|
67
71
|
date = Date.parse(date_str).to_s
|
|
68
72
|
Bib::Date.new(at: date, type: "issued")
|
|
69
73
|
end
|
|
74
|
+
@errors[:date] &&= result.empty?
|
|
75
|
+
result
|
|
70
76
|
end
|
|
71
77
|
|
|
72
78
|
#
|
|
@@ -78,10 +84,15 @@ module Relaton
|
|
|
78
84
|
c = @node.xpath(
|
|
79
85
|
"./summary/div/div[@class='standard__description']/p",
|
|
80
86
|
).map { |a| a.text.gsub(/[\n\t]+/, " ").strip }.join("\n")
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
87
|
+
result = if c.empty?
|
|
88
|
+
[]
|
|
89
|
+
else
|
|
90
|
+
[Bib::Abstract.new(
|
|
91
|
+
content: c, language: "en", script: "Latn",
|
|
92
|
+
)]
|
|
93
|
+
end
|
|
94
|
+
@errors[:abstract] &&= result.empty?
|
|
95
|
+
result
|
|
85
96
|
end
|
|
86
97
|
|
|
87
98
|
#
|
|
@@ -89,28 +100,32 @@ module Relaton
|
|
|
89
100
|
#
|
|
90
101
|
# @return [Array<Bib::Contributor>] editorial group contributors
|
|
91
102
|
#
|
|
92
|
-
def parse_editorialgroup_contributor # rubocop:disable Metrics/MethodLength
|
|
103
|
+
def parse_editorialgroup_contributor # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
93
104
|
tcs = @node.xpath("./div[@class='standard__details']/a")
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
105
|
+
if tcs.empty?
|
|
106
|
+
result = []
|
|
107
|
+
else
|
|
108
|
+
subdivisions = tcs.map do |a|
|
|
109
|
+
name = [Bib::TypedLocalizedString.new(content: a.text.strip)]
|
|
110
|
+
Bib::Subdivision.new(type: "technical-committee",
|
|
111
|
+
name: name)
|
|
112
|
+
end
|
|
113
|
+
org = Bib::Organization.new(
|
|
114
|
+
name: [Bib::TypedLocalizedString.new(content: "OASIS")],
|
|
115
|
+
subdivision: subdivisions,
|
|
116
|
+
)
|
|
117
|
+
desc = [Bib::LocalizedMarkedUpString.new(content: "committee")]
|
|
118
|
+
role = Bib::Contributor::Role.new(
|
|
119
|
+
type: "author", description: desc,
|
|
120
|
+
)
|
|
121
|
+
result = [Bib::Contributor.new(organization: org, role: [role])]
|
|
100
122
|
end
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
subdivision: subdivisions,
|
|
104
|
-
)
|
|
105
|
-
desc = [Bib::LocalizedMarkedUpString.new(content: "committee")]
|
|
106
|
-
role = Bib::Contributor::Role.new(
|
|
107
|
-
type: "author", description: desc,
|
|
108
|
-
)
|
|
109
|
-
[Bib::Contributor.new(organization: org, role: [role])]
|
|
123
|
+
@errors[:editorialgroup_contributor] &&= result.empty?
|
|
124
|
+
result
|
|
110
125
|
end
|
|
111
126
|
|
|
112
|
-
def parse_authorizer
|
|
113
|
-
@node.xpath("./div[@class='standard__details']/a").map do |a|
|
|
127
|
+
def parse_authorizer # rubocop:disable Metrics/MethodLength
|
|
128
|
+
result = @node.xpath("./div[@class='standard__details']/a").map do |a|
|
|
114
129
|
org = Bib::Organization.new(
|
|
115
130
|
name: [Bib::TypedLocalizedString.new(content: a.text.strip)],
|
|
116
131
|
uri: [Bib::Uri.new(type: "uri", content: a[:href])],
|
|
@@ -120,6 +135,8 @@ module Relaton
|
|
|
120
135
|
description: desc)
|
|
121
136
|
Bib::Contributor.new(organization: org, role: [role])
|
|
122
137
|
end
|
|
138
|
+
@errors[:authorizer] &&= result.empty?
|
|
139
|
+
result
|
|
123
140
|
end
|
|
124
141
|
|
|
125
142
|
def link_node
|
|
@@ -134,18 +151,22 @@ module Relaton
|
|
|
134
151
|
#
|
|
135
152
|
# @return [Array<Bib::Relation>] relation
|
|
136
153
|
#
|
|
137
|
-
def parse_relation
|
|
154
|
+
def parse_relation # rubocop:disable Metrics/MethodLength
|
|
138
155
|
xpath = "./div/div/div[contains(@class, " \
|
|
139
156
|
"'standard__grid--cite-as')]" \
|
|
140
157
|
"/p[strong or span/strong or b/span]"
|
|
141
158
|
rels = @node.xpath(xpath)
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
159
|
+
result = if rels.size > 1
|
|
160
|
+
rels.map do |r|
|
|
161
|
+
docid = DataPartParser.new(r).parse_docid
|
|
162
|
+
bib = ItemData.new(formattedref: Bib::Formattedref.new(content: docid[0].content))
|
|
163
|
+
Bib::Relation.new(type: "hasPart", bibitem: bib)
|
|
164
|
+
end
|
|
165
|
+
else
|
|
166
|
+
[]
|
|
167
|
+
end
|
|
168
|
+
@errors[:relation] &&= result.empty?
|
|
169
|
+
result
|
|
149
170
|
end
|
|
150
171
|
|
|
151
172
|
#
|
|
@@ -160,16 +181,20 @@ module Relaton
|
|
|
160
181
|
).map { |p| p.text.gsub(/^\[{1,2}|\]$/, "").strip }
|
|
161
182
|
end
|
|
162
183
|
|
|
163
|
-
def parse_link
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
184
|
+
def parse_link # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
|
|
185
|
+
result = if parts.size > 1
|
|
186
|
+
[]
|
|
187
|
+
else
|
|
188
|
+
links.map do |l|
|
|
189
|
+
type = l[:href].match(/\.(\w+)$/)&.captures&.first
|
|
190
|
+
type ||= "src"
|
|
191
|
+
type.sub!("docx", "doc")
|
|
192
|
+
type.sub!("html", "src")
|
|
193
|
+
Bib::Uri.new(type: type, content: l[:href])
|
|
194
|
+
end
|
|
195
|
+
end
|
|
196
|
+
@errors[:link] &&= result.empty?
|
|
197
|
+
result
|
|
173
198
|
end
|
|
174
199
|
|
|
175
200
|
def parts
|
|
@@ -192,11 +217,15 @@ module Relaton
|
|
|
192
217
|
#
|
|
193
218
|
def parse_docnumber
|
|
194
219
|
parts = document_part_refs
|
|
195
|
-
case parts.size
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
220
|
+
result = case parts.size
|
|
221
|
+
when 0
|
|
222
|
+
txt = @node.at("./summary/div/h2").text
|
|
223
|
+
parse_spec title_to_docid(txt)
|
|
224
|
+
when 1 then parse_part parse_spec(parts[0])
|
|
225
|
+
else parts_to_docid parts
|
|
226
|
+
end
|
|
227
|
+
@errors[:docnumber] &&= result.nil?
|
|
228
|
+
result
|
|
200
229
|
end
|
|
201
230
|
|
|
202
231
|
#
|
|
@@ -266,7 +295,9 @@ module Relaton
|
|
|
266
295
|
# @return [Array<String>] technology areas
|
|
267
296
|
#
|
|
268
297
|
def parse_technology_area
|
|
269
|
-
super
|
|
298
|
+
result = super(@node)
|
|
299
|
+
@errors[:technology_area] &&= result.empty?
|
|
300
|
+
result
|
|
270
301
|
end
|
|
271
302
|
end
|
|
272
303
|
end
|
data/lib/relaton/oasis/data_parser_utils.rb
CHANGED
|
@@ -8,9 +8,11 @@ module Relaton
|
|
|
8
8
|
# @return [Array<Bib::Contributor>] contributors
|
|
9
9
|
#
|
|
10
10
|
def parse_contributor
|
|
11
|
-
publisher_oasis + parse_authorizer +
|
|
11
|
+
result = publisher_oasis + parse_authorizer +
|
|
12
12
|
parse_editorialgroup_contributor +
|
|
13
13
|
parse_chairs + parse_editors
|
|
14
|
+
@errors[:contributor] &&= result.empty?
|
|
15
|
+
result
|
|
14
16
|
end
|
|
15
17
|
|
|
16
18
|
def publisher_oasis
|
|
@@ -30,14 +32,19 @@ module Relaton
|
|
|
30
32
|
[Bib::Contributor.new(organization: org, role: role)]
|
|
31
33
|
end
|
|
32
34
|
|
|
33
|
-
def parse_editors_from_text
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
35
|
+
def parse_editors_from_text # rubocop:disable Metrics/MethodLength
|
|
36
|
+
result = if text
|
|
37
|
+
text.match(/(?<=Edited\sby\s)[^.]+/).to_s
|
|
38
|
+
.split(/,?\sand\s|,\s/).map do |c|
|
|
39
|
+
role = [Bib::Contributor::Role.new(type: "editor")]
|
|
40
|
+
Bib::Contributor.new(role: role,
|
|
41
|
+
person: create_person(c))
|
|
42
|
+
end
|
|
43
|
+
else
|
|
44
|
+
[]
|
|
45
|
+
end
|
|
46
|
+
@errors[:editors] &&= result.empty?
|
|
47
|
+
result
|
|
41
48
|
end
|
|
42
49
|
|
|
43
50
|
def page
|
|
@@ -69,25 +76,37 @@ module Relaton
|
|
|
69
76
|
nil
|
|
70
77
|
end
|
|
71
78
|
|
|
72
|
-
def parse_chairs
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
def parse_chairs # rubocop:disable Metrics/MethodLength
|
|
80
|
+
result = if page
|
|
81
|
+
xpath = "//p[preceding-sibling::p" \
|
|
82
|
+
"[starts-with(., 'Chair')]]" \
|
|
83
|
+
"[following-sibling::p" \
|
|
84
|
+
"[starts-with(., 'Editor')]]"
|
|
85
|
+
page.xpath(xpath).map do |p|
|
|
86
|
+
create_contribution_info(p, "editor", ["Chair"])
|
|
87
|
+
end
|
|
88
|
+
else
|
|
89
|
+
[]
|
|
90
|
+
end
|
|
91
|
+
@errors[:chairs] &&= result.empty?
|
|
92
|
+
result
|
|
80
93
|
end
|
|
81
94
|
|
|
82
|
-
def parse_editors
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
95
|
+
def parse_editors # rubocop:disable Metrics/MethodLength
|
|
96
|
+
result = if page
|
|
97
|
+
xpath = "//p[contains(@class, 'Contributor')]" \
|
|
98
|
+
"[preceding-sibling::p" \
|
|
99
|
+
"[starts-with(., 'Editor')]]" \
|
|
100
|
+
"[following-sibling::p" \
|
|
101
|
+
"[contains(@class, 'Title')]]"
|
|
102
|
+
page.xpath(xpath).map do |p|
|
|
103
|
+
create_contribution_info(p, "editor")
|
|
104
|
+
end
|
|
105
|
+
else
|
|
106
|
+
parse_editors_from_text
|
|
107
|
+
end
|
|
108
|
+
@errors[:editors] &&= result.empty?
|
|
109
|
+
result
|
|
91
110
|
end
|
|
92
111
|
|
|
93
112
|
def create_contribution_info(person_node, type, description = [])
|
|
@@ -113,7 +132,21 @@ module Relaton
|
|
|
113
132
|
def person_email(email)
|
|
114
133
|
return [] unless email
|
|
115
134
|
|
|
116
|
-
|
|
135
|
+
href = email[:href]
|
|
136
|
+
if href.start_with?("mailto:")
|
|
137
|
+
[href.split(":")[1]]
|
|
138
|
+
elsif (cf_email = email.at(".//span[@data-cfemail]"))
|
|
139
|
+
decoded = decode_cf_email(cf_email["data-cfemail"])
|
|
140
|
+
decoded.empty? ? [] : [decoded]
|
|
141
|
+
else
|
|
142
|
+
[]
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
def decode_cf_email(encoded)
|
|
147
|
+
bytes = [encoded].pack("H*").bytes
|
|
148
|
+
key = bytes.first
|
|
149
|
+
bytes[1..].map { |b| (b ^ key).chr }.join
|
|
117
150
|
end
|
|
118
151
|
|
|
119
152
|
def person_affiliation(org)
|
|
@@ -182,7 +215,10 @@ module Relaton
|
|
|
182
215
|
#
|
|
183
216
|
def parse_docid
|
|
184
217
|
id = "OASIS #{parse_docnumber}"
|
|
185
|
-
[Bib::Docidentifier.new(type: "OASIS", content: id,
|
|
218
|
+
result = [Bib::Docidentifier.new(type: "OASIS", content: id,
|
|
219
|
+
primary: true)]
|
|
220
|
+
@errors[:docid] &&= result.empty?
|
|
221
|
+
result
|
|
186
222
|
end
|
|
187
223
|
|
|
188
224
|
#
|
|
@@ -198,7 +234,9 @@ module Relaton
|
|
|
198
234
|
when /Technical Resolution/ then "resolution"
|
|
199
235
|
else "standard"
|
|
200
236
|
end
|
|
201
|
-
Doctype.new(content: type)
|
|
237
|
+
result = Doctype.new(content: type)
|
|
238
|
+
@errors[:doctype] &&= result.nil?
|
|
239
|
+
result
|
|
202
240
|
end
|
|
203
241
|
|
|
204
242
|
#
|
|
@@ -209,10 +247,12 @@ module Relaton
|
|
|
209
247
|
def parse_technology_area(node)
|
|
210
248
|
xpath = "./summary/div/div" \
|
|
211
249
|
"/ul[@class='technology-areas__list']/li/a"
|
|
212
|
-
node.xpath(xpath).map do |ta|
|
|
250
|
+
result = node.xpath(xpath).map do |ta|
|
|
213
251
|
ta.text.strip.gsub(/\s/, "-")
|
|
214
252
|
.sub("development", "Development")
|
|
215
253
|
end
|
|
254
|
+
@errors[:technology_area] &&= result.empty?
|
|
255
|
+
result
|
|
216
256
|
end
|
|
217
257
|
|
|
218
258
|
def create_ext
|
data/lib/relaton/oasis/data_part_parser.rb
CHANGED
|
@@ -9,8 +9,9 @@ module Relaton
|
|
|
9
9
|
#
|
|
10
10
|
# @param [Nokogiri::HTML::Element] node document node
|
|
11
11
|
#
|
|
12
|
-
def initialize(node)
|
|
12
|
+
def initialize(node, errors = {})
|
|
13
13
|
@node = node
|
|
14
|
+
@errors = errors
|
|
14
15
|
end
|
|
15
16
|
|
|
16
17
|
def text
|
|
@@ -69,8 +70,10 @@ module Relaton
|
|
|
69
70
|
# @return [Array<Bib::Title>] title
|
|
70
71
|
#
|
|
71
72
|
def parse_title
|
|
72
|
-
[Bib::Title.new(type: "main", content: title,
|
|
73
|
-
|
|
73
|
+
result = [Bib::Title.new(type: "main", content: title,
|
|
74
|
+
language: "en", script: "Latn")]
|
|
75
|
+
@errors[:part_title] &&= result.empty?
|
|
76
|
+
result
|
|
74
77
|
end
|
|
75
78
|
|
|
76
79
|
#
|
|
@@ -85,7 +88,9 @@ module Relaton
|
|
|
85
88
|
# some part refs need "Pt" to distinguish from root doc
|
|
86
89
|
id += "-Pt" if %w[CMIS-v1.1 DocBook-5.0 XACML-V3.0 mqtt-v3.1.1
|
|
87
90
|
OData-JSON-Format-v4.0].include?(id)
|
|
88
|
-
parse_part parse_spec id
|
|
91
|
+
result = parse_part parse_spec id
|
|
92
|
+
@errors[:part_docnumber] &&= result.nil?
|
|
93
|
+
result
|
|
89
94
|
end
|
|
90
95
|
|
|
91
96
|
#
|
|
@@ -94,7 +99,9 @@ module Relaton
|
|
|
94
99
|
# @return [Array<Bib::Uri>] link
|
|
95
100
|
#
|
|
96
101
|
def parse_link
|
|
97
|
-
[Bib::Uri.new(type: "src", content: link_node[:href])]
|
|
102
|
+
result = [Bib::Uri.new(type: "src", content: link_node[:href])]
|
|
103
|
+
@errors[:part_link] &&= result.empty?
|
|
104
|
+
result
|
|
98
105
|
end
|
|
99
106
|
|
|
100
107
|
#
|
|
@@ -104,19 +111,26 @@ module Relaton
|
|
|
104
111
|
#
|
|
105
112
|
def parse_date
|
|
106
113
|
/(?<on>\d{1,2}\s\w+\s\d{4})/ =~ text
|
|
107
|
-
[Bib::Date.new(at: Date.parse(on).to_s, type: "issued")]
|
|
114
|
+
result = [Bib::Date.new(at: Date.parse(on).to_s, type: "issued")]
|
|
115
|
+
@errors[:part_date] &&= result.empty?
|
|
116
|
+
result
|
|
108
117
|
end
|
|
109
118
|
|
|
110
|
-
def parse_abstract
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
119
|
+
def parse_abstract # rubocop:disable Metrics/MethodLength
|
|
120
|
+
result = if page
|
|
121
|
+
xpath = "//p[preceding-sibling::p" \
|
|
122
|
+
"[starts-with(., 'Abstract')]][1]"
|
|
123
|
+
page.xpath(xpath).map do |p|
|
|
124
|
+
cnt = p.text.gsub(/[\r\n]+/, " ").strip
|
|
125
|
+
Bib::Abstract.new(
|
|
126
|
+
content: cnt, language: "en", script: "Latn",
|
|
127
|
+
)
|
|
128
|
+
end
|
|
129
|
+
else
|
|
130
|
+
[]
|
|
131
|
+
end
|
|
132
|
+
@errors[:part_abstract] &&= result.empty?
|
|
133
|
+
result
|
|
120
134
|
end
|
|
121
135
|
|
|
122
136
|
#
|
|
@@ -125,25 +139,41 @@ module Relaton
|
|
|
125
139
|
# @return [Array<Bib::Contributor>] editorial group contributors
|
|
126
140
|
#
|
|
127
141
|
def parse_editorialgroup_contributor # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
142
|
+
result = if page
|
|
143
|
+
xpath = "//p[preceding-sibling::p" \
|
|
144
|
+
"[starts-with(., 'Technical')]][1]//a"
|
|
145
|
+
tcs = page.xpath(xpath)
|
|
146
|
+
if tcs.empty?
|
|
147
|
+
[]
|
|
148
|
+
else
|
|
149
|
+
subdivisions = tcs.map do |a|
|
|
150
|
+
name = [Bib::TypedLocalizedString.new(
|
|
151
|
+
content: a.text.strip,
|
|
152
|
+
)]
|
|
153
|
+
Bib::Subdivision.new(
|
|
154
|
+
type: "technical-committee", name: name,
|
|
155
|
+
)
|
|
156
|
+
end
|
|
157
|
+
org = Bib::Organization.new(
|
|
158
|
+
name: [Bib::TypedLocalizedString.new(
|
|
159
|
+
content: "OASIS",
|
|
160
|
+
)],
|
|
161
|
+
subdivision: subdivisions,
|
|
162
|
+
)
|
|
163
|
+
desc = [Bib::LocalizedMarkedUpString.new(
|
|
164
|
+
content: "committee",
|
|
165
|
+
)]
|
|
166
|
+
role = Bib::Contributor::Role.new(
|
|
167
|
+
type: "author", description: desc,
|
|
168
|
+
)
|
|
169
|
+
[Bib::Contributor.new(organization: org,
|
|
170
|
+
role: [role])]
|
|
171
|
+
end
|
|
172
|
+
else
|
|
173
|
+
[]
|
|
174
|
+
end
|
|
175
|
+
@errors[:part_editorialgroup_contributor] &&= result.empty?
|
|
176
|
+
result
|
|
147
177
|
end
|
|
148
178
|
|
|
149
179
|
#
|
|
@@ -154,26 +184,39 @@ module Relaton
|
|
|
154
184
|
def parse_relation
|
|
155
185
|
parser = DataParser.new @node.at("./ancestor::details")
|
|
156
186
|
fref = parser.parse_docid[0].content
|
|
157
|
-
bib = ItemData.new(formattedref: fref)
|
|
158
|
-
[Bib::Relation.new(type: "partOf", bibitem: bib)]
|
|
187
|
+
bib = ItemData.new(formattedref: Bib::Formattedref.new(content: fref))
|
|
188
|
+
result = [Bib::Relation.new(type: "partOf", bibitem: bib)]
|
|
189
|
+
@errors[:part_relation] &&= result.empty?
|
|
190
|
+
result
|
|
159
191
|
end
|
|
160
192
|
|
|
161
193
|
def parse_authorizer # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
194
|
+
result = if page
|
|
195
|
+
xpath = "//p[preceding-sibling::p" \
|
|
196
|
+
"[starts-with(., 'Technical')]][1]//a"
|
|
197
|
+
page.xpath(xpath).map do |a|
|
|
198
|
+
org_name = a.text.gsub(/[\r\n]+/, " ").strip
|
|
199
|
+
org = Bib::Organization.new(
|
|
200
|
+
name: [Bib::TypedLocalizedString.new(
|
|
201
|
+
content: org_name,
|
|
202
|
+
)],
|
|
203
|
+
uri: [Bib::Uri.new(type: "uri",
|
|
204
|
+
content: a[:href])],
|
|
205
|
+
)
|
|
206
|
+
desc = [Bib::LocalizedMarkedUpString.new(
|
|
207
|
+
content: "Committee",
|
|
208
|
+
)]
|
|
209
|
+
role = Bib::Contributor::Role.new(
|
|
210
|
+
type: "authorizer", description: desc,
|
|
211
|
+
)
|
|
212
|
+
Bib::Contributor.new(organization: org,
|
|
213
|
+
role: [role])
|
|
214
|
+
end
|
|
215
|
+
else
|
|
216
|
+
[]
|
|
217
|
+
end
|
|
218
|
+
@errors[:part_authorizer] &&= result.empty?
|
|
219
|
+
result
|
|
177
220
|
end
|
|
178
221
|
|
|
179
222
|
def link_node
|
|
@@ -186,7 +229,9 @@ module Relaton
|
|
|
186
229
|
# @return [Array<String>] technology areas
|
|
187
230
|
#
|
|
188
231
|
def parse_technology_area
|
|
189
|
-
super
|
|
232
|
+
result = super(@node.at("./ancestor::details"))
|
|
233
|
+
@errors[:part_technology_area] &&= result.empty?
|
|
234
|
+
result
|
|
190
235
|
end
|
|
191
236
|
end
|
|
192
237
|
end
|
data/relaton_oasis.gemspec
CHANGED
|
@@ -33,8 +33,8 @@ Gem::Specification.new do |spec|
|
|
|
33
33
|
|
|
34
34
|
spec.add_dependency "mechanize", "~> 2.10"
|
|
35
35
|
spec.add_dependency "multi_json", "~> 1.15.0"
|
|
36
|
-
spec.add_dependency "relaton-bib", "~> 2.0.0
|
|
37
|
-
spec.add_dependency "relaton-core", "~> 0.0.
|
|
36
|
+
spec.add_dependency "relaton-bib", "~> 2.0.0"
|
|
37
|
+
spec.add_dependency "relaton-core", "~> 0.0.13"
|
|
38
38
|
spec.add_dependency "relaton-index", "~> 0.2.0"
|
|
39
39
|
|
|
40
40
|
# For more information and examples about making a new gem, checkout our
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: relaton-oasis
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0.pre.alpha.2
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
@@ -43,28 +43,28 @@ dependencies:
|
|
|
43
43
|
requirements:
|
|
44
44
|
- - "~>"
|
|
45
45
|
- !ruby/object:Gem::Version
|
|
46
|
-
version: 2.0.0
|
|
46
|
+
version: 2.0.0
|
|
47
47
|
type: :runtime
|
|
48
48
|
prerelease: false
|
|
49
49
|
version_requirements: !ruby/object:Gem::Requirement
|
|
50
50
|
requirements:
|
|
51
51
|
- - "~>"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: 2.0.0
|
|
53
|
+
version: 2.0.0
|
|
54
54
|
- !ruby/object:Gem::Dependency
|
|
55
55
|
name: relaton-core
|
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|
|
57
57
|
requirements:
|
|
58
58
|
- - "~>"
|
|
59
59
|
- !ruby/object:Gem::Version
|
|
60
|
-
version: 0.0.
|
|
60
|
+
version: 0.0.13
|
|
61
61
|
type: :runtime
|
|
62
62
|
prerelease: false
|
|
63
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
64
64
|
requirements:
|
|
65
65
|
- - "~>"
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
|
-
version: 0.0.
|
|
67
|
+
version: 0.0.13
|
|
68
68
|
- !ruby/object:Gem::Dependency
|
|
69
69
|
name: relaton-index
|
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|