metanorma-standoc 2.3.2 → 2.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/isodoc/html/htmlstyle.css +30 -1
- data/lib/metanorma/standoc/blocks.rb +2 -1
- data/lib/metanorma/standoc/cleanup.rb +0 -1
- data/lib/metanorma/standoc/cleanup_asciibib.rb +0 -8
- data/lib/metanorma/standoc/cleanup_maths.rb +44 -80
- data/lib/metanorma/standoc/cleanup_ref.rb +33 -12
- data/lib/metanorma/standoc/cleanup_section_names.rb +2 -4
- data/lib/metanorma/standoc/front_contributor.rb +5 -5
- data/lib/metanorma/standoc/inline.rb +7 -8
- data/lib/metanorma/standoc/merge_bibitems.rb +107 -0
- data/lib/metanorma/standoc/ref.rb +9 -10
- data/lib/metanorma/standoc/ref_sect.rb +10 -1
- data/lib/metanorma/standoc/ref_utility.rb +1 -2
- data/lib/metanorma/standoc/render.rb +6 -6
- data/lib/metanorma/standoc/section.rb +2 -1
- data/lib/metanorma/standoc/spans_to_bibitem.rb +261 -0
- data/lib/metanorma/standoc/utils.rb +8 -0
- data/lib/metanorma/standoc/validate.rb +33 -10
- data/lib/metanorma/standoc/version.rb +1 -1
- metadata +4 -3
- data/lib/metanorma/standoc/cleanup_biblio.rb +0 -242
@@ -53,7 +53,8 @@ module Metanorma
|
|
53
53
|
end
|
54
54
|
|
55
55
|
PREFACE_CLAUSE_NAMES =
|
56
|
-
%w(abstract foreword introduction misc-container
|
56
|
+
%w(abstract foreword introduction misc-container
|
57
|
+
acknowledgements).freeze
|
57
58
|
|
58
59
|
MAIN_CLAUSE_NAMES =
|
59
60
|
["normative references", "terms and definitions", "scope",
|
@@ -0,0 +1,261 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
class SpansToBibitem
|
5
|
+
include ::Metanorma::Standoc::Utils
|
6
|
+
|
7
|
+
attr_reader :err, :out
|
8
|
+
|
9
|
+
def initialize(bib)
|
10
|
+
@bib = bib
|
11
|
+
@err = []
|
12
|
+
@spans = spans_preprocess(extract_content(bib))
|
13
|
+
end
|
14
|
+
|
15
|
+
def extract_content(bib)
|
16
|
+
extract_docid(bib) + extract_spans(bib)
|
17
|
+
end
|
18
|
+
|
19
|
+
def extract_spans(bib)
|
20
|
+
bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
|
21
|
+
s.at("./ancestor::span") and next
|
22
|
+
extract_spans1(s, m)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def extract_spans1(span, acc)
|
27
|
+
keys = span["class"].split(".", 2)
|
28
|
+
acc << { key: keys[0], type: keys[1],
|
29
|
+
val: span.children.to_xml }
|
30
|
+
(span["class"] == "type" and span.remove) or
|
31
|
+
span.replace(span.children)
|
32
|
+
end
|
33
|
+
|
34
|
+
def extract_docid(bib)
|
35
|
+
bib.xpath("./docidentifier").each_with_object([]) do |d, m|
|
36
|
+
m << { key: "docid", type: d["type"], val: d.text }
|
37
|
+
d.remove unless bib.at("./title")
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def empty_span_hash
|
42
|
+
{ contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
|
43
|
+
end
|
44
|
+
|
45
|
+
def spans_preprocess(spans)
|
46
|
+
ret = empty_span_hash
|
47
|
+
spans.each { |s| span_preprocess1(s, ret) }
|
48
|
+
host_rearrange(ret)
|
49
|
+
end
|
50
|
+
|
51
|
+
def span_preprocess1(span, ret)
|
52
|
+
case span[:key]
|
53
|
+
when "uri", "docid"
|
54
|
+
val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
|
55
|
+
ret[span[:key].to_sym] << { type: span[:type], val: val }
|
56
|
+
when "date"
|
57
|
+
ret[span[:key].to_sym] << { type: span[:type] || "published",
|
58
|
+
val: span[:val] }
|
59
|
+
when "pages", "volume", "issue"
|
60
|
+
ret[:extent][span[:key].to_sym] ||= []
|
61
|
+
ret[:extent][span[:key].to_sym] << span[:val]
|
62
|
+
when "pubplace", "title", "type", "series"
|
63
|
+
ret[span[:key].to_sym] = span[:val]
|
64
|
+
when "in_title"
|
65
|
+
ret[:in][:title] = span[:val]
|
66
|
+
when "publisher"
|
67
|
+
ret[:contrib] << { role: "publisher", entity: "organization",
|
68
|
+
name: span[:val] }
|
69
|
+
when "surname", "initials", "givenname", "formatted-initials"
|
70
|
+
ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
|
71
|
+
when "fullname"
|
72
|
+
ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
|
73
|
+
when "organization"
|
74
|
+
ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
|
75
|
+
when "in_surname", "in_initials", "in_givenname",
|
76
|
+
"in_formatted-initials"
|
77
|
+
ret[:in][:contrib] ||= []
|
78
|
+
span[:key].sub!(/^in_/, "")
|
79
|
+
ret[:in][:contrib] =
|
80
|
+
spans_preprocess_contrib(span, ret[:in][:contrib])
|
81
|
+
when "in_fullname"
|
82
|
+
ret[:in][:contrib] ||= []
|
83
|
+
span[:key].sub!(/^in_/, "")
|
84
|
+
ret[:in][:contrib] =
|
85
|
+
spans_preprocess_fullname(span, ret[:in][:contrib])
|
86
|
+
when "in_organization"
|
87
|
+
ret[:in][:contrib] ||= []
|
88
|
+
span[:key].sub!(/^in_/, "")
|
89
|
+
ret[:in][:contrib] =
|
90
|
+
spans_preprocess_org(span, ret[:in][:contrib])
|
91
|
+
else
|
92
|
+
msg = "unrecognised key '#{span[:key]}' in " \
|
93
|
+
"`span:#{span[:key]}[#{span[:val]}]`"
|
94
|
+
@err << { msg: msg }
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def host_rearrange(ret)
|
99
|
+
ret[:in][:title] or return ret
|
100
|
+
ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
|
101
|
+
old
|
102
|
+
end
|
103
|
+
%i(series).each do |k|
|
104
|
+
ret[:in][k] = ret[k]
|
105
|
+
ret.delete(k)
|
106
|
+
end
|
107
|
+
/^in/.match?(ret[:type]) and ret[:in][:type] =
|
108
|
+
ret[:type].sub(/^in/, "")
|
109
|
+
ret
|
110
|
+
end
|
111
|
+
|
112
|
+
def spans_preprocess_contrib(span, contrib)
|
113
|
+
span[:key] == "initials" and span[:key] = "formatted-initials"
|
114
|
+
spans_preprocess_new_contrib?(span, contrib) and
|
115
|
+
contrib << { role: span[:type] || "author", entity: "person" }
|
116
|
+
if multiple_givennames?(span, contrib)
|
117
|
+
contrib[-1][:givenname] = [contrib[-1][:givenname],
|
118
|
+
span[:val]].flatten
|
119
|
+
else contrib[-1][span[:key].to_sym] = span[:val]
|
120
|
+
end
|
121
|
+
contrib
|
122
|
+
end
|
123
|
+
|
124
|
+
def spans_preprocess_new_contrib?(span, contrib)
|
125
|
+
contrib.empty? ||
|
126
|
+
(span[:key] == "surname" && contrib[-1][:surname]) ||
|
127
|
+
contrib[-1][:role] != (span[:type] || "author")
|
128
|
+
end
|
129
|
+
|
130
|
+
def multiple_givennames?(span, contrib)
|
131
|
+
(%w(formatted-initials givenname).include?(span[:key]) &&
|
132
|
+
(contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
|
133
|
+
return false
|
134
|
+
if contrib[-1][:"formatted-initials"]
|
135
|
+
contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
|
136
|
+
contrib[-1].delete(:"formatted-initials")
|
137
|
+
end
|
138
|
+
true
|
139
|
+
end
|
140
|
+
|
141
|
+
def spans_preprocess_fullname(span, contrib)
|
142
|
+
name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
|
143
|
+
out = { role: span[:type] || "author", entity: "person",
|
144
|
+
surname: name[-1] }
|
145
|
+
if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
|
146
|
+
out[:"formatted-initials"] = name[0..-2].join(" ")
|
147
|
+
else out[:givenname] = name[0..-2]
|
148
|
+
end
|
149
|
+
contrib << out
|
150
|
+
contrib
|
151
|
+
end
|
152
|
+
|
153
|
+
def spans_preprocess_org(span, contrib)
|
154
|
+
contrib << { role: span[:type] || "author", entity: "organization",
|
155
|
+
name: span[:val] }
|
156
|
+
contrib
|
157
|
+
end
|
158
|
+
|
159
|
+
def convert
|
160
|
+
ret = spans_to_bibitem(@spans)
|
161
|
+
@out = Nokogiri::XML("<bibitem>#{ret}</bibitem>").root
|
162
|
+
@spans[:type] and @out["type"] = @spans[:type]
|
163
|
+
self
|
164
|
+
end
|
165
|
+
|
166
|
+
def spans_to_bibitem(spans)
|
167
|
+
ret = ""
|
168
|
+
spans[:title] and ret += "<title>#{spans[:title]}</title>"
|
169
|
+
ret += spans_to_bibitem_docid(spans)
|
170
|
+
spans[:contrib].each do |s|
|
171
|
+
ret += span_to_contrib(s, spans[:title])
|
172
|
+
end
|
173
|
+
spans[:series] and
|
174
|
+
ret += "<series><title>#{spans[:series]}</title></series>"
|
175
|
+
spans[:pubplace] and ret += "<place>#{spans[:pubplace]}</place>"
|
176
|
+
ret += spans_to_bibitem_host(spans)
|
177
|
+
ret += spans_to_bibitem_extent(spans[:extent])
|
178
|
+
ret
|
179
|
+
end
|
180
|
+
|
181
|
+
def spans_to_bibitem_host(spans)
|
182
|
+
spans[:in].empty? and return ""
|
183
|
+
ret =
|
184
|
+
"<relation type='includedIn'><bibitem type='#{spans[:in][:type]}'>"
|
185
|
+
spans[:in].delete(:type)
|
186
|
+
ret + "#{spans_to_bibitem(spans[:in])}</bibitem></relation>"
|
187
|
+
end
|
188
|
+
|
189
|
+
def spans_to_bibitem_docid(spans)
|
190
|
+
ret = ""
|
191
|
+
spans[:uri].each { |s| ret += span_to_docid(s, "uri") }
|
192
|
+
spans[:docid].each { |s| ret += span_to_docid(s, "docidentifier") }
|
193
|
+
spans[:date].each { |s| ret += span_to_date(s) }
|
194
|
+
ret
|
195
|
+
end
|
196
|
+
|
197
|
+
def spans_to_bibitem_extent(spans)
|
198
|
+
ret = ""
|
199
|
+
{ volume: "volume", issue: "issue", pages: "page" }.each do |k, v|
|
200
|
+
spans[k]&.each { |s| ret += span_to_extent(s, v) }
|
201
|
+
end
|
202
|
+
ret.empty? and return ""
|
203
|
+
"<extent>#{ret}</extent>"
|
204
|
+
end
|
205
|
+
|
206
|
+
def span_to_extent(span, key)
|
207
|
+
values = span.split(/[-–]/)
|
208
|
+
ret = "<locality type='#{key}'>" \
|
209
|
+
"<referenceFrom>#{values[0]}</referenceFrom>"
|
210
|
+
values[1] and
|
211
|
+
ret += "<referenceTo>#{values[1]}</referenceTo>"
|
212
|
+
"#{ret}</locality>"
|
213
|
+
end
|
214
|
+
|
215
|
+
def span_to_docid(span, key)
|
216
|
+
if span[:type]
|
217
|
+
"<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
|
218
|
+
else "<#{key}>#{span[:val]}</#{key}>"
|
219
|
+
end
|
220
|
+
end
|
221
|
+
|
222
|
+
def span_to_date(span)
|
223
|
+
val = if /[-–](?=\d{4})/.match?(span[:val])
|
224
|
+
from, to = span[:val].split(/[-–](?=\d{4})/, 2)
|
225
|
+
"<from>#{from}</from><to>#{to}</to>"
|
226
|
+
else "<on>#{span[:val]}</on>"
|
227
|
+
end
|
228
|
+
type = span[:type] ? " type='#{span[:type]}'" : ""
|
229
|
+
"<date#{type}>#{val}</date>"
|
230
|
+
end
|
231
|
+
|
232
|
+
def span_to_contrib(span, title)
|
233
|
+
e = if span[:entity] == "organization"
|
234
|
+
"<organization><name>#{span[:name]}</name></organization>"
|
235
|
+
else span_to_person(span, title)
|
236
|
+
end
|
237
|
+
"<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
|
238
|
+
end
|
239
|
+
|
240
|
+
def validate_span_to_person(span, title)
|
241
|
+
span[:surname] and return
|
242
|
+
msg = "Missing surname: issue with bibliographic markup " \
|
243
|
+
"in \"#{title}\": #{span}"
|
244
|
+
@err << { msg: msg, fatal: true }
|
245
|
+
end
|
246
|
+
|
247
|
+
def span_to_person(span, title)
|
248
|
+
validate_span_to_person(span, title)
|
249
|
+
pre = (span[:"formatted-initials"] and
|
250
|
+
"<formatted-initials>" \
|
251
|
+
"#{span[:"formatted-initials"]}</formatted-initials>") ||
|
252
|
+
Array(span[:givenname]).map do |x|
|
253
|
+
"<forename>#{x}</forename>"
|
254
|
+
end.join
|
255
|
+
"<person><name>#{pre}<surname>#{span[:surname]}</surname></name>" \
|
256
|
+
"</person>"
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
@@ -60,6 +60,14 @@ module Metanorma
|
|
60
60
|
"<expression><name>#{elem}</name></expression>"
|
61
61
|
end
|
62
62
|
|
63
|
+
def link_unwrap(para)
|
64
|
+
elems = para.elements
|
65
|
+
if elems.size == 1 && elems[0].name == "link"
|
66
|
+
para.at("./link").replace(elems[0]["target"].strip)
|
67
|
+
end
|
68
|
+
para
|
69
|
+
end
|
70
|
+
|
63
71
|
class EmptyAttr
|
64
72
|
def attr(_any_attribute)
|
65
73
|
nil
|
@@ -13,9 +13,8 @@ module Metanorma
|
|
13
13
|
"referenceFrom".freeze
|
14
14
|
|
15
15
|
def init_iev
|
16
|
-
return nil
|
17
|
-
|
18
|
-
|
16
|
+
@no_isobib and return nil
|
17
|
+
@iev and return @iev
|
19
18
|
@iev = Iev::Db.new(@iev_globalname, @iev_localname) unless @no_isobib
|
20
19
|
@iev
|
21
20
|
end
|
@@ -43,6 +42,8 @@ module Metanorma
|
|
43
42
|
end
|
44
43
|
|
45
44
|
def content_validate(doc)
|
45
|
+
nested_asset_validate_basic(doc)
|
46
|
+
nested_note_validate(doc)
|
46
47
|
xref_validate(doc)
|
47
48
|
section_validate(doc)
|
48
49
|
norm_ref_validate(doc)
|
@@ -57,12 +58,36 @@ module Metanorma
|
|
57
58
|
clean_abort(@fatalerror.join("\n"), doc)
|
58
59
|
end
|
59
60
|
|
61
|
+
def nested_asset_validate_basic(doc)
|
62
|
+
a = "//formula | //example | //figure | //termnote | //termexample | " \
|
63
|
+
"//table"
|
64
|
+
doc.xpath("#{a} | //note").each do |m|
|
65
|
+
m.xpath(a.gsub(%r{//}, ".//")).each do |n|
|
66
|
+
nested_asset_report(m, n, doc)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def nested_note_validate(doc)
|
72
|
+
doc.xpath("//termnote | //note").each do |m|
|
73
|
+
m.xpath(".//note").each do |n|
|
74
|
+
nested_asset_report(m, n, doc)
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def nested_asset_report(outer, inner, _doc)
|
80
|
+
outer.name == "figure" && inner.name == "figure" and return
|
81
|
+
err = "There is an instance of #{inner.name} nested within #{outer.name}"
|
82
|
+
@log.add("Syntax", inner, err)
|
83
|
+
@fatalerror << "#{err}:\n#{inner.to_xml}"
|
84
|
+
end
|
85
|
+
|
60
86
|
def norm_ref_validate(doc)
|
61
87
|
found = false
|
62
88
|
doc.xpath("//references[@normative = 'true']/bibitem").each do |b|
|
63
|
-
|
64
|
-
|
65
|
-
|
89
|
+
docid = b.at("./docidentifier[@type = 'metanorma']") or next
|
90
|
+
/^\[\d+\]$/.match?(docid.text) or next
|
66
91
|
@log.add("Bibliography", b,
|
67
92
|
"Numeric reference in normative references")
|
68
93
|
found = true
|
@@ -106,8 +131,7 @@ module Metanorma
|
|
106
131
|
@log.add("Anchors", elem, "Anchor #{elem['id']} has already been " \
|
107
132
|
"used at line #{ids[elem['id']]}")
|
108
133
|
@fatalerror << "Multiple instances of same ID: #{elem['id']}"
|
109
|
-
else
|
110
|
-
ids[elem["id"]] = elem.line
|
134
|
+
else ids[elem["id"]] = elem.line
|
111
135
|
end
|
112
136
|
ids
|
113
137
|
end
|
@@ -166,8 +190,7 @@ module Metanorma
|
|
166
190
|
def xref_validate(doc)
|
167
191
|
ids = doc.xpath("//*/@id").each_with_object({}) { |x, m| m[x.text] = 1 }
|
168
192
|
doc.xpath("//xref/@target | //xref/@to").each do |x|
|
169
|
-
|
170
|
-
|
193
|
+
ids[x.text] and next
|
171
194
|
@log.add("Anchors", x.parent,
|
172
195
|
"Crossreference target #{x.text} is undefined")
|
173
196
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metanorma-standoc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.2
|
4
|
+
version: 2.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciidoctor
|
@@ -475,7 +475,6 @@ files:
|
|
475
475
|
- lib/metanorma/standoc/cleanup.rb
|
476
476
|
- lib/metanorma/standoc/cleanup_amend.rb
|
477
477
|
- lib/metanorma/standoc/cleanup_asciibib.rb
|
478
|
-
- lib/metanorma/standoc/cleanup_biblio.rb
|
479
478
|
- lib/metanorma/standoc/cleanup_block.rb
|
480
479
|
- lib/metanorma/standoc/cleanup_boilerplate.rb
|
481
480
|
- lib/metanorma/standoc/cleanup_footnotes.rb
|
@@ -511,6 +510,7 @@ files:
|
|
511
510
|
- lib/metanorma/standoc/macros_note.rb
|
512
511
|
- lib/metanorma/standoc/macros_plantuml.rb
|
513
512
|
- lib/metanorma/standoc/macros_terms.rb
|
513
|
+
- lib/metanorma/standoc/merge_bibitems.rb
|
514
514
|
- lib/metanorma/standoc/processor.rb
|
515
515
|
- lib/metanorma/standoc/ref.rb
|
516
516
|
- lib/metanorma/standoc/ref_sect.rb
|
@@ -519,6 +519,7 @@ files:
|
|
519
519
|
- lib/metanorma/standoc/reqt.rb
|
520
520
|
- lib/metanorma/standoc/reqt.rng
|
521
521
|
- lib/metanorma/standoc/section.rb
|
522
|
+
- lib/metanorma/standoc/spans_to_bibitem.rb
|
522
523
|
- lib/metanorma/standoc/table.rb
|
523
524
|
- lib/metanorma/standoc/term_lookup_cleanup.rb
|
524
525
|
- lib/metanorma/standoc/terms.rb
|
@@ -1,242 +0,0 @@
|
|
1
|
-
module Metanorma
|
2
|
-
module Standoc
|
3
|
-
module Cleanup
|
4
|
-
def formattedref_spans(xmldoc)
|
5
|
-
xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
|
6
|
-
spans = spans_preprocess(extract_content(b))
|
7
|
-
ret = spans_to_bibitem(spans)
|
8
|
-
spans[:type] and b["type"] = spans[:type]
|
9
|
-
b << ret
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def extract_content(bib)
|
14
|
-
extract_docid(bib) + extract_spans(bib)
|
15
|
-
end
|
16
|
-
|
17
|
-
def extract_spans(bib)
|
18
|
-
bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
|
19
|
-
next if s.at("./ancestor::span")
|
20
|
-
|
21
|
-
extract_spans1(s, m)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
|
25
|
-
def extract_spans1(span, acc)
|
26
|
-
keys = span["class"].split(".", 2)
|
27
|
-
acc << { key: keys[0], type: keys[1],
|
28
|
-
val: span.children.to_xml }
|
29
|
-
(span["class"] == "type" and span.remove) or span.replace(span.children)
|
30
|
-
end
|
31
|
-
|
32
|
-
def extract_docid(bib)
|
33
|
-
bib.xpath("./docidentifier").each_with_object([]) do |d, m|
|
34
|
-
m << { key: "docid", type: d["type"], val: d.text }
|
35
|
-
d.remove
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
def empty_span_hash
|
40
|
-
{ contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
|
41
|
-
end
|
42
|
-
|
43
|
-
def spans_preprocess(spans)
|
44
|
-
ret = empty_span_hash
|
45
|
-
spans.each { |s| span_preprocess1(s, ret) }
|
46
|
-
host_rearrange(ret)
|
47
|
-
end
|
48
|
-
|
49
|
-
def span_preprocess1(span, ret)
|
50
|
-
case span[:key]
|
51
|
-
when "uri", "docid"
|
52
|
-
val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
|
53
|
-
ret[span[:key].to_sym] << { type: span[:type], val: val }
|
54
|
-
when "date"
|
55
|
-
ret[span[:key].to_sym] << { type: span[:type] || "published",
|
56
|
-
val: span[:val] }
|
57
|
-
when "pages", "volume", "issue"
|
58
|
-
ret[:extent][span[:key].to_sym] ||= []
|
59
|
-
ret[:extent][span[:key].to_sym] << span[:val]
|
60
|
-
when "pubplace", "title", "type", "series"
|
61
|
-
ret[span[:key].to_sym] = span[:val]
|
62
|
-
when "in_title"
|
63
|
-
ret[:in][:title] = span[:val]
|
64
|
-
when "publisher"
|
65
|
-
ret[:contrib] << { role: "publisher", entity: "organization",
|
66
|
-
name: span[:val] }
|
67
|
-
when "surname", "initials", "givenname", "formatted-initials"
|
68
|
-
ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
|
69
|
-
when "fullname"
|
70
|
-
ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
|
71
|
-
when "organization"
|
72
|
-
ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
|
73
|
-
when "in_surname", "in_initials", "in_givenname",
|
74
|
-
"in_formatted-initials"
|
75
|
-
ret[:in][:contrib] ||= []
|
76
|
-
span[:key].sub!(/^in_/, "")
|
77
|
-
ret[:in][:contrib] =
|
78
|
-
spans_preprocess_contrib(span, ret[:in][:contrib])
|
79
|
-
when "in_fullname"
|
80
|
-
ret[:in][:contrib] ||= []
|
81
|
-
span[:key].sub!(/^in_/, "")
|
82
|
-
ret[:in][:contrib] =
|
83
|
-
spans_preprocess_fullname(span, ret[:in][:contrib])
|
84
|
-
when "in_organization"
|
85
|
-
ret[:in][:contrib] ||= []
|
86
|
-
span[:key].sub!(/^in_/, "")
|
87
|
-
ret[:in][:contrib] =
|
88
|
-
spans_preprocess_org(span, ret[:in][:contrib])
|
89
|
-
else
|
90
|
-
warn "unrecognised `span:#{span['key']}`"
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
def host_rearrange(ret)
|
95
|
-
ret[:in][:title] or return ret
|
96
|
-
ret[:in].merge!(empty_span_hash, { type: "misc" }) { |_, old, _| old }
|
97
|
-
|
98
|
-
%i(series).each do |k|
|
99
|
-
ret[:in][k] = ret[k]
|
100
|
-
ret.delete(k)
|
101
|
-
end
|
102
|
-
/^in/.match?(ret[:type]) and ret[:in][:type] =
|
103
|
-
ret[:type].sub(/^in/, "")
|
104
|
-
ret
|
105
|
-
end
|
106
|
-
|
107
|
-
def spans_preprocess_contrib(span, contrib)
|
108
|
-
span[:key] = "formatted-initials" if span[:key] == "initials"
|
109
|
-
|
110
|
-
spans_preprocess_new_contrib?(span, contrib) and
|
111
|
-
contrib << { role: span[:type] || "author", entity: "person" }
|
112
|
-
if span[:key] == "givenname" && contrib[-1][span[:key].to_sym]
|
113
|
-
contrib[-1][span[:key].to_sym] =
|
114
|
-
Array(contrib[-1][span[:key].to_sym]) + span[:val]
|
115
|
-
else
|
116
|
-
contrib[-1][span[:key].to_sym] = span[:val]
|
117
|
-
end
|
118
|
-
contrib
|
119
|
-
end
|
120
|
-
|
121
|
-
def spans_preprocess_new_contrib?(span, contrib)
|
122
|
-
contrib.empty? ||
|
123
|
-
(if span[:key] == "surname" then contrib[-1][:surname]
|
124
|
-
else (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])
|
125
|
-
end) ||
|
126
|
-
contrib[-1][:role] != (span[:type] || "author")
|
127
|
-
end
|
128
|
-
|
129
|
-
def spans_preprocess_fullname(span, contrib)
|
130
|
-
name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
|
131
|
-
out = { role: span[:type] || "author", entity: "person",
|
132
|
-
surname: name[-1] }
|
133
|
-
if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
|
134
|
-
out[:"formatted-initials"] = name[0..-2].join(" ")
|
135
|
-
else
|
136
|
-
out[:givenname] = name[0..-2]
|
137
|
-
end
|
138
|
-
contrib << out
|
139
|
-
contrib
|
140
|
-
end
|
141
|
-
|
142
|
-
def spans_preprocess_org(span, contrib)
|
143
|
-
contrib << { role: span[:type] || "author", entity: "organization",
|
144
|
-
name: span[:val] }
|
145
|
-
contrib
|
146
|
-
end
|
147
|
-
|
148
|
-
def spans_to_bibitem(spans)
|
149
|
-
ret = ""
|
150
|
-
spans[:title] and ret += "<title>#{spans[:title]}</title>"
|
151
|
-
ret += spans_to_bibitem_docid(spans)
|
152
|
-
spans[:contrib].each { |s| ret += span_to_contrib(s, spans[:title]) }
|
153
|
-
spans[:series] and
|
154
|
-
ret += "<series><title>#{spans[:series]}</title></series>"
|
155
|
-
spans[:pubplace] and ret += "<place>#{spans[:pubplace]}</place>"
|
156
|
-
ret += spans_to_bibitem_host(spans)
|
157
|
-
ret + spans_to_bibitem_extent(spans[:extent])
|
158
|
-
end
|
159
|
-
|
160
|
-
def spans_to_bibitem_host(spans)
|
161
|
-
return "" if spans[:in].empty?
|
162
|
-
|
163
|
-
ret =
|
164
|
-
"<relation type='includedIn'><bibitem type='#{spans[:in][:type]}'>"
|
165
|
-
spans[:in].delete(:type)
|
166
|
-
ret + "#{spans_to_bibitem(spans[:in])}</bibitem></relation>"
|
167
|
-
end
|
168
|
-
|
169
|
-
def spans_to_bibitem_docid(spans)
|
170
|
-
ret = ""
|
171
|
-
spans[:uri].each { |s| ret += span_to_docid(s, "uri") }
|
172
|
-
spans[:docid].each { |s| ret += span_to_docid(s, "docidentifier") }
|
173
|
-
spans[:date].each { |s| ret += span_to_date(s) }
|
174
|
-
ret
|
175
|
-
end
|
176
|
-
|
177
|
-
def spans_to_bibitem_extent(spans)
|
178
|
-
ret = ""
|
179
|
-
{ volume: "volume", issue: "issue", pages: "page" }.each do |k, v|
|
180
|
-
spans[k]&.each { |s| ret += span_to_extent(s, v) }
|
181
|
-
end
|
182
|
-
return "" if ret.empty?
|
183
|
-
|
184
|
-
"<extent>#{ret}</extent>"
|
185
|
-
end
|
186
|
-
|
187
|
-
def span_to_extent(span, key)
|
188
|
-
values = span.split(/[-–]/)
|
189
|
-
ret = "<locality type='#{key}'>" \
|
190
|
-
"<referenceFrom>#{values[0]}</referenceFrom>"
|
191
|
-
values[1] and
|
192
|
-
ret += "<referenceTo>#{values[1]}</referenceTo>"
|
193
|
-
"#{ret}</locality>"
|
194
|
-
end
|
195
|
-
|
196
|
-
def span_to_docid(span, key)
|
197
|
-
if span[:type]
|
198
|
-
"<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
|
199
|
-
else
|
200
|
-
"<#{key}>#{span[:val]}</#{key}>"
|
201
|
-
end
|
202
|
-
end
|
203
|
-
|
204
|
-
def span_to_date(span)
|
205
|
-
val = if /[-–](?=\d{4})/.match?(span[:val])
|
206
|
-
from, to = span[:val].split(/[-–](?=\d{4})/, 2)
|
207
|
-
"<from>#{from}</from><to>#{to}</to>"
|
208
|
-
else
|
209
|
-
"<on>#{span[:val]}</on>"
|
210
|
-
end
|
211
|
-
type = span[:type] ? " type='#{span[:type]}'" : ""
|
212
|
-
"<date#{type}>#{val}</date>"
|
213
|
-
end
|
214
|
-
|
215
|
-
def span_to_contrib(span, title)
|
216
|
-
e = if span[:entity] == "organization"
|
217
|
-
"<organization><name>#{span[:name]}</name></organization>"
|
218
|
-
else span_to_person(span, title)
|
219
|
-
end
|
220
|
-
"<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
|
221
|
-
end
|
222
|
-
|
223
|
-
def validate_span_to_person(span, title)
|
224
|
-
span[:surname] and return
|
225
|
-
msg = "Missing surname: issue with bibliographic markup " \
|
226
|
-
"in \"#{title}\": #{span}"
|
227
|
-
@log.add("Bibliography", nil, msg)
|
228
|
-
@fatalerror << msg
|
229
|
-
end
|
230
|
-
|
231
|
-
def span_to_person(span, title)
|
232
|
-
validate_span_to_person(span, title)
|
233
|
-
pre = (span[:"formatted-initials"] and
|
234
|
-
"<formatted-initials>" \
|
235
|
-
"#{span[:"formatted-initials"]}</formatted-initials>") ||
|
236
|
-
Array(span[:givenname]).map { |x| "<forename>#{x}</forename>" }.join
|
237
|
-
"<person><name>#{pre}<surname>#{span[:surname]}</surname></name>" \
|
238
|
-
"</person>"
|
239
|
-
end
|
240
|
-
end
|
241
|
-
end
|
242
|
-
end
|