metanorma-standoc 2.3.3 → 2.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/isodoc/html/htmlstyle.css +30 -1
- data/lib/metanorma/standoc/blocks.rb +2 -1
- data/lib/metanorma/standoc/cleanup.rb +0 -1
- data/lib/metanorma/standoc/cleanup_asciibib.rb +0 -8
- data/lib/metanorma/standoc/cleanup_maths.rb +44 -80
- data/lib/metanorma/standoc/cleanup_ref.rb +33 -12
- data/lib/metanorma/standoc/cleanup_section_names.rb +2 -4
- data/lib/metanorma/standoc/front_contributor.rb +5 -5
- data/lib/metanorma/standoc/inline.rb +7 -8
- data/lib/metanorma/standoc/merge_bibitems.rb +107 -0
- data/lib/metanorma/standoc/ref.rb +9 -10
- data/lib/metanorma/standoc/ref_sect.rb +10 -1
- data/lib/metanorma/standoc/ref_utility.rb +1 -2
- data/lib/metanorma/standoc/render.rb +6 -6
- data/lib/metanorma/standoc/section.rb +2 -1
- data/lib/metanorma/standoc/spans_to_bibitem.rb +261 -0
- data/lib/metanorma/standoc/utils.rb +8 -0
- data/lib/metanorma/standoc/validate.rb +55 -23
- data/lib/metanorma/standoc/version.rb +1 -1
- metadata +4 -3
- data/lib/metanorma/standoc/cleanup_biblio.rb +0 -243
@@ -1,243 +0,0 @@
|
|
1
|
-
module Metanorma
  module Standoc
    # Turns `span`-annotated formatted references inside bibitems into
    # structured bibitem child elements (title, contributor, date, extent...).
    #
    # The methods read `@log` and `@fatalerror`, and call `link_unwrap`; none
    # of those are defined here, so they are expected to be supplied by the
    # object this module is mixed into.
    module Cleanup
      # For every bibitem whose formattedref contains spans, extract the
      # spans, build the structured XML for them, and append it to the bibitem.
      # A `type` span, if present, also sets the bibitem's `type` attribute.
      #
      # @param xmldoc document node responding to `xpath`
      def formattedref_spans(xmldoc)
        xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
          spans = spans_preprocess(extract_content(b), b)
          ret = spans_to_bibitem(spans)
          spans[:type] and b["type"] = spans[:type]
          b << ret
        end
      end

      # @return [Array<Hash>] docidentifier entries followed by span entries,
      #   each of the form `{ key:, type:, val: }`
      def extract_content(bib)
        extract_docid(bib) + extract_spans(bib)
      end

      # Collects the outermost spans of the formattedref; spans nested inside
      # another span are skipped.
      def extract_spans(bib)
        bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
          next if s.at("./ancestor::span")

          extract_spans1(s, m)
        end
      end

      # Records one span in +acc+ and strips its markup from the document.
      # The class attribute is split on the first "." into key and type.
      def extract_spans1(span, acc)
        keys = span["class"].split(".", 2)
        acc << { key: keys[0], type: keys[1],
                 val: span.children.to_xml }
        # a `type` span is dropped altogether; any other span is unwrapped so
        # its content stays in the formattedref
        (span["class"] == "type" and span.remove) or span.replace(span.children)
      end

      # Removes the bibitem's direct docidentifier children and returns them
      # as `docid` entries.
      def extract_docid(bib)
        bib.xpath("./docidentifier").each_with_object([]) do |d, m|
          m << { key: "docid", type: d["type"], val: d.text }
          d.remove
        end
      end

      # @return [Hash] fresh accumulator with the keys the renderers iterate
      def empty_span_hash
        { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
      end

      # Folds the flat list of span entries into a single accumulator hash
      # and then normalises the host ("in") item.
      def spans_preprocess(spans, bib)
        ret = empty_span_hash
        spans.each { |s| span_preprocess1(s, ret, bib) }
        host_rearrange(ret)
      end

      # Files one span entry into +ret+ according to its key. Unrecognised
      # keys are reported to `@log` against +bib+.
      def span_preprocess1(span, ret, bib)
        case span[:key]
        when "uri", "docid"
          val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
          ret[span[:key].to_sym] << { type: span[:type], val: val }
        when "date"
          ret[:date] << { type: span[:type] || "published",
                          val: span[:val] }
        when "pages", "volume", "issue"
          (ret[:extent][span[:key].to_sym] ||= []) << span[:val]
        when "pubplace", "title", "type", "series"
          ret[span[:key].to_sym] = span[:val]
        when "in_title"
          ret[:in][:title] = span[:val]
        when "publisher"
          ret[:contrib] << { role: "publisher", entity: "organization",
                             name: span[:val] }
        when "surname", "initials", "givenname", "formatted-initials"
          ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
        when "fullname"
          ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
        when "organization"
          ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
        when "in_surname", "in_initials", "in_givenname",
             "in_formatted-initials", "in_fullname", "in_organization"
          span_preprocess_host_contrib(span, ret)
        else
          msg = "unrecognised key '#{span[:key]}' in `span:#{span[:key]}[#{span[:val]}]`"
          @log.add("Bibliography", bib, msg)
        end
      end

      # Handles an `in_*` contributor span: strips the `in_` prefix from the
      # key and files the span under the host item's contributors.
      def span_preprocess_host_contrib(span, ret)
        span[:key] = span[:key].sub(/^in_/, "")
        host = (ret[:in][:contrib] ||= [])
        ret[:in][:contrib] =
          case span[:key]
          when "fullname" then spans_preprocess_fullname(span, host)
          when "organization" then spans_preprocess_org(span, host)
          else spans_preprocess_contrib(span, host)
          end
      end

      # Completes the host ("in") item. Nothing is done when no host data was
      # collected. Otherwise the host receives every default accumulator key
      # plus type "misc" (existing values win), the series is moved from the
      # item to its host, and a type starting with "in" gives the host that
      # type minus the prefix.
      #
      # The defaults are merged for any non-empty host, not only a titled one:
      # a host holding only contributors would otherwise lack the keys the
      # renderers iterate over.
      def host_rearrange(ret)
        ret[:in].empty? and return ret
        ret[:in].merge!(empty_span_hash, { type: "misc" }) { |_, old, _| old }

        %i(series).each do |k|
          ret[:in][k] = ret[k]
          ret.delete(k)
        end
        /^in/.match?(ret[:type]) and ret[:in][:type] =
                                       ret[:type].sub(/^in/, "")
        ret
      end

      # Adds a name-part span (surname, givenname, formatted-initials; the key
      # "initials" is treated as "formatted-initials") to the last person in
      # +contrib+, starting a new person when spans_preprocess_new_contrib?
      # says so.
      #
      # @return [Array<Hash>] +contrib+, modified in place
      def spans_preprocess_contrib(span, contrib)
        span[:key] = "formatted-initials" if span[:key] == "initials"

        spans_preprocess_new_contrib?(span, contrib) and
          contrib << { role: span[:type] || "author", entity: "person" }
        key = span[:key].to_sym
        if key == :givenname && contrib[-1][key]
          # Accumulate repeated given names as an array. The value is wrapped
          # in an array because Array#+ rejects a bare String.
          # NOTE(review): as spans_preprocess_new_contrib? is written, a second
          # givenname starts a new person first, so this branch looks
          # unreachable unless that predicate is relaxed -- confirm intent.
          contrib[-1][key] = Array(contrib[-1][key]) + [span[:val]]
        else
          contrib[-1][key] = span[:val]
        end
        contrib
      end

      # Truthy when +span+ cannot be attached to the last contributor: there
      # is none yet, the slot the span would fill is already taken (surname
      # for a surname span; formatted-initials or givenname otherwise), or the
      # role differs (a span without a type counts as "author").
      def spans_preprocess_new_contrib?(span, contrib)
        return true if contrib.empty?

        last = contrib[-1]
        taken = if span[:key] == "surname" then last[:surname]
                else last[:"formatted-initials"] || last[:givenname]
                end
        taken || last[:role] != (span[:type] || "author")
      end

      # Splits a full name into a person entry. The last token is the surname.
      # If every preceding token ends in "." they become formatted-initials;
      # otherwise they are given names. A space is first inserted after any
      # "." directly followed by a letter, so "A.B. Smith" yields two initials.
      #
      # @return [Array<Hash>] +contrib+ with the new person appended
      def spans_preprocess_fullname(span, contrib)
        # split on runs of whitespace so doubled or leading spaces do not
        # produce empty name tokens
        name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(" ")
        out = { role: span[:type] || "author", entity: "person",
                surname: name[-1] }
        leading = name[0..-2]
        if name.size > 1 && leading.all? { |x| x.end_with?(".") }
          out[:"formatted-initials"] = leading.join(" ")
        else
          out[:givenname] = leading
        end
        contrib << out
      end

      # Appends an organization contributor (role defaults to "author").
      def spans_preprocess_org(span, contrib)
        contrib << { role: span[:type] || "author", entity: "organization",
                     name: span[:val] }
      end

      # Renders an accumulator hash as a string of bibitem child elements, in
      # the order: title, uri/docidentifier/date, contributors, series, place,
      # host relation, extent.
      #
      # @return [String] XML fragment
      def spans_to_bibitem(spans)
        parts = []
        spans[:title] and parts << "<title>#{spans[:title]}</title>"
        parts << spans_to_bibitem_docid(spans)
        Array(spans[:contrib]).each do |c|
          parts << span_to_contrib(c, spans[:title])
        end
        spans[:series] and
          parts << "<series><title>#{spans[:series]}</title></series>"
        spans[:pubplace] and parts << "<place>#{spans[:pubplace]}</place>"
        parts << spans_to_bibitem_host(spans)
        parts << spans_to_bibitem_extent(spans[:extent])
        parts.join
      end

      # Renders the host item as an `includedIn` relation, or "" when there is
      # no host data. The input hash is left unmodified, so rendering the same
      # accumulator twice gives the same output.
      def spans_to_bibitem_host(spans)
        host = spans[:in]
        return "" if host.nil? || host.empty?

        "<relation type='includedIn'><bibitem type='#{host[:type]}'>" \
          "#{spans_to_bibitem(host)}</bibitem></relation>"
      end

      # Renders uri, docidentifier and date entries, in that order.
      def spans_to_bibitem_docid(spans)
        uris = Array(spans[:uri]).map { |s| span_to_docid(s, "uri") }
        ids = Array(spans[:docid]).map { |s| span_to_docid(s, "docidentifier") }
        dates = Array(spans[:date]).map { |s| span_to_date(s) }
        (uris + ids + dates).join
      end

      # Renders volume, issue and page localities wrapped in a single
      # `<extent>`; returns "" when there are none.
      #
      # @param spans [Hash, nil] the `:extent` part of the accumulator
      def spans_to_bibitem_extent(spans)
        extent = spans || {}
        ret = { volume: "volume", issue: "issue", pages: "page" }
          .flat_map { |k, v| Array(extent[k]).map { |s| span_to_extent(s, v) } }
          .join
        return "" if ret.empty?

        "<extent>#{ret}</extent>"
      end

      # Renders one locality; a value containing "-" or an en dash is split
      # into referenceFrom / referenceTo.
      def span_to_extent(span, key)
        values = span.split(/[-–]/)
        ret = "<locality type='#{key}'>" \
              "<referenceFrom>#{values[0]}</referenceFrom>"
        values[1] and
          ret += "<referenceTo>#{values[1]}</referenceTo>"
        "#{ret}</locality>"
      end

      # Renders a uri or docidentifier element (+key+ is the element name),
      # with a type attribute only when the entry has a type.
      def span_to_docid(span, key)
        if span[:type]
          "<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
        else
          "<#{key}>#{span[:val]}</#{key}>"
        end
      end

      # Renders a date. A "-" or en dash immediately followed by four digits
      # marks a from/to range; anything else is rendered as a single `<on>`.
      def span_to_date(span)
        val = if /[-–](?=\d{4})/.match?(span[:val])
                from, to = span[:val].split(/[-–](?=\d{4})/, 2)
                "<from>#{from}</from><to>#{to}</to>"
              else
                "<on>#{span[:val]}</on>"
              end
        type = span[:type] ? " type='#{span[:type]}'" : ""
        "<date#{type}>#{val}</date>"
      end

      # Renders one contributor entry (organization or person) with its role.
      # +title+ is only used for error reporting on persons.
      def span_to_contrib(span, title)
        e = if span[:entity] == "organization"
              "<organization><name>#{span[:name]}</name></organization>"
            else span_to_person(span, title)
            end
        "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
      end

      # Reports a person entry without a surname: the message is added to
      # `@log` and appended to `@fatalerror`.
      def validate_span_to_person(span, title)
        span[:surname] and return
        msg = "Missing surname: issue with bibliographic markup " \
              "in \"#{title}\": #{span}"
        @log.add("Bibliography", nil, msg)
        @fatalerror << msg
      end

      # Renders a person's name: formatted-initials if present, otherwise one
      # `<forename>` per given name, followed by the surname.
      def span_to_person(span, title)
        validate_span_to_person(span, title)
        pre = (span[:"formatted-initials"] and
               "<formatted-initials>" \
               "#{span[:"formatted-initials"]}</formatted-initials>") ||
          Array(span[:givenname]).map { |x| "<forename>#{x}</forename>" }.join
        "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>" \
          "</person>"
      end
    end
  end
end
|