metanorma-standoc 2.3.3 → 2.3.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,243 +0,0 @@
1
module Metanorma
  module Standoc
    # Bibliography cleanup: turns semantic <span class="..."> markup found
    # inside a bibitem's <formattedref> into structured bibitem child XML.
    #
    # The including object is expected to supply +@log+ (responding to
    # +add+), +@fatalerror+ (responding to +<<+) and a +link_unwrap+ helper;
    # none of these are defined in this module.
    module Cleanup
      # Keys whose spans describe contributors of the host ("in") item.
      HOST_CONTRIB_KEYS = %w(in_surname in_initials in_givenname
                             in_formatted-initials in_fullname
                             in_organization).freeze

      # For every bibitem whose formattedref contains spans: extract the
      # spans, append the generated XML to the bibitem, and set the
      # bibitem's type attribute if a "type" span was present.
      def formattedref_spans(xmldoc)
        xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
          spans = spans_preprocess(extract_content(b), b)
          ret = spans_to_bibitem(spans)
          spans[:type] and b["type"] = spans[:type]
          b << ret
        end
      end

      # Returns the list of { key:, type:, val: } hashes for a bibitem:
      # existing docidentifiers first, then formattedref spans.
      # Both extraction steps modify +bib+ (see below).
      def extract_content(bib)
        extract_docid(bib) + extract_spans(bib)
      end

      # Collects spans under ./formattedref, skipping any span that has a
      # span ancestor at the moment it is visited.
      def extract_spans(bib)
        bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
          next if s.at("./ancestor::span")

          extract_spans1(s, m)
        end
      end

      # Records one span in +acc+ and removes its markup from the document.
      # The class attribute is split on the first "." into key and type.
      # A span with class "type" is removed entirely; any other span is
      # replaced by its children.
      def extract_spans1(span, acc)
        # A span without a class attribute carries no key; leave it
        # untouched rather than failing on nil.split.
        span["class"] or return

        keys = span["class"].split(".", 2)
        acc << { key: keys[0], type: keys[1],
                 val: span.children.to_xml }
        if span["class"] == "type" then span.remove
        else span.replace(span.children)
        end
      end

      # Converts each ./docidentifier child into a "docid" entry and
      # removes the element from +bib+.
      def extract_docid(bib)
        bib.xpath("./docidentifier").each_with_object([]) do |d, m|
          m << { key: "docid", type: d["type"], val: d.text }
          d.remove
        end
      end

      # Fresh accumulator for preprocessed span data.
      def empty_span_hash
        { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
      end

      # Folds the flat span list into the accumulator hash, then fills in
      # the host ("in") sub-hash via host_rearrange.
      def spans_preprocess(spans, bib)
        ret = empty_span_hash
        spans.each { |s| span_preprocess1(s, ret, bib) }
        host_rearrange(ret)
      end

      # Dispatches a single span into +ret+ according to its key.
      # Unrecognised keys are reported through @log against +bib+.
      def span_preprocess1(span, ret, bib)
        case span[:key]
        when "uri", "docid"
          val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
          ret[span[:key].to_sym] << { type: span[:type], val: val }
        when "date"
          ret[:date] << { type: span[:type] || "published",
                          val: span[:val] }
        when "pages", "volume", "issue"
          (ret[:extent][span[:key].to_sym] ||= []) << span[:val]
        when "pubplace", "title", "type", "series"
          ret[span[:key].to_sym] = span[:val]
        when "in_title"
          ret[:in][:title] = span[:val]
        when "publisher"
          ret[:contrib] << { role: "publisher", entity: "organization",
                             name: span[:val] }
        when "surname", "initials", "givenname", "formatted-initials"
          ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
        when "fullname"
          ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
        when "organization"
          ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
        when *HOST_CONTRIB_KEYS
          span_preprocess_host_contrib(span, ret)
        else
          msg = "unrecognised key '#{span[:key]}' in `span:#{span[:key]}[#{span[:val]}]`"
          @log.add("Bibliography", bib, msg)
        end
      end

      # Handles an in_* contributor span: strips the "in_" prefix from the
      # span's key and routes it to the matching contributor routine,
      # accumulating into ret[:in][:contrib].
      def span_preprocess_host_contrib(span, ret)
        ret[:in][:contrib] ||= []
        # Reassign instead of sub! so a frozen key string does not raise.
        span[:key] = span[:key].sub(/\Ain_/, "")
        ret[:in][:contrib] =
          case span[:key]
          when "fullname"
            spans_preprocess_fullname(span, ret[:in][:contrib])
          when "organization"
            spans_preprocess_org(span, ret[:in][:contrib])
          else
            spans_preprocess_contrib(span, ret[:in][:contrib])
          end
      end

      # Completes the host ("in") hash when any host data was collected:
      # fills missing keys with defaults (type "misc"), moves :series from
      # the item to the host, and derives the host type from an item type
      # starting with "in" (e.g. "inbook" gives host type "book").
      def host_rearrange(ret)
        # Fill defaults whenever host data exists, not only when a host
        # title exists: a host holding only contributors would otherwise
        # lack :uri/:docid/:date/:extent and fail during serialisation.
        ret[:in].empty? and return ret
        ret[:in].merge!(empty_span_hash, { type: "misc" }) { |_, old, _| old }

        %i(series).each do |k|
          ret[:in][k] = ret[k]
          ret.delete(k)
        end
        # \A anchors at the start of the string; ^ would match at the start
        # of any line.
        /\Ain/.match?(ret[:type]) and
          ret[:in][:type] = ret[:type].sub(/\Ain/, "")
        ret
      end

      # Adds a personal-name component (surname, givenname,
      # formatted-initials; "initials" is treated as formatted-initials)
      # to the last person in +contrib+, starting a new person first when
      # spans_preprocess_new_contrib? says so. Returns +contrib+.
      def spans_preprocess_contrib(span, contrib)
        span[:key] = "formatted-initials" if span[:key] == "initials"

        spans_preprocess_new_contrib?(span, contrib) and
          contrib << { role: span[:type] || "author", entity: "person" }
        key = span[:key].to_sym
        person = contrib[-1]
        if key == :givenname && person[key]
          # NOTE(review): with the current spans_preprocess_new_contrib?
          # a second givenname always starts a new person, so this branch
          # does not appear reachable; the value is wrapped in an array so
          # that Array + String can no longer raise TypeError if it is.
          person[key] = Array(person[key]) + [span[:val]]
        else
          person[key] = span[:val]
        end
        contrib
      end

      # Truthy when +span+ must open a new person entry: the list is
      # empty, the slot the span would fill is already taken on the last
      # entry (surname for a surname span; formatted-initials or givenname
      # for any other span), or the role differs from the last entry's.
      def spans_preprocess_new_contrib?(span, contrib)
        contrib.empty? and return true

        last = contrib[-1]
        taken = if span[:key] == "surname" then last[:surname]
                else last[:"formatted-initials"] || last[:givenname]
                end
        taken || last[:role] != (span[:type] || "author")
      end

      # Splits a full name into surname (last token) and either
      # formatted initials (when every preceding token ends in ".") or
      # given names. A space is inserted after any "." directly followed
      # by a letter before tokenising. Appends the person to +contrib+.
      def spans_preprocess_fullname(span, contrib)
        # Whitespace-collapsing split: repeated or leading spaces must not
        # produce empty name tokens (and hence empty <forename> elements).
        name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split
        fore = name[0..-2]
        out = { role: span[:type] || "author", entity: "person",
                surname: name[-1] }
        if name.size > 1 && fore.all? { |x| x.end_with?(".") }
          out[:"formatted-initials"] = fore.join(" ")
        else
          out[:givenname] = fore
        end
        contrib << out
      end

      # Appends an organization contributor (default role "author").
      def spans_preprocess_org(span, contrib)
        contrib << { role: span[:type] || "author", entity: "organization",
                     name: span[:val] }
      end

      # Serialises a preprocessed span hash to bibitem child XML, in the
      # order: title, uri/docidentifier/date, contributors, series, place,
      # host relation, extent.
      def spans_to_bibitem(spans)
        ret = []
        spans[:title] and ret << "<title>#{spans[:title]}</title>"
        ret << spans_to_bibitem_docid(spans)
        spans[:contrib].each { |s| ret << span_to_contrib(s, spans[:title]) }
        spans[:series] and
          ret << "<series><title>#{spans[:series]}</title></series>"
        spans[:pubplace] and ret << "<place>#{spans[:pubplace]}</place>"
        ret << spans_to_bibitem_host(spans)
        ret << spans_to_bibitem_extent(spans[:extent])
        ret.join
      end

      # Serialises the host item as an includedIn relation, or returns ""
      # when there is no host data. Removes :type from the host hash.
      def spans_to_bibitem_host(spans)
        host = spans[:in]
        return "" if host.nil? || host.empty?

        type = host.delete(:type)
        "<relation type='includedIn'><bibitem type='#{type}'>" \
          "#{spans_to_bibitem(host)}</bibitem></relation>"
      end

      # Serialises uri, docidentifier and date entries, in that order.
      def spans_to_bibitem_docid(spans)
        ret = spans[:uri].map { |s| span_to_docid(s, "uri") }
        ret += spans[:docid].map { |s| span_to_docid(s, "docidentifier") }
        ret += spans[:date].map { |s| span_to_date(s) }
        ret.join
      end

      # Serialises volume, issue and pages values as <extent> localities
      # (pages use locality type "page"); returns "" when there are none.
      def spans_to_bibitem_extent(spans)
        ret = { volume: "volume", issue: "issue",
                pages: "page" }.flat_map do |k, v|
          Array(spans[k]).map { |s| span_to_extent(s, v) }
        end.join
        return "" if ret.empty?

        "<extent>#{ret}</extent>"
      end

      # Builds a locality of type +key+; a value containing "-" or "–" is
      # split into referenceFrom / referenceTo.
      def span_to_extent(span, key)
        values = span.split(/[-–]/)
        ret = "<locality type='#{key}'>" \
              "<referenceFrom>#{values[0]}</referenceFrom>"
        values[1] and
          ret += "<referenceTo>#{values[1]}</referenceTo>"
        "#{ret}</locality>"
      end

      # Builds a <key> element, with a type attribute when one is given.
      def span_to_docid(span, key)
        type = span[:type] ? " type='#{span[:type]}'" : ""
        "<#{key}#{type}>#{span[:val]}</#{key}>"
      end

      # Builds a <date> element. A "-" or "–" directly followed by four
      # digits marks a range (<from>/<to>); anything else is a single
      # <on> date.
      def span_to_date(span)
        range = /[-–](?=\d{4})/
        val = if range.match?(span[:val])
                from, to = span[:val].split(range, 2)
                "<from>#{from}</from><to>#{to}</to>"
              else
                "<on>#{span[:val]}</on>"
              end
        type = span[:type] ? " type='#{span[:type]}'" : ""
        "<date#{type}>#{val}</date>"
      end

      # Builds a <contributor> for an organization or a person; +title+ is
      # only used for error reporting on persons.
      def span_to_contrib(span, title)
        e = if span[:entity] == "organization"
              "<organization><name>#{span[:name]}</name></organization>"
            else span_to_person(span, title)
            end
        "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
      end

      # Logs a message and records it in @fatalerror when a person has no
      # surname.
      def validate_span_to_person(span, title)
        span[:surname] and return
        msg = "Missing surname: issue with bibliographic markup " \
              "in \"#{title}\": #{span}"
        @log.add("Bibliography", nil, msg)
        @fatalerror << msg
      end

      # Builds a <person> element: formatted initials if present,
      # otherwise one <forename> per given name, followed by the surname.
      def span_to_person(span, title)
        validate_span_to_person(span, title)
        initials = span[:"formatted-initials"]
        pre = if initials
                "<formatted-initials>#{initials}</formatted-initials>"
              else
                Array(span[:givenname])
                  .map { |x| "<forename>#{x}</forename>" }.join
              end
        "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>" \
          "</person>"
      end
    end
  end
end