metanorma-standoc 2.3.3 → 2.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,243 +0,0 @@
1
module Metanorma
  module Standoc
    # Turns `<span class="key.type">` markup found inside a bibitem's
    # `formattedref` into structured bibitem XML (title, contributors,
    # identifiers, dates, extent, and an optional "includedIn" host item).
    #
    # The methods below rely on three things that are not defined in this
    # module and must be supplied by the including object: `@log` (responds
    # to `add`), `@fatalerror` (responds to `<<`) and `link_unwrap`.
    module Cleanup
      # Processes every bibitem whose formattedref contains spans: the spans
      # are extracted, converted to XML, and the XML is appended to the
      # bibitem. A `type` span, if present, sets the bibitem's `type`
      # attribute.
      #
      # @param xmldoc [#xpath] document to rewrite in place
      def formattedref_spans(xmldoc)
        xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
          spans = spans_preprocess(extract_content(b), b)
          ret = spans_to_bibitem(spans)
          b["type"] = spans[:type] if spans[:type]
          b << ret
        end
      end

      # @param bib [#xpath] a bibitem node
      # @return [Array<Hash>] docidentifier entries followed by span entries,
      #   each of the form `{ key:, type:, val: }`
      def extract_content(bib)
        extract_docid(bib) + extract_spans(bib)
      end

      # Collects the outermost spans of the formattedref. Nested spans are
      # skipped here because they are already part of their ancestor's
      # serialised content.
      #
      # @param bib [#xpath] a bibitem node
      # @return [Array<Hash>]
      def extract_spans(bib)
        bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
          next if s.at("./ancestor::span")

          extract_spans1(s, m)
        end
      end

      # Records one span in `acc` and removes its markup from the tree:
      # a `type` span is deleted outright, any other span is replaced by its
      # children.
      #
      # A span without a `class` attribute carries no bibliographic key, so
      # it is left untouched instead of raising on `nil.split`.
      #
      # @param span [#[], #children, #remove, #replace] span node
      # @param acc [Array<Hash>] accumulator, appended to in place
      def extract_spans1(span, acc)
        css = span["class"]
        return acc if css.nil?

        key, type = css.split(".", 2)
        acc << { key: key, type: type, val: span.children.to_xml }
        if css == "type" then span.remove
        else span.replace(span.children)
        end
      end

      # Moves the bibitem's direct docidentifier children into span-style
      # entries and removes them from the tree.
      #
      # @param bib [#xpath] a bibitem node
      # @return [Array<Hash>]
      def extract_docid(bib)
        bib.xpath("./docidentifier").each_with_object([]) do |d, m|
          m << { key: "docid", type: d["type"], val: d.text }
          d.remove
        end
      end

      # @return [Hash] a fresh accumulator with every collection-valued key
      #   initialised
      def empty_span_hash
        { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
      end

      # Folds the flat span list into the accumulator hash consumed by
      # {#spans_to_bibitem}.
      #
      # @param spans [Array<Hash>] entries of the form `{ key:, type:, val: }`
      # @param bib [Object] node passed to the log for unrecognised keys
      # @return [Hash]
      def spans_preprocess(spans, bib)
        ret = empty_span_hash
        spans.each { |s| span_preprocess1(s, ret, bib) }
        host_rearrange(ret)
      end

      # Routes a single span into `ret` according to its key. Unrecognised
      # keys are reported through `@log`.
      #
      # @param span [Hash] `{ key:, type:, val: }`; `:key` may be rewritten
      # @param ret [Hash] accumulator, updated in place
      # @param bib [Object] node passed to the log
      def span_preprocess1(span, ret, bib)
        key = span[:key]
        case key
        when "uri", "docid"
          val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
          ret[key.to_sym] << { type: span[:type], val: val }
        when "date"
          ret[:date] << { type: span[:type] || "published", val: span[:val] }
        when "pages", "volume", "issue"
          (ret[:extent][key.to_sym] ||= []) << span[:val]
        when "pubplace", "title", "type", "series"
          ret[key.to_sym] = span[:val]
        when "in_title"
          ret[:in][:title] = span[:val]
        when "publisher"
          ret[:contrib] << { role: "publisher", entity: "organization",
                             name: span[:val] }
        when "surname", "initials", "givenname", "formatted-initials"
          ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
        when "fullname"
          ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
        when "organization"
          ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
        when "in_surname", "in_initials", "in_givenname",
             "in_formatted-initials", "in_fullname", "in_organization"
          # A host contributor is an ordinary contributor stored under
          # ret[:in]: strip the prefix and reuse the branches above with the
          # host hash as the accumulator.
          span[:key] = key.sub(/^in_/, "")
          ret[:in][:contrib] ||= []
          span_preprocess1(span, ret[:in], bib)
        else
          msg = "unrecognised key '#{span[:key]}' in `span:#{span[:key]}[#{span[:val]}]`"
          @log.add("Bibliography", bib, msg)
        end
      end

      # Completes the host ("included in") item.
      #
      # Any non-empty host is padded with the default collections and the
      # default type "misc"; without that padding a host holding only
      # contributors (no `in_title`) made {#spans_to_bibitem} fail on the
      # missing `:uri`/`:docid`/`:date`/`:extent` keys. Only a host that has
      # a title takes over the series and a type derived from an "in..."
      # type of the item itself.
      #
      # @param ret [Hash] accumulator, updated in place
      # @return [Hash] `ret`
      def host_rearrange(ret)
        host = ret[:in]
        return ret if host.empty?

        # the block keeps values that were already collected for the host
        host.merge!(empty_span_hash, { type: "misc" }) { |_, old, _| old }
        return ret unless host[:title]

        %i(series).each { |k| host[k] = ret.delete(k) }
        host[:type] = ret[:type].sub(/^in/, "") if /^in/.match?(ret[:type])
        ret
      end

      # Adds one name component to the current person, opening a new person
      # first when {#spans_preprocess_new_contrib?} says so. "initials" is
      # stored under "formatted-initials".
      #
      # @param span [Hash] `{ key:, type:, val: }`; `:key` may be rewritten
      # @param contrib [Array<Hash>] contributor list, updated in place
      # @return [Array<Hash>] `contrib`
      def spans_preprocess_contrib(span, contrib)
        span[:key] = "formatted-initials" if span[:key] == "initials"
        if spans_preprocess_new_contrib?(span, contrib)
          contrib << { role: span[:type] || "author", entity: "person" }
        end
        person = contrib[-1]
        field = span[:key].to_sym
        person[field] =
          if field == :givenname && person[field]
            # wrap the value: Array + String raises TypeError
            Array(person[field]) + [span[:val]]
          else span[:val]
          end
        contrib
      end

      # Decides whether `span` starts a new person rather than extending the
      # last contributor.
      #
      # @param span [Hash] `{ key:, type:, val: }`
      # @param contrib [Array<Hash>] contributors collected so far
      # @return [Boolean]
      def spans_preprocess_new_contrib?(span, contrib)
        return true if contrib.empty?

        last = contrib[-1]
        # Name parts must never be written onto an organisation entry: the
        # person would silently vanish from the output.
        return true if last[:entity] != "person"

        taken = if span[:key] == "surname" then last[:surname]
                else last[:"formatted-initials"] || last[:givenname]
                end
        return true if taken

        last[:role] != (span[:type] || "author")
      end

      # Splits a full name into forename part and surname (the last
      # space-separated token). If every leading token ends in a full stop
      # they are stored as formatted initials, otherwise as given names.
      #
      # @param span [Hash] `{ type:, val: }`
      # @param contrib [Array<Hash>] contributor list, appended to in place
      # @return [Array<Hash>] `contrib`
      def spans_preprocess_fullname(span, contrib)
        # "J.R.R. Tolkien" -> "J. R. R. Tolkien" so initials split apart
        name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
        forenames = name[0..-2]
        out = { role: span[:type] || "author", entity: "person",
                surname: name[-1] }
        if name.size > 1 && forenames.all? { |x| /\.$/.match?(x) }
          out[:"formatted-initials"] = forenames.join(" ")
        else
          out[:givenname] = forenames
        end
        contrib << out
      end

      # @param span [Hash] `{ type:, val: }`
      # @param contrib [Array<Hash>] contributor list, appended to in place
      # @return [Array<Hash>] `contrib`
      def spans_preprocess_org(span, contrib)
        contrib << { role: span[:type] || "author", entity: "organization",
                     name: span[:val] }
      end

      # Serialises an accumulator hash as the XML children of a bibitem.
      #
      # @param spans [Hash] accumulator built by {#spans_preprocess}
      # @return [String] XML fragment
      def spans_to_bibitem(spans)
        title = spans[:title]
        parts = []
        parts << "<title>#{title}</title>" if title
        parts << spans_to_bibitem_docid(spans)
        spans[:contrib].each { |s| parts << span_to_contrib(s, title) }
        if spans[:series]
          parts << "<series><title>#{spans[:series]}</title></series>"
        end
        parts << "<place>#{spans[:pubplace]}</place>" if spans[:pubplace]
        parts << spans_to_bibitem_host(spans)
        parts << spans_to_bibitem_extent(spans[:extent])
        parts.join
      end

      # Serialises the host item as an "includedIn" relation.
      #
      # The host hash is only read: its `:type` is no longer deleted, so
      # serialising the same accumulator again yields the same XML.
      #
      # @param spans [Hash] accumulator whose `:in` entry is the host
      # @return [String] XML fragment, empty when there is no host
      def spans_to_bibitem_host(spans)
        host = spans[:in]
        return "" if host.empty?

        "<relation type='includedIn'><bibitem type='#{host[:type]}'>" \
          "#{spans_to_bibitem(host)}</bibitem></relation>"
      end

      # @param spans [Hash] accumulator with `:uri`, `:docid` and `:date`
      # @return [String] URIs, then docidentifiers, then dates
      def spans_to_bibitem_docid(spans)
        (spans[:uri].map { |s| span_to_docid(s, "uri") } +
         spans[:docid].map { |s| span_to_docid(s, "docidentifier") } +
         spans[:date].map { |s| span_to_date(s) }).join
      end

      # @param spans [Hash] extent hash keyed by `:volume`, `:issue`, `:pages`
      # @return [String] `<extent>` element, or "" when there are no
      #   localities
      def spans_to_bibitem_extent(spans)
        localities = { volume: "volume", issue: "issue", pages: "page" }
          .flat_map { |k, v| Array(spans[k]).map { |s| span_to_extent(s, v) } }
          .join
        localities.empty? ? "" : "<extent>#{localities}</extent>"
      end

      # @param span [String] a single value or a hyphen/en-dash range
      # @param key [String] locality type
      # @return [String] `<locality>` element
      def span_to_extent(span, key)
        from, to = span.split(/[-–]/)
        ret = "<locality type='#{key}'>" \
              "<referenceFrom>#{from}</referenceFrom>"
        ret += "<referenceTo>#{to}</referenceTo>" if to
        "#{ret}</locality>"
      end

      # @param span [Hash] `{ type:, val: }`
      # @param key [String] element name to emit
      # @return [String]
      def span_to_docid(span, key)
        attr = span[:type] ? " type='#{span[:type]}'" : ""
        "<#{key}#{attr}>#{span[:val]}</#{key}>"
      end

      # Emits a date; a hyphen/en-dash followed by four digits is read as
      # the separator of a from/to range.
      #
      # @param span [Hash] `{ type:, val: }`
      # @return [String] `<date>` element
      def span_to_date(span)
        range = /[-–](?=\d{4})/
        val = if range.match?(span[:val])
                from, to = span[:val].split(range, 2)
                "<from>#{from}</from><to>#{to}</to>"
              else
                "<on>#{span[:val]}</on>"
              end
        type = span[:type] ? " type='#{span[:type]}'" : ""
        "<date#{type}>#{val}</date>"
      end

      # @param span [Hash] contributor entry
      # @param title [String, nil] title of the item, used in diagnostics
      # @return [String] `<contributor>` element
      def span_to_contrib(span, title)
        e = if span[:entity] == "organization"
              "<organization><name>#{span[:name]}</name></organization>"
            else span_to_person(span, title)
            end
        "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
      end

      # Logs a person without a surname and records the message in
      # `@fatalerror`.
      #
      # @param span [Hash] person entry
      # @param title [String, nil] title of the item, used in the message
      def validate_span_to_person(span, title)
        return if span[:surname]

        msg = "Missing surname: issue with bibliographic markup " \
              "in \"#{title}\": #{span}"
        @log.add("Bibliography", nil, msg)
        @fatalerror << msg
      end

      # Formatted initials take precedence over given names.
      #
      # @param span [Hash] person entry
      # @param title [String, nil] title of the item, used in diagnostics
      # @return [String] `<person>` element
      def span_to_person(span, title)
        validate_span_to_person(span, title)
        initials = span[:"formatted-initials"]
        pre = if initials
                "<formatted-initials>#{initials}</formatted-initials>"
              else
                Array(span[:givenname])
                  .map { |x| "<forename>#{x}</forename>" }.join
              end
        "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>" \
          "</person>"
      end
    end
  end
end