metanorma-standoc 2.3.3 → 2.3.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -53,7 +53,8 @@ module Metanorma
53
53
  end
54
54
 
55
55
  PREFACE_CLAUSE_NAMES =
56
- %w(abstract foreword introduction misc-container acknowledgements).freeze
56
+ %w(abstract foreword introduction misc-container
57
+ acknowledgements).freeze
57
58
 
58
59
  MAIN_CLAUSE_NAMES =
59
60
  ["normative references", "terms and definitions", "scope",
@@ -0,0 +1,261 @@
1
module Metanorma
  module Standoc
    module Cleanup
      # Turns the <span> markup inside a bibliographic entry's <formattedref>
      # (together with the entry's <docidentifier> elements) into the XML
      # content of a structured <bibitem>.
      #
      # Construction harvests the spans: each one is unwrapped in place (or
      # removed, for class "type"), so the source element is modified.
      # #convert then renders the harvested data; the result is exposed
      # through #out, and problems found along the way through #err.
      class SpansToBibitem
        include ::Metanorma::Standoc::Utils

        # err: array of hashes with :msg and, for blocking problems,
        #      fatal: true.
        # out: root element of the generated bibitem; nil until #convert runs.
        attr_reader :err, :out

        # @param bib XML node of the bibliographic entry (queried with
        #   xpath/at and mutated while spans are harvested)
        def initialize(bib)
          @bib = bib
          @err = []
          @spans = spans_preprocess(extract_content(bib))
        end

        # @return [Array<Hash>] { key:, type:, val: } records, document
        #   identifiers first, then spans in document order
        def extract_content(bib)
          extract_docid(bib) + extract_spans(bib)
        end

        # Harvests every span under formattedref. A span that still has a
        # span ancestor when it is visited is skipped.
        def extract_spans(bib)
          bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
            next if s.at("./ancestor::span")

            extract_spans1(s, m)
          end
        end

        # Records one span in +acc+ and strips its markup from the document.
        # The class attribute is read as "key" or "key.type".
        # NOTE(review): a span without a class attribute raises here
        # (nil.split) - confirm whether such spans can reach this point.
        def extract_spans1(span, acc)
          key, type = span["class"].split(".", 2)
          acc << { key: key, type: type, val: span.children.to_xml }
          # "type" spans carry metadata only, so they are dropped altogether;
          # every other span is replaced by its own content.
          if span["class"] == "type" then span.remove
          else span.replace(span.children)
          end
        end

        # Records each docidentifier as a "docid" entry. The element itself
        # is removed unless the entry already has a <title> child.
        def extract_docid(bib)
          bib.xpath("./docidentifier").each_with_object([]) do |d, m|
            m << { key: "docid", type: d["type"], val: d.text }
            d.remove unless bib.at("./title")
          end
        end

        # Skeleton shared by the main item and the host ("in") item.
        def empty_span_hash
          { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
        end

        # Folds the flat list of harvested records into the structured hash
        # that the rendering methods consume.
        def spans_preprocess(spans)
          ret = empty_span_hash
          spans.each { |s| span_preprocess1(s, ret) }
          host_rearrange(ret)
        end

        # Files a single record into +ret+ according to its key; unknown
        # keys are reported through @err (non-fatal).
        def span_preprocess1(span, ret)
          key = span[:key]
          case key
          when "uri", "docid"
            val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
            ret[key.to_sym] << { type: span[:type], val: val }
          when "date"
            ret[key.to_sym] << { type: span[:type] || "published",
                                 val: span[:val] }
          when "pages", "volume", "issue"
            (ret[:extent][key.to_sym] ||= []) << span[:val]
          when "pubplace", "title", "type", "series"
            ret[key.to_sym] = span[:val]
          when "in_title"
            ret[:in][:title] = span[:val]
          when "publisher"
            ret[:contrib] << { role: "publisher", entity: "organization",
                               name: span[:val] }
          when "surname", "initials", "givenname", "formatted-initials"
            ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
          when "fullname"
            ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
          when "organization"
            ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
          when "in_surname", "in_initials", "in_givenname",
               "in_formatted-initials"
            span_preprocess_host_contrib(span, ret, :spans_preprocess_contrib)
          when "in_fullname"
            span_preprocess_host_contrib(span, ret, :spans_preprocess_fullname)
          when "in_organization"
            span_preprocess_host_contrib(span, ret, :spans_preprocess_org)
          else
            msg = "unrecognised key '#{span[:key]}' in " \
                  "`span:#{span[:key]}[#{span[:val]}]`"
            @err << { msg: msg }
          end
        end

        # Files an "in_"-prefixed contributor record under the host item,
        # delegating to the same +handler+ used for the main item once the
        # prefix has been stripped from the key.
        def span_preprocess_host_contrib(span, ret, handler)
          ret[:in][:contrib] ||= []
          span[:key] = span[:key].sub(/^in_/, "")
          ret[:in][:contrib] = send(handler, span, ret[:in][:contrib])
        end

        # When a host title was given, completes the host hash with the
        # default keys (type "misc"), moves the series to the host, and
        # derives the host type from a main type starting with "in".
        def host_rearrange(ret)
          ret[:in][:title] or return ret
          # the block keeps whatever the host already holds
          ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
            old
          end
          %i(series).each do |k|
            ret[:in][k] = ret[k]
            ret.delete(k)
          end
          /^in/.match?(ret[:type]) and ret[:in][:type] =
                                         ret[:type].sub(/^in/, "")
          ret
        end

        # Adds one name component (surname, given name, initials) to the
        # contributor list, opening a new person when needed.
        # "initials" is treated as "formatted-initials".
        def spans_preprocess_contrib(span, contrib)
          span[:key] == "initials" and span[:key] = "formatted-initials"
          spans_preprocess_new_contrib?(span, contrib) and
            contrib << { role: span[:type] || "author", entity: "person" }
          if multiple_givennames?(span, contrib)
            contrib[-1][:givenname] = [contrib[-1][:givenname],
                                       span[:val]].flatten
          else contrib[-1][span[:key].to_sym] = span[:val]
          end
          contrib
        end

        # A new person starts when there is none yet, when a second surname
        # arrives, or when the role changes.
        def spans_preprocess_new_contrib?(span, contrib)
          contrib.empty? ||
            (span[:key] == "surname" && contrib[-1][:surname]) ||
            contrib[-1][:role] != (span[:type] || "author")
        end

        # True when the span is a given name/initials for a person who
        # already has one. Side effect: initials stored so far are moved to
        # :givenname, so that all of them end up in a single list.
        def multiple_givennames?(span, contrib)
          (%w(formatted-initials givenname).include?(span[:key]) &&
            (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
            return false
          if contrib[-1][:"formatted-initials"]
            contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
            contrib[-1].delete(:"formatted-initials")
          end
          true
        end

        # Splits a full name on spaces (after separating initials that are
        # run together, e.g. "A.B."); the last token becomes the surname.
        # Preceding tokens are kept as formatted initials when each of them
        # ends in a full stop, and as given names otherwise.
        def spans_preprocess_fullname(span, contrib)
          name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
          leading = name[0..-2]
          out = { role: span[:type] || "author", entity: "person",
                  surname: name[-1] }
          if name.size > 1 && leading.all? { |x| /\.$/.match?(x) }
            out[:"formatted-initials"] = leading.join(" ")
          else out[:givenname] = leading
          end
          contrib << out
          contrib
        end

        # Adds an organization contributor (role defaults to "author").
        def spans_preprocess_org(span, contrib)
          contrib << { role: span[:type] || "author", entity: "organization",
                       name: span[:val] }
          contrib
        end

        # Renders the harvested data into #out.
        # @return [SpansToBibitem] self, so calls can be chained
        def convert
          ret = spans_to_bibitem(@spans)
          @out = Nokogiri::XML("<bibitem>#{ret}</bibitem>").root
          @spans[:type] and @out["type"] = @spans[:type]
          self
        end

        # @return [String] XML for the children of one bibitem, in the order:
        #   title, uri/docidentifier/date, contributors, series, place,
        #   host relation, extent
        def spans_to_bibitem(spans)
          title = spans[:title]
          parts = []
          parts << "<title>#{title}</title>" if title
          parts << spans_to_bibitem_docid(spans)
          spans[:contrib].each { |c| parts << span_to_contrib(c, title) }
          spans[:series] and
            parts << "<series><title>#{spans[:series]}</title></series>"
          spans[:pubplace] and parts << "<place>#{spans[:pubplace]}</place>"
          parts << spans_to_bibitem_host(spans)
          parts << spans_to_bibitem_extent(spans[:extent])
          parts.join
        end

        # Renders the host item as an includedIn relation, or "" when there
        # is none.
        def spans_to_bibitem_host(spans)
          host = spans[:in]
          return "" if host.nil? || host.empty?

          # Work on a copy completed with the default keys: host data given
          # without in_title never went through host_rearrange and lacks
          # them, and the caller's hash must stay intact so that rendering
          # can be repeated.
          host = empty_span_hash.merge(host)
          type = host.delete(:type) || "misc"
          "<relation type='includedIn'><bibitem type='#{type}'>" \
            "#{spans_to_bibitem(host)}</bibitem></relation>"
        end

        # @return [String] uri, docidentifier and date elements, in that order
        def spans_to_bibitem_docid(spans)
          uris = spans[:uri].map { |s| span_to_docid(s, "uri") }
          ids = spans[:docid].map { |s| span_to_docid(s, "docidentifier") }
          dates = spans[:date].map { |s| span_to_date(s) }
          (uris + ids + dates).join
        end

        # @param spans [Hash] the :extent hash (volume/issue/pages lists)
        # @return [String] an <extent> element, or "" when nothing is recorded
        def spans_to_bibitem_extent(spans)
          localities =
            { volume: "volume", issue: "issue", pages: "page" }
              .flat_map do |key, type|
              Array(spans[key]).map { |s| span_to_extent(s, type) }
            end
          localities.empty? ? "" : "<extent>#{localities.join}</extent>"
        end

        # Renders one locality; a value containing a hyphen or en dash is
        # split into referenceFrom/referenceTo.
        def span_to_extent(span, key)
          from, to = span.split(/[-–]/)
          ret = "<locality type='#{key}'>" \
                "<referenceFrom>#{from}</referenceFrom>"
          to and ret += "<referenceTo>#{to}</referenceTo>"
          "#{ret}</locality>"
        end

        # Renders a uri/docidentifier element, with its type when known.
        def span_to_docid(span, key)
          attr = span[:type] ? " type='#{span[:type]}'" : ""
          "<#{key}#{attr}>#{span[:val]}</#{key}>"
        end

        # Renders a date; a hyphen or en dash followed by four digits marks
        # a from/to range.
        def span_to_date(span)
          range = /[-–](?=\d{4})/
          val = if range.match?(span[:val])
                  from, to = span[:val].split(range, 2)
                  "<from>#{from}</from><to>#{to}</to>"
                else "<on>#{span[:val]}</on>"
                end
          type = span[:type] ? " type='#{span[:type]}'" : ""
          "<date#{type}>#{val}</date>"
        end

        # Renders one contributor (organization or person) with its role.
        def span_to_contrib(span, title)
          e = if span[:entity] == "organization"
                "<organization><name>#{span[:name]}</name></organization>"
              else span_to_person(span, title)
              end
          "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
        end

        # Records a fatal error when a person has no surname.
        def validate_span_to_person(span, title)
          span[:surname] and return
          msg = "Missing surname: issue with bibliographic markup " \
                "in \"#{title}\": #{span}"
          @err << { msg: msg, fatal: true }
        end

        # Renders a person: formatted initials when present, otherwise one
        # <forename> per given name, followed by the surname.
        def span_to_person(span, title)
          validate_span_to_person(span, title)
          initials = span[:"formatted-initials"]
          pre = if initials
                  "<formatted-initials>#{initials}</formatted-initials>"
                else
                  Array(span[:givenname]).map do |x|
                    "<forename>#{x}</forename>"
                  end.join
                end
          "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>" \
            "</person>"
        end
      end
    end
  end
end
@@ -60,6 +60,14 @@ module Metanorma
60
60
  "<expression><name>#{elem}</name></expression>"
61
61
  end
62
62
 
63
# If +para+ consists of exactly one element and that element is a
# <link>, swap the link for its whitespace-stripped target attribute.
# Any other content is left untouched. Returns +para+ in both cases.
def link_unwrap(para)
  children = para.elements
  sole = children[0]
  return para unless children.size == 1 && sole.name == "link"

  para.at("./link").replace(sole["target"].strip)
  para
end
70
+
63
71
  class EmptyAttr
64
72
  def attr(_any_attribute)
65
73
  nil
@@ -13,9 +13,8 @@ module Metanorma
13
13
  "referenceFrom".freeze
14
14
 
15
15
  def init_iev
16
- return nil if @no_isobib
17
- return @iev if @iev
18
-
16
+ @no_isobib and return nil
17
+ @iev and return @iev
19
18
  @iev = Iev::Db.new(@iev_globalname, @iev_localname) unless @no_isobib
20
19
  @iev
21
20
  end
@@ -43,10 +42,11 @@ module Metanorma
43
42
  end
44
43
 
45
44
  def content_validate(doc)
46
- xref_validate(doc)
45
+ repeat_id_validate(doc.root) # feeds xref_validate
46
+ xref_validate(doc) # feeds nested_asset_validate
47
+ nested_asset_validate(doc)
47
48
  section_validate(doc)
48
49
  norm_ref_validate(doc)
49
- repeat_id_validate(doc.root)
50
50
  iev_validate(doc.root)
51
51
  concept_validate(doc, "concept", "refterm")
52
52
  concept_validate(doc, "related", "preferred//name")
@@ -57,12 +57,46 @@ module Metanorma
57
57
  clean_abort(@fatalerror.join("\n"), doc)
58
58
  end
59
59
 
60
# Runs both nesting checks on +doc+: first the check for assets nested
# inside other assets or notes, then the check for notes nested inside
# notes and term notes.
def nested_asset_validate(doc)
  nested_asset_validate_basic(doc)
  nested_note_validate(doc)
end
64
+
65
# Reports every formula, example, figure, termnote, termexample or table
# that sits anywhere inside another such element or inside a note.
def nested_asset_validate_basic(doc)
  assets = %w(formula example figure termnote termexample table)
  containers = (assets + ["note"]).map { |tag| "//#{tag}" }.join(" | ")
  nested = assets.map { |tag| ".//#{tag}" }.join(" | ")
  doc.xpath(containers).each do |outer|
    outer.xpath(nested).each do |inner|
      nested_asset_report(outer, inner, doc)
    end
  end
end
74
+
75
# Reports every note that sits anywhere inside a termnote or another note.
def nested_note_validate(doc)
  doc.xpath("//termnote | //note").each do |container|
    container.xpath(".//note").each do |inner_note|
      nested_asset_report(container, inner_note, doc)
    end
  end
end
82
+
83
# Logs a "Syntax" message for +inner+ being nested within +outer+; a
# figure inside a figure is exempt. If @doc_xrefs holds an entry for the
# id of +inner+, the nesting is also logged under "Style" and recorded in
# @fatalerror.
def nested_asset_report(outer, inner, _doc)
  return if outer.name == "figure" && inner.name == "figure"

  @log.add("Syntax", inner,
           "There is an instance of #{inner.name} nested within " \
           "#{outer.name}")
  xref = @doc_xrefs[inner["id"]]
  return unless xref

  msg = "There is a crossreference to an instance of #{inner.name} " \
        "nested within #{outer.name}: #{xref.to_xml}"
  @log.add("Style", xref, msg)
  @fatalerror << msg
end
94
+
60
95
  def norm_ref_validate(doc)
61
96
  found = false
62
97
  doc.xpath("//references[@normative = 'true']/bibitem").each do |b|
63
- next unless docid = b.at("./docidentifier[@type = 'metanorma']")
64
- next unless /^\[\d+\]$/.match?(docid.text)
65
-
98
+ docid = b.at("./docidentifier[@type = 'metanorma']") or next
99
+ /^\[\d+\]$/.match?(docid.text) or next
66
100
  @log.add("Bibliography", b,
67
101
  "Numeric reference in normative references")
68
102
  found = true
@@ -78,8 +112,8 @@ module Metanorma
78
112
  @log.add("Anchors", x, concept_validate_msg(doc, tag, refterm, x))
79
113
  found = true
80
114
  end
81
- found and
82
- @fatalerror << "#{tag.capitalize} not cross-referencing term or symbol"
115
+ found and @fatalerror << "#{tag.capitalize} not cross-referencing " \
116
+ "term or symbol"
83
117
  end
84
118
 
85
119
  def concept_validate_ids(doc)
@@ -101,21 +135,19 @@ module Metanorma
101
135
  ret
102
136
  end
103
137
 
104
- def repeat_id_validate1(ids, elem)
105
- if ids[elem["id"]]
138
+ def repeat_id_validate1(elem)
139
+ if @doc_ids[elem["id"]]
106
140
  @log.add("Anchors", elem, "Anchor #{elem['id']} has already been " \
107
- "used at line #{ids[elem['id']]}")
141
+ "used at line #{@doc_ids[elem['id']]}")
108
142
  @fatalerror << "Multiple instances of same ID: #{elem['id']}"
109
- else
110
- ids[elem["id"]] = elem.line
111
143
  end
112
- ids
144
+ @doc_ids[elem["id"]] = elem.line
113
145
  end
114
146
 
115
147
  def repeat_id_validate(doc)
116
- ids = {}
148
+ @doc_ids = {}
117
149
  doc.xpath("//*[@id]").each do |x|
118
- ids = repeat_id_validate1(ids, x)
150
+ repeat_id_validate1(x)
119
151
  end
120
152
  end
121
153
 
@@ -164,12 +196,12 @@ module Metanorma
164
196
 
165
197
  # manually check for xref/@target, xref/@to integrity
166
198
  def xref_validate(doc)
167
- ids = doc.xpath("//*/@id").each_with_object({}) { |x, m| m[x.text] = 1 }
168
- doc.xpath("//xref/@target | //xref/@to").each do |x|
169
- next if ids[x.text]
170
-
199
+ @doc_xrefs = doc.xpath("//xref/@target | //xref/@to")
200
+ .each_with_object({}) do |x, m|
201
+ m[x.text] = x
202
+ @doc_ids[x] and next
171
203
  @log.add("Anchors", x.parent,
172
- "Crossreference target #{x.text} is undefined")
204
+ "Crossreference target #{x} is undefined")
173
205
  end
174
206
  end
175
207
 
@@ -19,6 +19,6 @@ module Metanorma
19
19
  end
20
20
 
21
21
  module Standoc
22
- VERSION = "2.3.3".freeze
22
+ VERSION = "2.3.5".freeze
23
23
  end
24
24
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-standoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.3
4
+ version: 2.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-22 00:00:00.000000000 Z
11
+ date: 2022-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -475,7 +475,6 @@ files:
475
475
  - lib/metanorma/standoc/cleanup.rb
476
476
  - lib/metanorma/standoc/cleanup_amend.rb
477
477
  - lib/metanorma/standoc/cleanup_asciibib.rb
478
- - lib/metanorma/standoc/cleanup_biblio.rb
479
478
  - lib/metanorma/standoc/cleanup_block.rb
480
479
  - lib/metanorma/standoc/cleanup_boilerplate.rb
481
480
  - lib/metanorma/standoc/cleanup_footnotes.rb
@@ -511,6 +510,7 @@ files:
511
510
  - lib/metanorma/standoc/macros_note.rb
512
511
  - lib/metanorma/standoc/macros_plantuml.rb
513
512
  - lib/metanorma/standoc/macros_terms.rb
513
+ - lib/metanorma/standoc/merge_bibitems.rb
514
514
  - lib/metanorma/standoc/processor.rb
515
515
  - lib/metanorma/standoc/ref.rb
516
516
  - lib/metanorma/standoc/ref_sect.rb
@@ -519,6 +519,7 @@ files:
519
519
  - lib/metanorma/standoc/reqt.rb
520
520
  - lib/metanorma/standoc/reqt.rng
521
521
  - lib/metanorma/standoc/section.rb
522
+ - lib/metanorma/standoc/spans_to_bibitem.rb
522
523
  - lib/metanorma/standoc/table.rb
523
524
  - lib/metanorma/standoc/term_lookup_cleanup.rb
524
525
  - lib/metanorma/standoc/terms.rb