metanorma-standoc 2.1.4 → 2.2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/lib/metanorma/standoc/base.rb +1 -0
  3. data/lib/metanorma/standoc/blocks.rb +3 -7
  4. data/lib/metanorma/standoc/cleanup.rb +4 -2
  5. data/lib/metanorma/standoc/cleanup_biblio.rb +204 -0
  6. data/lib/metanorma/standoc/cleanup_block.rb +46 -4
  7. data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
  8. data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
  9. data/lib/metanorma/standoc/cleanup_reqt.rb +37 -4
  10. data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
  11. data/lib/metanorma/standoc/cleanup_terms.rb +6 -2
  12. data/lib/metanorma/standoc/cleanup_text.rb +10 -8
  13. data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
  14. data/lib/metanorma/standoc/converter.rb +2 -0
  15. data/lib/metanorma/standoc/front.rb +1 -1
  16. data/lib/metanorma/standoc/inline.rb +8 -4
  17. data/lib/metanorma/standoc/isodoc.rng +16 -1
  18. data/lib/metanorma/standoc/macros.rb +1 -180
  19. data/lib/metanorma/standoc/macros_inline.rb +194 -0
  20. data/lib/metanorma/standoc/ref_sect.rb +2 -2
  21. data/lib/metanorma/standoc/ref_utility.rb +5 -6
  22. data/lib/metanorma/standoc/reqt.rb +5 -5
  23. data/lib/metanorma/standoc/reqt.rng +1 -1
  24. data/lib/metanorma/standoc/section.rb +2 -0
  25. data/lib/metanorma/standoc/term_lookup_cleanup.rb +1 -1
  26. data/lib/metanorma/standoc/utils.rb +1 -1
  27. data/lib/metanorma/standoc/validate.rb +1 -69
  28. data/lib/metanorma/standoc/validate_table.rb +91 -0
  29. data/lib/metanorma/standoc/version.rb +1 -1
  30. data/metanorma-standoc.gemspec +2 -3
  31. data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +84 -1
  32. data/spec/metanorma/blocks_spec.rb +68 -8
  33. data/spec/metanorma/cleanup_blocks_spec.rb +107 -27
  34. data/spec/metanorma/inline_spec.rb +4 -0
  35. data/spec/metanorma/isobib_cache_spec.rb +6 -4
  36. data/spec/metanorma/macros_spec.rb +6 -2
  37. data/spec/metanorma/refs_spec.rb +261 -232
  38. data/spec/metanorma/validate_spec.rb +106 -7
  39. data/spec/vcr_cassettes/bsi16341.yml +63 -51
  40. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +62 -62
  41. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
  42. data/spec/vcr_cassettes/hide_refs.yml +58 -58
  43. data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
  44. data/spec/vcr_cassettes/isobib_get_123_1.yml +22 -22
  45. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +33 -33
  46. data/spec/vcr_cassettes/isobib_get_123_2.yml +295 -0
  47. data/spec/vcr_cassettes/isobib_get_123_2001.yml +11 -11
  48. data/spec/vcr_cassettes/isobib_get_124.yml +12 -12
  49. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +24 -30
  50. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
  51. data/spec/vcr_cassettes/std-link.yml +14 -72
  52. metadata +9 -6
  53. data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d55c699cf84e05288a133b4e0b9305cfb14809780ec379b8d617a928338f1fca
4
- data.tar.gz: 772eaa9e0e09e9ad7c40b70ade1825d437ba079673767b31c3c5b493cf29af4f
3
+ metadata.gz: e6a6d6a98133ba0cf81631287aff2157906da7d0978899abe5c00c78f0da6b2d
4
+ data.tar.gz: 81c33e16f952e56dcff81bd0485ead5ba998839b307e30eb8de500bf2448a600
5
5
  SHA512:
6
- metadata.gz: fba5328a1ebbc7d236174893d3e98ece5768c3d8935c326506692023d1254c1a2bda95f842956b4830d18c949ba817d4ca4f00548c569999ae38381c1406f02f
7
- data.tar.gz: d3aed7321347c856c5f8ff600118773094ffc948a5f7b3be6655dff48749c4780a5a4048baf1a04d4aad94e22b650648c91c7ca9bc159680beb48ec4275a8904
6
+ metadata.gz: b36c1f1766233c69f9b5a63298d8f7cd8745cac7f7186f210500bc23a27983b3202a724ebd799f6d305b928377ed5d2ef28dcb43aac4e9c91f65b9300a98ac19
7
+ data.tar.gz: e4b1be10928b81269486083791e7dd23fab26b2697d1b1a82fa607dfa4c6f46a00bdc4373403605c2807a993ab2f4015f34f643607610d10344cfbd95a6a83b3
@@ -50,6 +50,7 @@ module Metanorma
50
50
  @index_terms = node.attr("index-terms")
51
51
  @boilerplateauthority = node.attr("boilerplate-authority")
52
52
  @embed_hdr = node.attr("embed_hdr")
53
+ @document_scheme = node.attr("document-scheme")
53
54
  end
54
55
 
55
56
  def init_processing(node)
@@ -15,8 +15,7 @@ module Metanorma
15
15
  attr_code(id_attr(node).merge(
16
16
  unnumbered: node.option?("unnumbered") ? "true" : nil,
17
17
  number: node.attr("number"),
18
- subsequence: node.attr("subsequence"),
19
- ))
18
+ subsequence: node.attr("subsequence")))
20
19
  end
21
20
 
22
21
  def formula_attrs(node)
@@ -105,9 +104,8 @@ module Metanorma
105
104
 
106
105
  def svgmap_attrs(node)
107
106
  attr_code(id_attr(node)
108
- .merge(id: node.id,
107
+ .merge(id: node.id, number: node.attr("number"),
109
108
  unnumbered: node.option?("unnumbered") ? "true" : nil,
110
- number: node.attr("number"),
111
109
  subsequence: node.attr("subsequence"))
112
110
  .merge(keep_attrs(node)))
113
111
  end
@@ -225,7 +223,6 @@ module Metanorma
225
223
  filename: node.attr("filename"))))
226
224
  end
227
225
 
228
- # NOTE: html escaping is performed by Nokogiri
229
226
  def listing(node)
230
227
  fragment = ::Nokogiri::XML::Builder.new do |xml|
231
228
  xml.sourcecode **listing_attrs(node) do |s|
@@ -238,11 +235,10 @@ module Metanorma
238
235
  end
239
236
 
240
237
  def pass(node)
241
- c = HTMLEntities.new
242
238
  noko do |xml|
243
239
  xml.passthrough **attr_code(formats:
244
240
  node.attr("format") || "metanorma") do |p|
245
- p << c.encode(c.decode(node.content), :basic, :hexadecimal)
241
+ p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
246
242
  end
247
243
  end
248
244
  end
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
5
5
  require_relative "./cleanup_table"
6
6
  require_relative "./cleanup_footnotes"
7
7
  require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
8
+ require_relative "./cleanup_biblio"
9
9
  require_relative "./cleanup_boilerplate"
10
10
  require_relative "./cleanup_section"
11
11
  require_relative "./cleanup_terms"
@@ -28,6 +28,8 @@ module Metanorma
28
28
  passthrough_cleanup(xmldoc)
29
29
  sections_cleanup(xmldoc)
30
30
  obligations_cleanup(xmldoc)
31
+ para_index_cleanup(xmldoc)
32
+ block_index_cleanup(xmldoc)
31
33
  table_cleanup(xmldoc)
32
34
  formula_cleanup(xmldoc)
33
35
  form_cleanup(xmldoc)
@@ -51,7 +53,7 @@ module Metanorma
51
53
  termdef_cleanup(xmldoc)
52
54
  RelatonIev::iev_cleanup(xmldoc, @bibdb)
53
55
  element_name_cleanup(xmldoc)
54
- index_cleanup(xmldoc)
56
+ term_index_cleanup(xmldoc)
55
57
  bpart_cleanup(xmldoc)
56
58
  quotesource_cleanup(xmldoc)
57
59
  callout_cleanup(xmldoc)
@@ -0,0 +1,204 @@
1
+ require "set"
2
+ require "relaton_bib"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
+ def ref_dl_cleanup(xmldoc)
8
+ xmldoc.xpath("//clause[@bibitem = 'true']").each do |c|
9
+ bib = dl_bib_extract(c) or next
10
+ validate_ref_dl(bib, c)
11
+ bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml or next
12
+ bibitem = Nokogiri::XML(bibitemxml)
13
+ bibitem.root["id"] = c["id"] if c["id"] && !/^_/.match(c["id"])
14
+ c.replace(bibitem.root)
15
+ end
16
+ end
17
+
18
+ # do not accept implicit id
19
+ def validate_ref_dl(bib, clause)
20
+ id = bib["id"]
21
+ id ||= clause["id"] unless /^_/.match?(clause["id"])
22
+ unless id
23
+ @log.add("Anchors", clause,
24
+ "The following reference is missing an anchor:\n"\
25
+ "#{clause.to_xml}")
26
+ return
27
+ end
28
+ @refids << id
29
+ validate_ref_dl1(bib, id, clause)
30
+ end
31
+
32
+ def validate_ref_dl1(bib, id, clause)
33
+ bib["title"] or
34
+ @log.add("Bibliography", clause, "Reference #{id} is missing a title")
35
+ bib["docid"] or
36
+ @log.add("Bibliography", clause,
37
+ "Reference #{id} is missing a document identifier (docid)")
38
+ end
39
+
40
+ def extract_from_p(tag, bib, key)
41
+ return unless bib[tag]
42
+
43
+ "<#{key}>#{bib[tag].at('p').children}</#{key}>"
44
+ end
45
+
46
+ # if the content is a single paragraph, replace it with its children
47
+ # single links replaced with uri
48
+ def p_unwrap(para)
49
+ elems = para.elements
50
+ if elems.size == 1 && elems[0].name == "p"
51
+ link_unwrap(elems[0]).children.to_xml.strip
52
+ else
53
+ para.to_xml.strip
54
+ end
55
+ end
56
+
57
+ def link_unwrap(para)
58
+ elems = para.elements
59
+ if elems.size == 1 && elems[0].name == "link"
60
+ para.at("./link").replace(elems[0]["target"].strip)
61
+ end
62
+ para
63
+ end
64
+
65
+ def dd_bib_extract(dtd)
66
+ return nil if dtd.children.empty?
67
+
68
+ dtd.at("./dl") and return dl_bib_extract(dtd)
69
+ elems = dtd.remove.elements
70
+ return p_unwrap(dtd) unless elems.size == 1 &&
71
+ %w(ol ul).include?(elems[0].name)
72
+
73
+ elems[0].xpath("./li").each_with_object([]) do |li, ret|
74
+ ret << p_unwrap(li)
75
+ end
76
+ end
77
+
78
+ def add_to_hash(bib, key, val)
79
+ Metanorma::Utils::set_nested_value(bib, key.split("."), val)
80
+ end
81
+
82
+ # definition list, with at most one level of unordered lists
83
+ def dl_bib_extract(clause, nested = false)
84
+ dl = clause.at("./dl") or return
85
+ key = ""
86
+ bib = dl.xpath("./dt | ./dd").each_with_object({}) do |dtd, m|
87
+ (dtd.name == "dt" and key = dtd.text.sub(/:+$/, "")) or
88
+ add_to_hash(m, key, dd_bib_extract(dtd))
89
+ end
90
+ clause.xpath("./clause").each do |c1|
91
+ key = c1&.at("./title")&.text&.downcase&.strip
92
+ next unless %w(contributor relation series).include? key
93
+
94
+ add_to_hash(bib, key, dl_bib_extract(c1, true))
95
+ end
96
+ dl_bib_extract_title(bib, clause, nested)
97
+ end
98
+
99
+ def dl_bib_extract_title(bib, clause, nested)
100
+ (!nested && clause.at("./title")) or return bib
101
+ title = clause.at("./title").remove.children.to_xml
102
+ bib["title"] = [bib["title"]] if bib["title"].is_a?(Hash) ||
103
+ bib["title"].is_a?(String)
104
+ bib["title"] ||= []
105
+ bib["title"] << title if !title.empty?
106
+ bib
107
+ end
108
+
109
+ # ---
110
+
111
+ def formattedref_spans(xmldoc)
112
+ xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
113
+ spans_to_bibitem(b, spans_preprocess(extract_content(b)))
114
+ end
115
+ end
116
+
117
+ def extract_content(bib)
118
+ extract_docid(bib) + extract_spans(bib)
119
+ end
120
+
121
+ def extract_spans(bib)
122
+ bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
123
+ keys = s["class"].split(".", 2)
124
+ m << { key: keys[0], type: keys[1],
125
+ val: s.children.to_xml }
126
+ (s["class"] == "type" and s.remove) or s.replace(s.children)
127
+ end
128
+ end
129
+
130
+ def extract_docid(bib)
131
+ bib.xpath("./docidentifier").each_with_object([]) do |d, m|
132
+ m << { key: "docid", type: d["type"], val: d.text }
133
+ d.remove
134
+ end
135
+ end
136
+
137
+ def spans_preprocess(spans)
138
+ ret = { contributor: [], docid: [], uri: [], date: [] }
139
+ spans.each do |s|
140
+ case s[:key]
141
+ when "uri", "docid"
142
+ ret[s[:key].to_sym] << { type: s[:type], val: s[:val] }
143
+ when "pubyear" then ret[:date] << { type: "published", val: s[:val] }
144
+ when "pubplace", "title", "type" then ret[s[:key].to_sym] = s[:val]
145
+ when "publisher"
146
+ ret[:contributor] << { role: "publisher", entity: "organization",
147
+ name: s[:val] }
148
+ when "surname", "initials", "givenname"
149
+ ret[:contributor] = spans_preprocess_contrib(s, ret[:contributor])
150
+ end
151
+ end
152
+ ret
153
+ end
154
+
155
+ def spans_preprocess_contrib(span, contrib)
156
+ spans_preprocess_new_contrib?(span, contrib) and
157
+ contrib << { role: span[:type] || "author", entity: "person" }
158
+ contrib[-1][span[:key].to_sym] = span[:val]
159
+ contrib
160
+ end
161
+
162
+ def spans_preprocess_new_contrib?(span, contrib)
163
+ contrib.empty? ||
164
+ (if span[:key] == "surname" then contrib[-1][:surname]
165
+ else (contrib[-1][:initials] || contrib[-1][:givenname])
166
+ end) ||
167
+ contrib[-1][:role] != (span[:type] || "author")
168
+ end
169
+
170
+ def spans_to_bibitem(bib, spans)
171
+ ret = ""
172
+ spans[:title] and ret += "<title>#{spans[:title]}</title>"
173
+ spans[:uri].each { |s| ret += span_to_docid(s, "uri") }
174
+ spans[:docid].each { |s| ret += span_to_docid(s, "docidentifier") }
175
+ spans[:date].each { |s| ret += span_to_docid(s, "date") }
176
+ spans[:contributor].each { |s| ret += span_to_contrib(s) }
177
+ spans[:pubplace] and ret += "<place>#{spans[:place]}</place>"
178
+ spans[:type] and bib["type"] = spans[:type]
179
+ bib << ret
180
+ end
181
+
182
+ def span_to_docid(span, key)
183
+ if span[:type]
184
+ "<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
185
+ else
186
+ "<#{key}>#{span[:val]}</#{key}>"
187
+ end
188
+ end
189
+
190
+ def span_to_contrib(span)
191
+ e = if span[:entity] == "organization"
192
+ "<organization><name>#{span[:name]}</name></organization>"
193
+ else
194
+ pre = (span[:initials] and
195
+ "<initial>#{span[:initials]}</initial>") ||
196
+ "<forename>#{span[:givenname]}</forename>"
197
+ "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>"\
198
+ "</person>"
199
+ end
200
+ "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
201
+ end
202
+ end
203
+ end
204
+ end
@@ -45,7 +45,8 @@ module Metanorma
45
45
 
46
46
  def figure_dl_cleanup1(xmldoc)
47
47
  q = "//figure/following-sibling::*[self::dl]"
48
- xmldoc.xpath(q).each do |s|
48
+ q1 = "//figure/figure/following-sibling::*[self::dl]"
49
+ (xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
49
50
  s["key"] == "true" and s.previous_element << s.remove
50
51
  end
51
52
  end
@@ -65,8 +66,9 @@ module Metanorma
65
66
  # examples containing only figures become subfigures of figures
66
67
  def subfigure_cleanup(xmldoc)
67
68
  xmldoc.xpath("//example[figure]").each do |e|
68
- next unless e.elements.map(&:name).reject do |m|
69
- %w(name figure).include? m
69
+ next unless e.elements.reject do |m|
70
+ %w(name figure index note).include?(m.name) ||
71
+ (m.name == "dl" && m["key"] == "true")
70
72
  end.empty?
71
73
 
72
74
  e.name = "figure"
@@ -84,9 +86,9 @@ module Metanorma
84
86
 
85
87
  def figure_cleanup(xmldoc)
86
88
  figure_footnote_cleanup(xmldoc)
89
+ subfigure_cleanup(xmldoc)
87
90
  figure_dl_cleanup1(xmldoc)
88
91
  figure_dl_cleanup2(xmldoc)
89
- subfigure_cleanup(xmldoc)
90
92
  single_subfigure_cleanup(xmldoc)
91
93
  end
92
94
 
@@ -178,6 +180,46 @@ module Metanorma
178
180
  end
179
181
  end
180
182
  end
183
+
184
+ def block_index_cleanup(xmldoc)
185
+ xmldoc.xpath("//quote | //td | //th | //formula | //li | //dt | "\
186
+ "//dd | //example | //note | //figure | //sourcecode | "\
187
+ "//admonition | //termnote | //termexample | //form | "\
188
+ "//requirement | //recommendation | //permission | "\
189
+ "//imagemap | //svgmap").each do |b|
190
+ b.xpath("./p[indexterm]").each do |p|
191
+ indexterm_para?(p) or next
192
+ p.replace(p.children)
193
+ end
194
+ end
195
+ end
196
+
197
+ def indexterm_para?(para)
198
+ p = para.dup
199
+ p.xpath("./index").each(&:remove)
200
+ p.text.strip.empty?
201
+ end
202
+
203
+ def include_indexterm?(elem)
204
+ return false if elem.nil?
205
+
206
+ !%w(image literal sourcecode).include?(elem.name)
207
+ end
208
+
209
+ def para_index_cleanup(xmldoc)
210
+ xmldoc.xpath("//p[index]").select { |p| indexterm_para?(p) }
211
+ .each do |p|
212
+ para_index_cleanup1(p, p.previous_element, p.next_element)
213
+ end
214
+ end
215
+
216
+ def para_index_cleanup1(para, prev, foll)
217
+ if include_indexterm?(prev)
218
+ prev << para.remove.children
219
+ elsif include_indexterm?(foll) && !foll.children.empty?
220
+ foll.children.first.previous = para.remove.children
221
+ end
222
+ end
181
223
  end
182
224
  end
183
225
  end
@@ -1,24 +1,11 @@
1
- require "nokogiri"
2
- require "pathname"
3
- require "html2doc"
4
1
  require "asciimath2unitsml"
5
- require_relative "./cleanup_block"
6
- require_relative "./cleanup_footnotes"
7
- require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
9
- require_relative "./cleanup_boilerplate"
10
- require_relative "./cleanup_section"
11
- require_relative "./cleanup_terms"
12
- require_relative "./cleanup_inline"
13
- require_relative "./cleanup_amend"
14
- require "relaton_iev"
15
2
 
16
3
  module Metanorma
17
4
  module Standoc
18
5
  module Cleanup
19
6
  def asciimath2mathml(text)
20
7
  text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
21
- "<amathstem>#{HTMLEntities.new.decode($1)}</amathstem>"
8
+ "<amathstem>#{@c.decode($1)}</amathstem>"
22
9
  end
23
10
  text = Html2Doc.new({})
24
11
  .asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
@@ -60,7 +47,7 @@ module Metanorma
60
47
  def mathml_italicise(xml)
61
48
  xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
49
  "m" => MATHML_NS).each do |i|
63
- char = HTMLEntities.new.decode(i.text)
50
+ char = @c.decode(i.text)
64
51
  i["mathvariant"] = "normal" if mi_italicise?(char)
65
52
  end
66
53
  end
@@ -153,29 +153,28 @@ module Metanorma
153
153
 
154
154
  def reference_names(xmldoc)
155
155
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
156
- docid = ref.at("./docidentifier[@type = 'metanorma']") ||
157
- ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
158
- ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
159
- ref.at("./docidentifier[@primary = 'true']") ||
160
- ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
161
- ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
162
- ref.at("./docidentifier[not(@type = 'DOI')]") or next
156
+ docid = select_docid(ref) or next
163
157
  reference = format_ref(docid.children.to_xml, docid["type"])
164
158
  @anchors[ref["id"]] = { xref: reference }
165
159
  end
166
160
  end
167
161
 
162
+ def select_docid(ref)
163
+ ref.at("./docidentifier[@type = 'metanorma']") ||
164
+ ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
165
+ ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
166
+ ref.at("./docidentifier[@primary = 'true']") ||
167
+ ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
168
+ ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
169
+ ref.at("./docidentifier[not(@type = 'DOI')]")
170
+ end
171
+
168
172
  def fetch_termbase(_termbase, _id)
169
173
  ""
170
174
  end
171
175
 
172
176
  def read_local_bibitem(uri)
173
- return nil if %r{^https?://}.match?(uri)
174
-
175
- file = "#{@localdir}#{uri}.rxl"
176
- File.file?(file) or file = "#{@localdir}#{uri}.xml"
177
- File.file?(file) or return nil
178
- xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
177
+ xml = read_local_bibitem_file(uri) or return nil
179
178
  ret = xml.at("//*[local-name() = 'bibdata']") or return nil
180
179
  ret = Nokogiri::XML(ret.to_xml
181
180
  .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
@@ -186,6 +185,15 @@ module Metanorma
186
185
  ret
187
186
  end
188
187
 
188
+ def read_local_bibitem_file(uri)
189
+ return nil if %r{^https?://}.match?(uri)
190
+
191
+ file = "#{@localdir}#{uri}.rxl"
192
+ File.file?(file) or file = "#{@localdir}#{uri}.xml"
193
+ File.file?(file) or return nil
194
+ Nokogiri::XML(File.read(file, encoding: "utf-8"))
195
+ end
196
+
189
197
  # if citation uri points to local file, get bibitem from it
190
198
  def fetch_local_bibitem(xmldoc)
191
199
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
@@ -206,6 +214,7 @@ module Metanorma
206
214
  def bibitem_cleanup(xmldoc)
207
215
  bibitem_nested_id(xmldoc)
208
216
  ref_dl_cleanup(xmldoc)
217
+ formattedref_spans(xmldoc)
209
218
  fetch_local_bibitem(xmldoc)
210
219
  end
211
220
  end
@@ -5,10 +5,19 @@ module Metanorma
5
5
  requirement_metadata(xmldoc)
6
6
  requirement_inherit(xmldoc)
7
7
  requirement_descriptions(xmldoc)
8
+ requirement_identifier(xmldoc)
8
9
  end
9
10
 
10
11
  REQRECPER = "//requirement | //recommendation | //permission".freeze
11
12
 
13
+ def requirement_identifier(xmldoc)
14
+ xmldoc.xpath(REQRECPER).each do |r|
15
+ r.xpath("./identifier[link] | ./inherit[link]").each do |i|
16
+ i.children = i.at("./link/@target").text
17
+ end
18
+ end
19
+ end
20
+
12
21
  def requirement_inherit(xmldoc)
13
22
  xmldoc.xpath(REQRECPER).each do |r|
14
23
  ins = requirement_inherit_insert(r)
@@ -76,8 +85,16 @@ module Metanorma
76
85
  end
77
86
  end
78
87
 
88
+ def requirement_metadata1_attrs
89
+ %w(obligation model type)
90
+ end
91
+
79
92
  def requirement_metadata1_tags
80
- %w(label subject inherit)
93
+ %w(identifier subject inherit)
94
+ end
95
+
96
+ def requirement_metadata_component_tags
97
+ []
81
98
  end
82
99
 
83
100
  def requirement_metadata1(reqt, dlist, ins)
@@ -85,20 +102,21 @@ module Metanorma
85
102
  reqt.children.first.previous = " "
86
103
  ins = reqt.children.first
87
104
  end
88
- %w(obligation model type).each do |a|
105
+ requirement_metadata1_attrs.each do |a|
89
106
  dl_to_attrs(reqt, dlist, a)
90
107
  end
91
108
  requirement_metadata1_tags.each do |a|
92
109
  ins = dl_to_elems(ins, reqt, dlist, a)
93
110
  end
94
- reqt_dl_to_classif(ins, reqt, dlist)
111
+ ins = reqt_dl_to_classif(ins, reqt, dlist)
112
+ reqt_dl_to_classif1(ins, reqt, dlist)
95
113
  end
96
114
 
97
115
  def reqt_dl_to_classif(ins, reqt, dlist)
98
116
  if a = reqt.at("./classification[last()]") then ins = a end
99
117
  dlist.xpath("./dt[text()='classification']").each do |e|
100
118
  val = e.at("./following::dd/p") || e.at("./following::dd")
101
- req_classif_parse(val.text).each do |r|
119
+ req_classif_parse(val.children.to_xml).each do |r|
102
120
  ins.next = "<classification><tag>#{r[0]}</tag>"\
103
121
  "<value>#{r[1]}</value></classification>"
104
122
  ins = ins.next
@@ -106,6 +124,21 @@ module Metanorma
106
124
  end
107
125
  ins
108
126
  end
127
+
128
+ def reqt_dl_to_classif1(ins, reqt, dlist)
129
+ if a = reqt.at("./classification[last()]") then ins = a end
130
+ dlist.xpath("./dt").each do |e|
131
+ next if (requirement_metadata1_attrs + requirement_metadata1_tags +
132
+ requirement_metadata_component_tags + %w(classification))
133
+ .include?(e.text)
134
+
135
+ val = e.at("./following::dd/p") || e.at("./following::dd")
136
+ ins.next = "<classification><tag>#{e.text}</tag>"\
137
+ "<value>#{val.children.to_xml}</value></classification>"
138
+ ins = ins.next
139
+ end
140
+ ins
141
+ end
109
142
  end
110
143
  end
111
144
  end
@@ -11,7 +11,7 @@ module Metanorma
11
11
  n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
12
12
  end
13
13
  ret = Nokogiri::XML(key.to_xml)
14
- HTMLEntities.new.decode(ret.text.downcase)
14
+ @c.decode(ret.text.downcase)
15
15
  .gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
16
16
  .gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
17
17
  .gsub(/[0-9]+/, "þ\\0")
@@ -100,9 +100,13 @@ module Metanorma
100
100
  end
101
101
  end
102
102
 
103
+ def termlookup_cleanup(xmldoc)
104
+ Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call
105
+ end
106
+
103
107
  def termdef_cleanup(xmldoc)
104
108
  termdef_unnest_cleanup(xmldoc)
105
- Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call
109
+ termlookup_cleanup(xmldoc)
106
110
  term_nonverbal_designations(xmldoc)
107
111
  term_dl_to_metadata(xmldoc)
108
112
  term_termsource_to_designation(xmldoc)
@@ -117,7 +121,7 @@ module Metanorma
117
121
  termdocsource_cleanup(xmldoc)
118
122
  end
119
123
 
120
- def index_cleanup(xmldoc)
124
+ def term_index_cleanup(xmldoc)
121
125
  return unless @index_terms
122
126
 
123
127
  xmldoc.xpath("//preferred").each do |p|
@@ -7,7 +7,7 @@ module Metanorma
7
7
  text = text.gsub(/\s+<fn /, "<fn ")
8
8
  %w(passthrough passthrough-inline).each do |v|
9
9
  text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
10
- </#{v}>}mx) { HTMLEntities.new.decode($1) }
10
+ </#{v}>}mx) { @c.decode($1) }
11
11
  end
12
12
  text
13
13
  end
@@ -34,11 +34,13 @@ module Metanorma
34
34
  end
35
35
  end
36
36
 
37
+ IGNORE_QUOTES_ELEMENTS =
38
+ %w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
39
+
37
40
  def uninterrupt_quotes_around_xml_skip(elem)
38
41
  !(/\A['"]/.match?(elem.text) &&
39
- elem.previous.ancestors("pre, tt, sourcecode, stem, figure, bibdata,
40
- passthrough, identifer")
41
- .empty? &&
42
+ elem.previous.path.split(%r{/})[1..-2]
43
+ .intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
42
44
  ((elem.previous.text.strip.empty? &&
43
45
  !empty_tag_with_text_content?(elem.previous)) ||
44
46
  elem.previous.name == "index"))
@@ -49,7 +51,7 @@ module Metanorma
49
51
  /\S\Z/.match?(prev.text) or return
50
52
  foll = elem.at(".//following::text()[1]")
51
53
  m = /\A(["'][[:punct:]]*)(\s|\Z)/
52
- .match(HTMLEntities.new.decode(foll&.text)) or return
54
+ .match(@c.decode(foll&.text)) or return
53
55
  foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
54
56
  prev.content = "#{prev.text}#{m[1]}"
55
57
  end
@@ -74,10 +76,10 @@ module Metanorma
74
76
  empty_tag_with_text_content?(x) and prev = "dummy"
75
77
  next unless x.text?
76
78
 
77
- x.ancestors("pre, tt, sourcecode, stem, figure, bibdata, passthrough,
78
- identifier").empty? and
79
+ ancestors = x.path.split(%r{/})[1..-2]
80
+ ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
79
81
  dumb2smart_quotes1(x, prev)
80
- prev = x.text if x.ancestors("index").empty?
82
+ prev = x.text unless ancestors.include?("index")
81
83
  end
82
84
  end
83
85
 
@@ -68,10 +68,9 @@ module Metanorma
68
68
  end
69
69
 
70
70
  def xref_to_eref(elem)
71
- c = HTMLEntities.new
72
71
  elem["bibitemid"] = elem["target"]
73
72
  if ref = @anchors&.dig(elem["target"], :xref)
74
- elem["citeas"] = c.decode(ref)
73
+ elem["citeas"] = @c.decode(ref)
75
74
  else
76
75
  elem["citeas"] = ""
77
76
  xref_to_eref1(elem)
@@ -29,6 +29,7 @@ module Metanorma
29
29
  preprocessor Metanorma::Plugin::Lutaml::LutamlUmlAttributesTablePreprocessor
30
30
  preprocessor Metanorma::Plugin::Lutaml::LutamlUmlDatamodelDescriptionPreprocessor
31
31
  inline_macro Metanorma::Standoc::PreferredTermInlineMacro
32
+ inline_macro Metanorma::Standoc::SpanInlineMacro
32
33
  inline_macro Metanorma::Standoc::AltTermInlineMacro
33
34
  inline_macro Metanorma::Standoc::AdmittedTermInlineMacro
34
35
  inline_macro Metanorma::Standoc::DeprecatedTermInlineMacro
@@ -93,6 +94,7 @@ module Metanorma
93
94
  basebackend "html"
94
95
  outfilesuffix ".xml"
95
96
  @libdir = File.dirname(self.class::_file || __FILE__)
97
+ @c = HTMLEntities.new
96
98
  end
97
99
 
98
100
  class << self