metanorma-standoc 2.1.4 → 2.2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/lib/metanorma/standoc/base.rb +1 -0
  3. data/lib/metanorma/standoc/blocks.rb +3 -7
  4. data/lib/metanorma/standoc/cleanup.rb +4 -2
  5. data/lib/metanorma/standoc/cleanup_biblio.rb +204 -0
  6. data/lib/metanorma/standoc/cleanup_block.rb +46 -4
  7. data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
  8. data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
  9. data/lib/metanorma/standoc/cleanup_reqt.rb +37 -4
  10. data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
  11. data/lib/metanorma/standoc/cleanup_terms.rb +6 -2
  12. data/lib/metanorma/standoc/cleanup_text.rb +10 -8
  13. data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
  14. data/lib/metanorma/standoc/converter.rb +2 -0
  15. data/lib/metanorma/standoc/front.rb +1 -1
  16. data/lib/metanorma/standoc/inline.rb +8 -4
  17. data/lib/metanorma/standoc/isodoc.rng +16 -1
  18. data/lib/metanorma/standoc/macros.rb +1 -180
  19. data/lib/metanorma/standoc/macros_inline.rb +194 -0
  20. data/lib/metanorma/standoc/ref_sect.rb +2 -2
  21. data/lib/metanorma/standoc/ref_utility.rb +5 -6
  22. data/lib/metanorma/standoc/reqt.rb +5 -5
  23. data/lib/metanorma/standoc/reqt.rng +1 -1
  24. data/lib/metanorma/standoc/section.rb +2 -0
  25. data/lib/metanorma/standoc/term_lookup_cleanup.rb +1 -1
  26. data/lib/metanorma/standoc/utils.rb +1 -1
  27. data/lib/metanorma/standoc/validate.rb +1 -69
  28. data/lib/metanorma/standoc/validate_table.rb +91 -0
  29. data/lib/metanorma/standoc/version.rb +1 -1
  30. data/metanorma-standoc.gemspec +2 -3
  31. data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +84 -1
  32. data/spec/metanorma/blocks_spec.rb +68 -8
  33. data/spec/metanorma/cleanup_blocks_spec.rb +107 -27
  34. data/spec/metanorma/inline_spec.rb +4 -0
  35. data/spec/metanorma/isobib_cache_spec.rb +6 -4
  36. data/spec/metanorma/macros_spec.rb +6 -2
  37. data/spec/metanorma/refs_spec.rb +261 -232
  38. data/spec/metanorma/validate_spec.rb +106 -7
  39. data/spec/vcr_cassettes/bsi16341.yml +63 -51
  40. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +62 -62
  41. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
  42. data/spec/vcr_cassettes/hide_refs.yml +58 -58
  43. data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
  44. data/spec/vcr_cassettes/isobib_get_123_1.yml +22 -22
  45. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +33 -33
  46. data/spec/vcr_cassettes/isobib_get_123_2.yml +295 -0
  47. data/spec/vcr_cassettes/isobib_get_123_2001.yml +11 -11
  48. data/spec/vcr_cassettes/isobib_get_124.yml +12 -12
  49. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +24 -30
  50. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
  51. data/spec/vcr_cassettes/std-link.yml +14 -72
  52. metadata +9 -6
  53. data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d55c699cf84e05288a133b4e0b9305cfb14809780ec379b8d617a928338f1fca
4
- data.tar.gz: 772eaa9e0e09e9ad7c40b70ade1825d437ba079673767b31c3c5b493cf29af4f
3
+ metadata.gz: e6a6d6a98133ba0cf81631287aff2157906da7d0978899abe5c00c78f0da6b2d
4
+ data.tar.gz: 81c33e16f952e56dcff81bd0485ead5ba998839b307e30eb8de500bf2448a600
5
5
  SHA512:
6
- metadata.gz: fba5328a1ebbc7d236174893d3e98ece5768c3d8935c326506692023d1254c1a2bda95f842956b4830d18c949ba817d4ca4f00548c569999ae38381c1406f02f
7
- data.tar.gz: d3aed7321347c856c5f8ff600118773094ffc948a5f7b3be6655dff48749c4780a5a4048baf1a04d4aad94e22b650648c91c7ca9bc159680beb48ec4275a8904
6
+ metadata.gz: b36c1f1766233c69f9b5a63298d8f7cd8745cac7f7186f210500bc23a27983b3202a724ebd799f6d305b928377ed5d2ef28dcb43aac4e9c91f65b9300a98ac19
7
+ data.tar.gz: e4b1be10928b81269486083791e7dd23fab26b2697d1b1a82fa607dfa4c6f46a00bdc4373403605c2807a993ab2f4015f34f643607610d10344cfbd95a6a83b3
@@ -50,6 +50,7 @@ module Metanorma
50
50
  @index_terms = node.attr("index-terms")
51
51
  @boilerplateauthority = node.attr("boilerplate-authority")
52
52
  @embed_hdr = node.attr("embed_hdr")
53
+ @document_scheme = node.attr("document-scheme")
53
54
  end
54
55
 
55
56
  def init_processing(node)
@@ -15,8 +15,7 @@ module Metanorma
15
15
  attr_code(id_attr(node).merge(
16
16
  unnumbered: node.option?("unnumbered") ? "true" : nil,
17
17
  number: node.attr("number"),
18
- subsequence: node.attr("subsequence"),
19
- ))
18
+ subsequence: node.attr("subsequence")))
20
19
  end
21
20
 
22
21
  def formula_attrs(node)
@@ -105,9 +104,8 @@ module Metanorma
105
104
 
106
105
  def svgmap_attrs(node)
107
106
  attr_code(id_attr(node)
108
- .merge(id: node.id,
107
+ .merge(id: node.id, number: node.attr("number"),
109
108
  unnumbered: node.option?("unnumbered") ? "true" : nil,
110
- number: node.attr("number"),
111
109
  subsequence: node.attr("subsequence"))
112
110
  .merge(keep_attrs(node)))
113
111
  end
@@ -225,7 +223,6 @@ module Metanorma
225
223
  filename: node.attr("filename"))))
226
224
  end
227
225
 
228
- # NOTE: html escaping is performed by Nokogiri
229
226
  def listing(node)
230
227
  fragment = ::Nokogiri::XML::Builder.new do |xml|
231
228
  xml.sourcecode **listing_attrs(node) do |s|
@@ -238,11 +235,10 @@ module Metanorma
238
235
  end
239
236
 
240
237
  def pass(node)
241
- c = HTMLEntities.new
242
238
  noko do |xml|
243
239
  xml.passthrough **attr_code(formats:
244
240
  node.attr("format") || "metanorma") do |p|
245
- p << c.encode(c.decode(node.content), :basic, :hexadecimal)
241
+ p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
246
242
  end
247
243
  end
248
244
  end
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
5
5
  require_relative "./cleanup_table"
6
6
  require_relative "./cleanup_footnotes"
7
7
  require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
8
+ require_relative "./cleanup_biblio"
9
9
  require_relative "./cleanup_boilerplate"
10
10
  require_relative "./cleanup_section"
11
11
  require_relative "./cleanup_terms"
@@ -28,6 +28,8 @@ module Metanorma
28
28
  passthrough_cleanup(xmldoc)
29
29
  sections_cleanup(xmldoc)
30
30
  obligations_cleanup(xmldoc)
31
+ para_index_cleanup(xmldoc)
32
+ block_index_cleanup(xmldoc)
31
33
  table_cleanup(xmldoc)
32
34
  formula_cleanup(xmldoc)
33
35
  form_cleanup(xmldoc)
@@ -51,7 +53,7 @@ module Metanorma
51
53
  termdef_cleanup(xmldoc)
52
54
  RelatonIev::iev_cleanup(xmldoc, @bibdb)
53
55
  element_name_cleanup(xmldoc)
54
- index_cleanup(xmldoc)
56
+ term_index_cleanup(xmldoc)
55
57
  bpart_cleanup(xmldoc)
56
58
  quotesource_cleanup(xmldoc)
57
59
  callout_cleanup(xmldoc)
@@ -0,0 +1,204 @@
1
+ require "set"
2
+ require "relaton_bib"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
+ def ref_dl_cleanup(xmldoc)
8
+ xmldoc.xpath("//clause[@bibitem = 'true']").each do |c|
9
+ bib = dl_bib_extract(c) or next
10
+ validate_ref_dl(bib, c)
11
+ bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml or next
12
+ bibitem = Nokogiri::XML(bibitemxml)
13
+ bibitem.root["id"] = c["id"] if c["id"] && !/^_/.match(c["id"])
14
+ c.replace(bibitem.root)
15
+ end
16
+ end
17
+
18
+ # do not accept implicit id
19
+ def validate_ref_dl(bib, clause)
20
+ id = bib["id"]
21
+ id ||= clause["id"] unless /^_/.match?(clause["id"])
22
+ unless id
23
+ @log.add("Anchors", clause,
24
+ "The following reference is missing an anchor:\n"\
25
+ "#{clause.to_xml}")
26
+ return
27
+ end
28
+ @refids << id
29
+ validate_ref_dl1(bib, id, clause)
30
+ end
31
+
32
+ def validate_ref_dl1(bib, id, clause)
33
+ bib["title"] or
34
+ @log.add("Bibliography", clause, "Reference #{id} is missing a title")
35
+ bib["docid"] or
36
+ @log.add("Bibliography", clause,
37
+ "Reference #{id} is missing a document identifier (docid)")
38
+ end
39
+
40
+ def extract_from_p(tag, bib, key)
41
+ return unless bib[tag]
42
+
43
+ "<#{key}>#{bib[tag].at('p').children}</#{key}>"
44
+ end
45
+
46
+ # if the content is a single paragraph, replace it with its children
47
+ # single links replaced with uri
48
+ def p_unwrap(para)
49
+ elems = para.elements
50
+ if elems.size == 1 && elems[0].name == "p"
51
+ link_unwrap(elems[0]).children.to_xml.strip
52
+ else
53
+ para.to_xml.strip
54
+ end
55
+ end
56
+
57
+ def link_unwrap(para)
58
+ elems = para.elements
59
+ if elems.size == 1 && elems[0].name == "link"
60
+ para.at("./link").replace(elems[0]["target"].strip)
61
+ end
62
+ para
63
+ end
64
+
65
+ def dd_bib_extract(dtd)
66
+ return nil if dtd.children.empty?
67
+
68
+ dtd.at("./dl") and return dl_bib_extract(dtd)
69
+ elems = dtd.remove.elements
70
+ return p_unwrap(dtd) unless elems.size == 1 &&
71
+ %w(ol ul).include?(elems[0].name)
72
+
73
+ elems[0].xpath("./li").each_with_object([]) do |li, ret|
74
+ ret << p_unwrap(li)
75
+ end
76
+ end
77
+
78
+ def add_to_hash(bib, key, val)
79
+ Metanorma::Utils::set_nested_value(bib, key.split("."), val)
80
+ end
81
+
82
+ # definition list, with at most one level of unordered lists
83
+ def dl_bib_extract(clause, nested = false)
84
+ dl = clause.at("./dl") or return
85
+ key = ""
86
+ bib = dl.xpath("./dt | ./dd").each_with_object({}) do |dtd, m|
87
+ (dtd.name == "dt" and key = dtd.text.sub(/:+$/, "")) or
88
+ add_to_hash(m, key, dd_bib_extract(dtd))
89
+ end
90
+ clause.xpath("./clause").each do |c1|
91
+ key = c1&.at("./title")&.text&.downcase&.strip
92
+ next unless %w(contributor relation series).include? key
93
+
94
+ add_to_hash(bib, key, dl_bib_extract(c1, true))
95
+ end
96
+ dl_bib_extract_title(bib, clause, nested)
97
+ end
98
+
99
+ def dl_bib_extract_title(bib, clause, nested)
100
+ (!nested && clause.at("./title")) or return bib
101
+ title = clause.at("./title").remove.children.to_xml
102
+ bib["title"] = [bib["title"]] if bib["title"].is_a?(Hash) ||
103
+ bib["title"].is_a?(String)
104
+ bib["title"] ||= []
105
+ bib["title"] << title if !title.empty?
106
+ bib
107
+ end
108
+
109
+ # ---
110
+
111
+ def formattedref_spans(xmldoc)
112
+ xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
113
+ spans_to_bibitem(b, spans_preprocess(extract_content(b)))
114
+ end
115
+ end
116
+
117
+ def extract_content(bib)
118
+ extract_docid(bib) + extract_spans(bib)
119
+ end
120
+
121
+ def extract_spans(bib)
122
+ bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
123
+ keys = s["class"].split(".", 2)
124
+ m << { key: keys[0], type: keys[1],
125
+ val: s.children.to_xml }
126
+ (s["class"] == "type" and s.remove) or s.replace(s.children)
127
+ end
128
+ end
129
+
130
+ def extract_docid(bib)
131
+ bib.xpath("./docidentifier").each_with_object([]) do |d, m|
132
+ m << { key: "docid", type: d["type"], val: d.text }
133
+ d.remove
134
+ end
135
+ end
136
+
137
+ def spans_preprocess(spans)
138
+ ret = { contributor: [], docid: [], uri: [], date: [] }
139
+ spans.each do |s|
140
+ case s[:key]
141
+ when "uri", "docid"
142
+ ret[s[:key].to_sym] << { type: s[:type], val: s[:val] }
143
+ when "pubyear" then ret[:date] << { type: "published", val: s[:val] }
144
+ when "pubplace", "title", "type" then ret[s[:key].to_sym] = s[:val]
145
+ when "publisher"
146
+ ret[:contributor] << { role: "publisher", entity: "organization",
147
+ name: s[:val] }
148
+ when "surname", "initials", "givenname"
149
+ ret[:contributor] = spans_preprocess_contrib(s, ret[:contributor])
150
+ end
151
+ end
152
+ ret
153
+ end
154
+
155
+ def spans_preprocess_contrib(span, contrib)
156
+ spans_preprocess_new_contrib?(span, contrib) and
157
+ contrib << { role: span[:type] || "author", entity: "person" }
158
+ contrib[-1][span[:key].to_sym] = span[:val]
159
+ contrib
160
+ end
161
+
162
+ def spans_preprocess_new_contrib?(span, contrib)
163
+ contrib.empty? ||
164
+ (if span[:key] == "surname" then contrib[-1][:surname]
165
+ else (contrib[-1][:initials] || contrib[-1][:givenname])
166
+ end) ||
167
+ contrib[-1][:role] != (span[:type] || "author")
168
+ end
169
+
170
+ def spans_to_bibitem(bib, spans)
171
+ ret = ""
172
+ spans[:title] and ret += "<title>#{spans[:title]}</title>"
173
+ spans[:uri].each { |s| ret += span_to_docid(s, "uri") }
174
+ spans[:docid].each { |s| ret += span_to_docid(s, "docidentifier") }
175
+ spans[:date].each { |s| ret += span_to_docid(s, "date") }
176
+ spans[:contributor].each { |s| ret += span_to_contrib(s) }
177
+ spans[:pubplace] and ret += "<place>#{spans[:place]}</place>"
178
+ spans[:type] and bib["type"] = spans[:type]
179
+ bib << ret
180
+ end
181
+
182
+ def span_to_docid(span, key)
183
+ if span[:type]
184
+ "<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
185
+ else
186
+ "<#{key}>#{span[:val]}</#{key}>"
187
+ end
188
+ end
189
+
190
+ def span_to_contrib(span)
191
+ e = if span[:entity] == "organization"
192
+ "<organization><name>#{span[:name]}</name></organization>"
193
+ else
194
+ pre = (span[:initials] and
195
+ "<initial>#{span[:initials]}</initial>") ||
196
+ "<forename>#{span[:givenname]}</forename>"
197
+ "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>"\
198
+ "</person>"
199
+ end
200
+ "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
201
+ end
202
+ end
203
+ end
204
+ end
@@ -45,7 +45,8 @@ module Metanorma
45
45
 
46
46
  def figure_dl_cleanup1(xmldoc)
47
47
  q = "//figure/following-sibling::*[self::dl]"
48
- xmldoc.xpath(q).each do |s|
48
+ q1 = "//figure/figure/following-sibling::*[self::dl]"
49
+ (xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
49
50
  s["key"] == "true" and s.previous_element << s.remove
50
51
  end
51
52
  end
@@ -65,8 +66,9 @@ module Metanorma
65
66
  # examples containing only figures become subfigures of figures
66
67
  def subfigure_cleanup(xmldoc)
67
68
  xmldoc.xpath("//example[figure]").each do |e|
68
- next unless e.elements.map(&:name).reject do |m|
69
- %w(name figure).include? m
69
+ next unless e.elements.reject do |m|
70
+ %w(name figure index note).include?(m.name) ||
71
+ (m.name == "dl" && m["key"] == "true")
70
72
  end.empty?
71
73
 
72
74
  e.name = "figure"
@@ -84,9 +86,9 @@ module Metanorma
84
86
 
85
87
  def figure_cleanup(xmldoc)
86
88
  figure_footnote_cleanup(xmldoc)
89
+ subfigure_cleanup(xmldoc)
87
90
  figure_dl_cleanup1(xmldoc)
88
91
  figure_dl_cleanup2(xmldoc)
89
- subfigure_cleanup(xmldoc)
90
92
  single_subfigure_cleanup(xmldoc)
91
93
  end
92
94
 
@@ -178,6 +180,46 @@ module Metanorma
178
180
  end
179
181
  end
180
182
  end
183
+
184
+ def block_index_cleanup(xmldoc)
185
+ xmldoc.xpath("//quote | //td | //th | //formula | //li | //dt | "\
186
+ "//dd | //example | //note | //figure | //sourcecode | "\
187
+ "//admonition | //termnote | //termexample | //form | "\
188
+ "//requirement | //recommendation | //permission | "\
189
+ "//imagemap | //svgmap").each do |b|
190
+ b.xpath("./p[indexterm]").each do |p|
191
+ indexterm_para?(p) or next
192
+ p.replace(p.children)
193
+ end
194
+ end
195
+ end
196
+
197
+ def indexterm_para?(para)
198
+ p = para.dup
199
+ p.xpath("./index").each(&:remove)
200
+ p.text.strip.empty?
201
+ end
202
+
203
+ def include_indexterm?(elem)
204
+ return false if elem.nil?
205
+
206
+ !%w(image literal sourcecode).include?(elem.name)
207
+ end
208
+
209
+ def para_index_cleanup(xmldoc)
210
+ xmldoc.xpath("//p[index]").select { |p| indexterm_para?(p) }
211
+ .each do |p|
212
+ para_index_cleanup1(p, p.previous_element, p.next_element)
213
+ end
214
+ end
215
+
216
+ def para_index_cleanup1(para, prev, foll)
217
+ if include_indexterm?(prev)
218
+ prev << para.remove.children
219
+ elsif include_indexterm?(foll) && !foll.children.empty?
220
+ foll.children.first.previous = para.remove.children
221
+ end
222
+ end
181
223
  end
182
224
  end
183
225
  end
@@ -1,24 +1,11 @@
1
- require "nokogiri"
2
- require "pathname"
3
- require "html2doc"
4
1
  require "asciimath2unitsml"
5
- require_relative "./cleanup_block"
6
- require_relative "./cleanup_footnotes"
7
- require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
9
- require_relative "./cleanup_boilerplate"
10
- require_relative "./cleanup_section"
11
- require_relative "./cleanup_terms"
12
- require_relative "./cleanup_inline"
13
- require_relative "./cleanup_amend"
14
- require "relaton_iev"
15
2
 
16
3
  module Metanorma
17
4
  module Standoc
18
5
  module Cleanup
19
6
  def asciimath2mathml(text)
20
7
  text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
21
- "<amathstem>#{HTMLEntities.new.decode($1)}</amathstem>"
8
+ "<amathstem>#{@c.decode($1)}</amathstem>"
22
9
  end
23
10
  text = Html2Doc.new({})
24
11
  .asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
@@ -60,7 +47,7 @@ module Metanorma
60
47
  def mathml_italicise(xml)
61
48
  xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
49
  "m" => MATHML_NS).each do |i|
63
- char = HTMLEntities.new.decode(i.text)
50
+ char = @c.decode(i.text)
64
51
  i["mathvariant"] = "normal" if mi_italicise?(char)
65
52
  end
66
53
  end
@@ -153,29 +153,28 @@ module Metanorma
153
153
 
154
154
  def reference_names(xmldoc)
155
155
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
156
- docid = ref.at("./docidentifier[@type = 'metanorma']") ||
157
- ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
158
- ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
159
- ref.at("./docidentifier[@primary = 'true']") ||
160
- ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
161
- ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
162
- ref.at("./docidentifier[not(@type = 'DOI')]") or next
156
+ docid = select_docid(ref) or next
163
157
  reference = format_ref(docid.children.to_xml, docid["type"])
164
158
  @anchors[ref["id"]] = { xref: reference }
165
159
  end
166
160
  end
167
161
 
162
+ def select_docid(ref)
163
+ ref.at("./docidentifier[@type = 'metanorma']") ||
164
+ ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
165
+ ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
166
+ ref.at("./docidentifier[@primary = 'true']") ||
167
+ ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
168
+ ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
169
+ ref.at("./docidentifier[not(@type = 'DOI')]")
170
+ end
171
+
168
172
  def fetch_termbase(_termbase, _id)
169
173
  ""
170
174
  end
171
175
 
172
176
  def read_local_bibitem(uri)
173
- return nil if %r{^https?://}.match?(uri)
174
-
175
- file = "#{@localdir}#{uri}.rxl"
176
- File.file?(file) or file = "#{@localdir}#{uri}.xml"
177
- File.file?(file) or return nil
178
- xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
177
+ xml = read_local_bibitem_file(uri) or return nil
179
178
  ret = xml.at("//*[local-name() = 'bibdata']") or return nil
180
179
  ret = Nokogiri::XML(ret.to_xml
181
180
  .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
@@ -186,6 +185,15 @@ module Metanorma
186
185
  ret
187
186
  end
188
187
 
188
+ def read_local_bibitem_file(uri)
189
+ return nil if %r{^https?://}.match?(uri)
190
+
191
+ file = "#{@localdir}#{uri}.rxl"
192
+ File.file?(file) or file = "#{@localdir}#{uri}.xml"
193
+ File.file?(file) or return nil
194
+ Nokogiri::XML(File.read(file, encoding: "utf-8"))
195
+ end
196
+
189
197
  # if citation uri points to local file, get bibitem from it
190
198
  def fetch_local_bibitem(xmldoc)
191
199
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
@@ -206,6 +214,7 @@ module Metanorma
206
214
  def bibitem_cleanup(xmldoc)
207
215
  bibitem_nested_id(xmldoc)
208
216
  ref_dl_cleanup(xmldoc)
217
+ formattedref_spans(xmldoc)
209
218
  fetch_local_bibitem(xmldoc)
210
219
  end
211
220
  end
@@ -5,10 +5,19 @@ module Metanorma
5
5
  requirement_metadata(xmldoc)
6
6
  requirement_inherit(xmldoc)
7
7
  requirement_descriptions(xmldoc)
8
+ requirement_identifier(xmldoc)
8
9
  end
9
10
 
10
11
  REQRECPER = "//requirement | //recommendation | //permission".freeze
11
12
 
13
+ def requirement_identifier(xmldoc)
14
+ xmldoc.xpath(REQRECPER).each do |r|
15
+ r.xpath("./identifier[link] | ./inherit[link]").each do |i|
16
+ i.children = i.at("./link/@target").text
17
+ end
18
+ end
19
+ end
20
+
12
21
  def requirement_inherit(xmldoc)
13
22
  xmldoc.xpath(REQRECPER).each do |r|
14
23
  ins = requirement_inherit_insert(r)
@@ -76,8 +85,16 @@ module Metanorma
76
85
  end
77
86
  end
78
87
 
88
+ def requirement_metadata1_attrs
89
+ %w(obligation model type)
90
+ end
91
+
79
92
  def requirement_metadata1_tags
80
- %w(label subject inherit)
93
+ %w(identifier subject inherit)
94
+ end
95
+
96
+ def requirement_metadata_component_tags
97
+ []
81
98
  end
82
99
 
83
100
  def requirement_metadata1(reqt, dlist, ins)
@@ -85,20 +102,21 @@ module Metanorma
85
102
  reqt.children.first.previous = " "
86
103
  ins = reqt.children.first
87
104
  end
88
- %w(obligation model type).each do |a|
105
+ requirement_metadata1_attrs.each do |a|
89
106
  dl_to_attrs(reqt, dlist, a)
90
107
  end
91
108
  requirement_metadata1_tags.each do |a|
92
109
  ins = dl_to_elems(ins, reqt, dlist, a)
93
110
  end
94
- reqt_dl_to_classif(ins, reqt, dlist)
111
+ ins = reqt_dl_to_classif(ins, reqt, dlist)
112
+ reqt_dl_to_classif1(ins, reqt, dlist)
95
113
  end
96
114
 
97
115
  def reqt_dl_to_classif(ins, reqt, dlist)
98
116
  if a = reqt.at("./classification[last()]") then ins = a end
99
117
  dlist.xpath("./dt[text()='classification']").each do |e|
100
118
  val = e.at("./following::dd/p") || e.at("./following::dd")
101
- req_classif_parse(val.text).each do |r|
119
+ req_classif_parse(val.children.to_xml).each do |r|
102
120
  ins.next = "<classification><tag>#{r[0]}</tag>"\
103
121
  "<value>#{r[1]}</value></classification>"
104
122
  ins = ins.next
@@ -106,6 +124,21 @@ module Metanorma
106
124
  end
107
125
  ins
108
126
  end
127
+
128
+ def reqt_dl_to_classif1(ins, reqt, dlist)
129
+ if a = reqt.at("./classification[last()]") then ins = a end
130
+ dlist.xpath("./dt").each do |e|
131
+ next if (requirement_metadata1_attrs + requirement_metadata1_tags +
132
+ requirement_metadata_component_tags + %w(classification))
133
+ .include?(e.text)
134
+
135
+ val = e.at("./following::dd/p") || e.at("./following::dd")
136
+ ins.next = "<classification><tag>#{e.text}</tag>"\
137
+ "<value>#{val.children.to_xml}</value></classification>"
138
+ ins = ins.next
139
+ end
140
+ ins
141
+ end
109
142
  end
110
143
  end
111
144
  end
@@ -11,7 +11,7 @@ module Metanorma
11
11
  n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
12
12
  end
13
13
  ret = Nokogiri::XML(key.to_xml)
14
- HTMLEntities.new.decode(ret.text.downcase)
14
+ @c.decode(ret.text.downcase)
15
15
  .gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
16
16
  .gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
17
17
  .gsub(/[0-9]+/, "þ\\0")
@@ -100,9 +100,13 @@ module Metanorma
100
100
  end
101
101
  end
102
102
 
103
+ def termlookup_cleanup(xmldoc)
104
+ Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call
105
+ end
106
+
103
107
  def termdef_cleanup(xmldoc)
104
108
  termdef_unnest_cleanup(xmldoc)
105
- Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call
109
+ termlookup_cleanup(xmldoc)
106
110
  term_nonverbal_designations(xmldoc)
107
111
  term_dl_to_metadata(xmldoc)
108
112
  term_termsource_to_designation(xmldoc)
@@ -117,7 +121,7 @@ module Metanorma
117
121
  termdocsource_cleanup(xmldoc)
118
122
  end
119
123
 
120
- def index_cleanup(xmldoc)
124
+ def term_index_cleanup(xmldoc)
121
125
  return unless @index_terms
122
126
 
123
127
  xmldoc.xpath("//preferred").each do |p|
@@ -7,7 +7,7 @@ module Metanorma
7
7
  text = text.gsub(/\s+<fn /, "<fn ")
8
8
  %w(passthrough passthrough-inline).each do |v|
9
9
  text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
10
- </#{v}>}mx) { HTMLEntities.new.decode($1) }
10
+ </#{v}>}mx) { @c.decode($1) }
11
11
  end
12
12
  text
13
13
  end
@@ -34,11 +34,13 @@ module Metanorma
34
34
  end
35
35
  end
36
36
 
37
+ IGNORE_QUOTES_ELEMENTS =
38
+ %w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
39
+
37
40
  def uninterrupt_quotes_around_xml_skip(elem)
38
41
  !(/\A['"]/.match?(elem.text) &&
39
- elem.previous.ancestors("pre, tt, sourcecode, stem, figure, bibdata,
40
- passthrough, identifer")
41
- .empty? &&
42
+ elem.previous.path.split(%r{/})[1..-2]
43
+ .intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
42
44
  ((elem.previous.text.strip.empty? &&
43
45
  !empty_tag_with_text_content?(elem.previous)) ||
44
46
  elem.previous.name == "index"))
@@ -49,7 +51,7 @@ module Metanorma
49
51
  /\S\Z/.match?(prev.text) or return
50
52
  foll = elem.at(".//following::text()[1]")
51
53
  m = /\A(["'][[:punct:]]*)(\s|\Z)/
52
- .match(HTMLEntities.new.decode(foll&.text)) or return
54
+ .match(@c.decode(foll&.text)) or return
53
55
  foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
54
56
  prev.content = "#{prev.text}#{m[1]}"
55
57
  end
@@ -74,10 +76,10 @@ module Metanorma
74
76
  empty_tag_with_text_content?(x) and prev = "dummy"
75
77
  next unless x.text?
76
78
 
77
- x.ancestors("pre, tt, sourcecode, stem, figure, bibdata, passthrough,
78
- identifier").empty? and
79
+ ancestors = x.path.split(%r{/})[1..-2]
80
+ ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
79
81
  dumb2smart_quotes1(x, prev)
80
- prev = x.text if x.ancestors("index").empty?
82
+ prev = x.text unless ancestors.include?("index")
81
83
  end
82
84
  end
83
85
 
@@ -68,10 +68,9 @@ module Metanorma
68
68
  end
69
69
 
70
70
  def xref_to_eref(elem)
71
- c = HTMLEntities.new
72
71
  elem["bibitemid"] = elem["target"]
73
72
  if ref = @anchors&.dig(elem["target"], :xref)
74
- elem["citeas"] = c.decode(ref)
73
+ elem["citeas"] = @c.decode(ref)
75
74
  else
76
75
  elem["citeas"] = ""
77
76
  xref_to_eref1(elem)
@@ -29,6 +29,7 @@ module Metanorma
29
29
  preprocessor Metanorma::Plugin::Lutaml::LutamlUmlAttributesTablePreprocessor
30
30
  preprocessor Metanorma::Plugin::Lutaml::LutamlUmlDatamodelDescriptionPreprocessor
31
31
  inline_macro Metanorma::Standoc::PreferredTermInlineMacro
32
+ inline_macro Metanorma::Standoc::SpanInlineMacro
32
33
  inline_macro Metanorma::Standoc::AltTermInlineMacro
33
34
  inline_macro Metanorma::Standoc::AdmittedTermInlineMacro
34
35
  inline_macro Metanorma::Standoc::DeprecatedTermInlineMacro
@@ -93,6 +94,7 @@ module Metanorma
93
94
  basebackend "html"
94
95
  outfilesuffix ".xml"
95
96
  @libdir = File.dirname(self.class::_file || __FILE__)
97
+ @c = HTMLEntities.new
96
98
  end
97
99
 
98
100
  class << self