metanorma-standoc 2.1.5 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/lib/metanorma/standoc/base.rb +13 -0
  3. data/lib/metanorma/standoc/blocks.rb +26 -17
  4. data/lib/metanorma/standoc/cleanup.rb +1 -1
  5. data/lib/metanorma/standoc/cleanup_biblio.rb +210 -0
  6. data/lib/metanorma/standoc/cleanup_block.rb +6 -4
  7. data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
  8. data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
  9. data/lib/metanorma/standoc/cleanup_reqt.rb +3 -103
  10. data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
  11. data/lib/metanorma/standoc/cleanup_text.rb +10 -8
  12. data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
  13. data/lib/metanorma/standoc/converter.rb +2 -0
  14. data/lib/metanorma/standoc/front.rb +1 -1
  15. data/lib/metanorma/standoc/front_contributor.rb +0 -10
  16. data/lib/metanorma/standoc/inline.rb +8 -4
  17. data/lib/metanorma/standoc/isodoc.rng +6 -1
  18. data/lib/metanorma/standoc/macros.rb +1 -180
  19. data/lib/metanorma/standoc/macros_inline.rb +194 -0
  20. data/lib/metanorma/standoc/ref_sect.rb +2 -2
  21. data/lib/metanorma/standoc/ref_utility.rb +1 -1
  22. data/lib/metanorma/standoc/reqt.rb +19 -75
  23. data/lib/metanorma/standoc/reqt.rng +1 -1
  24. data/lib/metanorma/standoc/section.rb +35 -3
  25. data/lib/metanorma/standoc/utils.rb +9 -43
  26. data/lib/metanorma/standoc/validate.rb +1 -69
  27. data/lib/metanorma/standoc/validate_table.rb +91 -0
  28. data/lib/metanorma/standoc/version.rb +1 -1
  29. data/metanorma-standoc.gemspec +4 -5
  30. data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +90 -7
  31. data/spec/metanorma/blocks_spec.rb +31 -267
  32. data/spec/metanorma/cleanup_blocks_spec.rb +0 -171
  33. data/spec/metanorma/inline_spec.rb +4 -0
  34. data/spec/metanorma/macros_concept_spec.rb +1033 -0
  35. data/spec/metanorma/macros_spec.rb +2 -1030
  36. data/spec/metanorma/refs_spec.rb +0 -2
  37. data/spec/metanorma/reqt_spec.rb +130 -0
  38. data/spec/metanorma/section_spec.rb +5 -0
  39. data/spec/metanorma/validate_spec.rb +46 -6
  40. data/spec/vcr_cassettes/bsi16341.yml +80 -52
  41. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +94 -94
  42. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
  43. data/spec/vcr_cassettes/hide_refs.yml +70 -70
  44. data/spec/vcr_cassettes/isobib_get_123.yml +11 -11
  45. data/spec/vcr_cassettes/isobib_get_123_1.yml +23 -23
  46. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
  47. data/spec/vcr_cassettes/isobib_get_123_2.yml +22 -22
  48. data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
  49. data/spec/vcr_cassettes/isobib_get_124.yml +11 -11
  50. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +34 -64
  51. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +45 -45
  52. data/spec/vcr_cassettes/std-link.yml +12 -12
  53. metadata +15 -11
  54. data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9487b5701cc86946c27e560e244cc4f84fceb61b0a93dd271068875654eb49a0
4
- data.tar.gz: c647cf6fb530aba3bc96c9087e7a9d49408ce239a6c6aa3a4a46a0e8cf1a5763
3
+ metadata.gz: 837bde4e0b1dcb3e1eeb5ffa7c56d063456f4a4cb3666eb00c18aaf6cae7d164
4
+ data.tar.gz: 69dcdc35a59b471bb2c6ac3cfd52a84b15625a45402c66f83c9e1ec70296b4f3
5
5
  SHA512:
6
- metadata.gz: c8356539be6d28f5dc6d4d148cfcabb8ed7dbc033392544d382d36e1e392e33da6b29a64b535f6c24cdab85a4d17d1dc5698367f0edff8f45eb02b1f8ed97330
7
- data.tar.gz: 13c49550edea0d6169fef67357a5030e146cdf62fec3cb41900db7490d95c09f9dbb1c4f4b55bea4836651ba11fcaf0714b20f37fc3a96a48dd31596cf564549
6
+ metadata.gz: 88ff22adce3bd32d0c2875acf926520a4950beb6e36e3b7e70cd7ffce68c75869fac1e6449ea0b975d2c87a2cce1b2d0e2898f7d7c29d30065abf6708df863f8
7
+ data.tar.gz: 86493fb18c13786a09a019a9a184684d90c19957c4154f01f13acd4d4b5cb1e7999cc8f0d0413f0b10ea870e676ab2bbe3e78cacc848a9799e0f9b5189d1efe3
@@ -9,6 +9,7 @@ require "fileutils"
9
9
  require "metanorma-utils"
10
10
  require "isodoc/xslfo_convert"
11
11
  require_relative "render"
12
+ require "mn-requirements"
12
13
 
13
14
  module Metanorma
14
15
  module Standoc
@@ -29,6 +30,7 @@ module Metanorma
29
30
  init_vars
30
31
  init_misc(node)
31
32
  init_processing(node)
33
+ init_reqt(node)
32
34
  init_toc(node)
33
35
  init_output(node)
34
36
  init_i18n(node)
@@ -42,7 +44,10 @@ module Metanorma
42
44
  @anchors = {}
43
45
  @internal_eref_namespaces = []
44
46
  @seen_headers = []
47
+ @seen_headers_canonical = []
45
48
  @embed_hdr = []
49
+ @reqt_model = nil
50
+ @preface = true
46
51
  end
47
52
 
48
53
  def init_misc(node)
@@ -50,6 +55,7 @@ module Metanorma
50
55
  @index_terms = node.attr("index-terms")
51
56
  @boilerplateauthority = node.attr("boilerplate-authority")
52
57
  @embed_hdr = node.attr("embed_hdr")
58
+ @document_scheme = node.attr("document-scheme")
53
59
  end
54
60
 
55
61
  def init_processing(node)
@@ -62,6 +68,13 @@ module Metanorma
62
68
  @datauriimage = node.attr("data-uri-image") != "false"
63
69
  end
64
70
 
71
# Prepares requirement handling for the document being converted.
#
# The "requirements-model" document attribute on +node+ is used when set;
# otherwise #default_requirement_model supplies the value. The chosen model
# is stored in @default_requirement_model and passed as the :default option
# to a new Metanorma::Requirements instance, kept in @reqt_models.
def init_reqt(node)
  model = node.attr("requirements-model")
  model ||= default_requirement_model
  @default_requirement_model = model
  @reqt_models = Metanorma::Requirements.new({ default: model })
end
77
+
65
78
  def init_toc(node)
66
79
  @htmltoclevels = node.attr("htmltoclevels")
67
80
  @doctoclevels = node.attr("doctoclevels")
@@ -34,7 +34,7 @@ module Metanorma
34
34
  # We append each contained block to its parent
35
35
  def open(node)
36
36
  role = node.role || node.attr("style")
37
- reqt_subpart(role) and return requirement_subpart(node)
37
+ reqt_subpart?(role) and return requirement_subpart(node)
38
38
  role == "form" and return form(node)
39
39
  role == "definition" and return termdefinition(node)
40
40
  result = []
@@ -88,26 +88,37 @@ module Metanorma
88
88
  end
89
89
 
90
90
  def example(node)
91
- return term_example(node) if in_terms? || node.option?("termexample")
92
-
91
+ (in_terms? || node.option?("termexample")) and return term_example(node)
93
92
  role = node.role || node.attr("style")
94
- %w(recommendation requirement permission).include?(role) and
95
- return requirement(node, role)
96
- return pseudocode_example(node) if role == "pseudocode"
97
- return svgmap_example(node) if role == "svgmap"
98
- return form(node) if role == "form"
99
- return termdefinition(node) if role == "definition"
100
- return figure_example(node) if role == "figure"
101
-
102
- reqt_subpart(role) and return requirement_subpart(node)
93
+ ret = example_to_requirement(node, role) ||
94
+ example_by_role(node, role) and return ret
95
+ reqt_subpart?(role) and return requirement_subpart(node)
103
96
  example_proper(node)
104
97
  end
105
98
 
99
# Dispatches an example block to the handler registered for its role.
#
# Returns the handler's result, or nil when +role+ is not one of
# "pseudocode", "svgmap", "form", "definition" or "figure".
def example_by_role(node, role)
  handlers = {
    "pseudocode" => :pseudocode_example,
    "svgmap" => :svgmap_example,
    "form" => :form,
    "definition" => :termdefinition,
    "figure" => :figure_example,
  }
  handler = handlers[role] or return
  send(handler, node)
end
108
+
109
# Converts an example block into a requirement when its role is one of the
# roles known to @reqt_models.
#
# Returns nil when +role+ is nil or is not a key of
# @reqt_models.requirement_roles; otherwise returns the result of
# #requirement, called with the value registered for that role.
def example_to_requirement(node, role)
  @reqt_models.requirement_roles.key?(role&.to_sym) or return

  # the model must be selected before #requirement runs, so that nested
  # processing happens in the right order
  select_requirement_model(node)
  obligation = @reqt_models.requirement_roles[role.to_sym]
  requirement(node, obligation, role)
end
117
+
106
118
  def svgmap_attrs(node)
107
119
  attr_code(id_attr(node)
108
- .merge(id: node.id,
120
+ .merge(id: node.id, number: node.attr("number"),
109
121
  unnumbered: node.option?("unnumbered") ? "true" : nil,
110
- number: node.attr("number"),
111
122
  subsequence: node.attr("subsequence"))
112
123
  .merge(keep_attrs(node)))
113
124
  end
@@ -225,7 +236,6 @@ module Metanorma
225
236
  filename: node.attr("filename"))))
226
237
  end
227
238
 
228
- # NOTE: html escaping is performed by Nokogiri
229
239
  def listing(node)
230
240
  fragment = ::Nokogiri::XML::Builder.new do |xml|
231
241
  xml.sourcecode **listing_attrs(node) do |s|
@@ -238,11 +248,10 @@ module Metanorma
238
248
  end
239
249
 
240
250
  def pass(node)
241
- c = HTMLEntities.new
242
251
  noko do |xml|
243
252
  xml.passthrough **attr_code(formats:
244
253
  node.attr("format") || "metanorma") do |p|
245
- p << c.encode(c.decode(node.content), :basic, :hexadecimal)
254
+ p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
246
255
  end
247
256
  end
248
257
  end
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
5
5
  require_relative "./cleanup_table"
6
6
  require_relative "./cleanup_footnotes"
7
7
  require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
8
+ require_relative "./cleanup_biblio"
9
9
  require_relative "./cleanup_boilerplate"
10
10
  require_relative "./cleanup_section"
11
11
  require_relative "./cleanup_terms"
@@ -0,0 +1,210 @@
1
+ require "set"
2
+ require "relaton_bib"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
# Replaces every clause flagged with bibitem="true" by the bibliographic
# item built from the clause's definition list.
#
# Clauses for which #dl_bib_extract yields nothing, or whose hash does not
# serialise to XML, are left untouched. The clause id is carried over to
# the new element only when it does not start with "_".
def ref_dl_cleanup(xmldoc)
  xmldoc.xpath("//clause[@bibitem = 'true']").each do |clause|
    hash = dl_bib_extract(clause)
    next unless hash

    validate_ref_dl(hash, clause)
    xml = RelatonBib::BibliographicItem.from_hash(hash).to_xml
    next unless xml

    parsed = Nokogiri::XML(xml)
    anchor = clause["id"]
    parsed.root["id"] = anchor if anchor && !/^_/.match(anchor)
    clause.replace(parsed.root)
  end
end
17
+
18
+ # do not accept implicit id
19
# Checks that a definition-list reference has an explicit anchor.
#
# The anchor is bib["id"], or failing that the clause id provided it does
# not start with "_". Without an anchor, an "Anchors" entry is logged and
# nil is returned. Otherwise the anchor is appended to @refids and the
# remaining checks are delegated to #validate_ref_dl1.
def validate_ref_dl(bib, clause)
  implicit = /^_/.match?(clause["id"])
  anchor = bib["id"] || (implicit ? nil : clause["id"])
  if anchor
    @refids << anchor
    return validate_ref_dl1(bib, anchor, clause)
  end
  @log.add("Anchors", clause,
           "The following reference is missing an anchor:\n#{clause.to_xml}")
  nil
end
31
+
32
# Logs a "Bibliography" entry for each of "title" and "docid" that is
# missing from +bib+; +id+ identifies the reference in the message.
#
# Returns bib["docid"] when present, otherwise the result of the log call.
def validate_ref_dl1(bib, id, clause)
  unless bib["title"]
    @log.add("Bibliography", clause, "Reference #{id} is missing a title")
  end
  bib["docid"] ||
    @log.add("Bibliography", clause,
             "Reference #{id} is missing a document identifier (docid)")
end
39
+
40
# Wraps the children of the first "p" found under bib[tag] in a +key+
# element, returned as a string. Returns nil when bib[tag] is absent.
def extract_from_p(tag, bib, key)
  holder = bib[tag] or return
  "<#{key}>#{holder.at('p').children}</#{key}>"
end
45
+
46
+ # if the content is a single paragraph, replace it with its children
47
+ # single links replaced with uri
48
# Serialises +para+ to stripped XML. When its only child element is a
# single "p", the children of that paragraph are serialised instead, after
# passing the paragraph through #link_unwrap.
def p_unwrap(para)
  kids = para.elements
  lone_paragraph = kids.size == 1 && kids[0].name == "p"
  return para.to_xml.strip unless lone_paragraph

  link_unwrap(kids[0]).children.to_xml.strip
end
56
+
57
# When +para+ has a "link" as its only child element, that link is replaced
# in place by its stripped "target" attribute value. Always returns +para+.
def link_unwrap(para)
  kids = para.elements
  if kids.size == 1
    sole = kids[0]
    sole.name == "link" and
      para.at("./link").replace(sole["target"].strip)
  end
  para
end
64
+
65
# Extracts the value held by one definition-list entry.
#
# Returns nil for an entry without children; a nested hash (through
# #dl_bib_extract) when the entry holds a "dl"; an array with one
# #p_unwrap result per "li" when its only child element is an "ol" or
# "ul"; and otherwise the #p_unwrap result for the entry itself. The entry
# is removed from its parent before its elements are examined.
def dd_bib_extract(dtd)
  dtd.children.empty? and return nil
  return dl_bib_extract(dtd) if dtd.at("./dl")

  kids = dtd.remove.elements
  list = kids[0] if kids.size == 1 && %w(ol ul).include?(kids[0].name)
  list or return p_unwrap(dtd)
  list.xpath("./li").map { |item| p_unwrap(item) }
end
77
+
78
# Stores +val+ in +bib+ under the dot-separated path given by +key+,
# delegating to Metanorma::Utils.set_nested_value.
def add_to_hash(bib, key, val)
  path = key.split(".")
  Metanorma::Utils.set_nested_value(bib, path, val)
end
81
+
82
+ # definition list, with at most one level of unordered lists
83
# Builds a hash from the definition list directly under +clause+.
#
# Each "dt" supplies a key (trailing colons dropped) for the "dd" entries
# that follow it. Subclauses titled "contributor", "relation" or "series"
# (case-insensitive, surrounding whitespace ignored) are extracted
# recursively and stored under that name. Returns nil when the clause has
# no "dl"; otherwise the result of #dl_bib_extract_title.
def dl_bib_extract(clause, nested = false)
  list = clause.at("./dl")
  return unless list

  label = ""
  bib = {}
  list.xpath("./dt | ./dd").each do |entry|
    if entry.name == "dt"
      label = entry.text.sub(/:+$/, "")
    else
      add_to_hash(bib, label, dd_bib_extract(entry))
    end
  end
  clause.xpath("./clause").each do |sub|
    heading = sub&.at("./title")&.text&.downcase&.strip
    %w(contributor relation series).include?(heading) or next
    add_to_hash(bib, heading, dl_bib_extract(sub, true))
  end
  dl_bib_extract_title(bib, clause, nested)
end
98
+
99
# Folds the clause title into bib["title"] for a top-level reference.
#
# Nothing happens (and +bib+ is returned as is) when +nested+ is set or the
# clause has no title. Otherwise the title element is removed from the
# clause, an existing Hash or String title is wrapped in an array, and the
# title's serialised children are appended unless empty.
def dl_bib_extract_title(bib, clause, nested)
  return bib if nested

  heading = clause.at("./title") or return bib
  text = heading.remove.children.to_xml
  existing = bib["title"]
  bib["title"] =
    case existing
    when Hash, String then [existing]
    else existing || []
    end
  bib["title"] << text unless text.empty?
  bib
end
108
+
109
+ # ---
110
+
111
# For every bibitem whose formattedref contains a span, extracts the
# bibitem's content, preprocesses it, and writes the resulting elements
# back onto the bibitem.
def formattedref_spans(xmldoc)
  xmldoc.xpath("//bibitem[formattedref//span]").each do |bibitem|
    parsed = spans_preprocess(extract_content(bibitem))
    spans_to_bibitem(bibitem, parsed)
  end
end
116
+
117
# Collects the bibitem's docidentifier entries followed by its span
# entries, in that order, as one array.
def extract_content(bib)
  identifiers = extract_docid(bib)
  identifiers + extract_spans(bib)
end
120
+
121
# Turns each span under the bibitem's formattedref into a
# { key:, type:, val: } entry.
#
# The span's class is split at its first "." into key and type; val is the
# serialised content of the span. A span whose class is exactly "type" is
# removed from the document; any other span is replaced by its children.
def extract_spans(bib)
  bib.xpath("./formattedref//span").map do |span|
    name, subtype = span["class"].split(".", 2)
    entry = { key: name, type: subtype, val: span.children.to_xml }
    if span["class"] == "type" then span.remove
    else span.replace(span.children)
    end
    entry
  end
end
129
+
130
# Converts each direct docidentifier child of the bibitem into a
# { key: "docid", type:, val: } entry and removes the element.
def extract_docid(bib)
  bib.xpath("./docidentifier").map do |docid|
    entry = { key: "docid", type: docid["type"], val: docid.text }
    docid.remove
    entry
  end
end
136
+
137
# Groups span entries into a hash ready for serialisation.
#
# The result always has :contributor, :docid, :uri and :date arrays.
# "uri" and "docid" entries keep their type; "pubyear" becomes a
# "published" date; "pubplace", "title" and "type" are stored as single
# values under their own key; "publisher" becomes an organization
# contributor; name parts are merged into person contributors by
# #spans_preprocess_contrib. Entries with any other key are ignored.
def spans_preprocess(spans)
  acc = { contributor: [], docid: [], uri: [], date: [] }
  typed = %w(uri docid)
  scalar = %w(pubplace title type)
  name_parts = %w(surname initials givenname formatted-initials)
  spans.each do |span|
    name = span[:key]
    value = span[:val]
    if typed.include?(name)
      acc[name.to_sym] << { type: span[:type], val: value }
    elsif name == "pubyear"
      acc[:date] << { type: "published", val: value }
    elsif scalar.include?(name)
      acc[name.to_sym] = value
    elsif name == "publisher"
      acc[:contributor] << { role: "publisher", entity: "organization",
                             name: value }
    elsif name_parts.include?(name)
      acc[:contributor] = spans_preprocess_contrib(span, acc[:contributor])
    end
  end
  acc
end
154
+
155
# Adds one name part to the list of person contributors.
#
# An "initials" span is renamed to "formatted-initials" (the span hash is
# modified). When #spans_preprocess_new_contrib? says so, a new person
# entry is started, with the span's type as role ("author" if none). The
# span's value is then stored on the last entry under the span's key.
# Returns +contrib+.
def spans_preprocess_contrib(span, contrib)
  span[:key] == "initials" and span[:key] = "formatted-initials"
  if spans_preprocess_new_contrib?(span, contrib)
    contrib.push({ role: span[:type] || "author", entity: "person" })
  end
  contrib.last[span[:key].to_sym] = span[:val]
  contrib
end
163
+
164
# Decides whether +span+ starts a new person contributor.
#
# Truthy when there is no contributor yet; when the last contributor
# already has the kind of name part the span carries (a surname for a
# "surname" span, formatted initials or a given name otherwise); or when
# the last contributor's role differs from the span's type ("author" if
# the span has none).
def spans_preprocess_new_contrib?(span, contrib)
  return true if contrib.empty?

  last = contrib[-1]
  taken = if span[:key] == "surname"
            last[:surname]
          else
            last[:"formatted-initials"] || last[:givenname]
          end
  taken || last[:role] != (span[:type] || "author")
end
171
+
172
# Serialises the preprocessed spans and appends them to the bibitem.
#
# +spans+ is the hash built by #spans_preprocess: arrays under :uri,
# :docid, :date and :contributor, plus optional :title, :pubplace and
# :type values. The generated markup is appended to +bib+ in one go, and a
# :type value is set as the bibitem's "type" attribute. Returns the result
# of appending to +bib+.
def spans_to_bibitem(bib, spans)
  ret = +""
  spans[:title] and ret << "<title>#{spans[:title]}</title>"
  spans[:uri].each { |s| ret << span_to_docid(s, "uri") }
  spans[:docid].each { |s| ret << span_to_docid(s, "docidentifier") }
  spans[:date].each { |s| ret << span_to_docid(s, "date") }
  spans[:contributor].each { |s| ret << span_to_contrib(s) }
  # The place of publication is stored under :pubplace; reading :place
  # here always produced an empty <place/> element.
  spans[:pubplace] and ret << "<place>#{spans[:pubplace]}</place>"
  spans[:type] and bib["type"] = spans[:type]
  bib << ret
end
183
+
184
# Renders a span entry as a +key+ element containing the span's value; the
# span's type, when present, becomes a single-quoted "type" attribute.
def span_to_docid(span, key)
  type_attr = span[:type] ? " type='#{span[:type]}'" : ""
  "<#{key}#{type_attr}>#{span[:val]}</#{key}>"
end
191
+
192
# Renders a contributor entry: its role, followed by an organization
# element when the entry's :entity is "organization", and by the output of
# #span_to_person otherwise.
def span_to_contrib(span)
  entity =
    case span[:entity]
    when "organization"
      "<organization><name>#{span[:name]}</name></organization>"
    else
      span_to_person(span)
    end
  "<contributor><role type='#{span[:role]}'/>#{entity}</contributor>"
end
199
+
200
# Renders a person entry as a name made of either formatted initials
# (preferred when present) or a forename, followed by the surname.
def span_to_person(span)
  initials = span[:"formatted-initials"]
  given = if initials
            "<formatted-initials>#{initials}</formatted-initials>"
          else
            "<forename>#{span[:givenname]}</forename>"
          end
  "<person><name>#{given}<surname>#{span[:surname]}</surname></name></person>"
end
208
+ end
209
+ end
210
+ end
@@ -45,7 +45,8 @@ module Metanorma
45
45
 
46
46
  def figure_dl_cleanup1(xmldoc)
47
47
  q = "//figure/following-sibling::*[self::dl]"
48
- xmldoc.xpath(q).each do |s|
48
+ q1 = "//figure/figure/following-sibling::*[self::dl]"
49
+ (xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
49
50
  s["key"] == "true" and s.previous_element << s.remove
50
51
  end
51
52
  end
@@ -65,8 +66,9 @@ module Metanorma
65
66
  # examples containing only figures become subfigures of figures
66
67
  def subfigure_cleanup(xmldoc)
67
68
  xmldoc.xpath("//example[figure]").each do |e|
68
- next unless e.elements.map(&:name).reject do |m|
69
- %w(name figure index).include? m
69
+ next unless e.elements.reject do |m|
70
+ %w(name figure index note).include?(m.name) ||
71
+ (m.name == "dl" && m["key"] == "true")
70
72
  end.empty?
71
73
 
72
74
  e.name = "figure"
@@ -84,9 +86,9 @@ module Metanorma
84
86
 
85
87
  def figure_cleanup(xmldoc)
86
88
  figure_footnote_cleanup(xmldoc)
89
+ subfigure_cleanup(xmldoc)
87
90
  figure_dl_cleanup1(xmldoc)
88
91
  figure_dl_cleanup2(xmldoc)
89
- subfigure_cleanup(xmldoc)
90
92
  single_subfigure_cleanup(xmldoc)
91
93
  end
92
94
 
@@ -1,24 +1,11 @@
1
- require "nokogiri"
2
- require "pathname"
3
- require "html2doc"
4
1
  require "asciimath2unitsml"
5
- require_relative "./cleanup_block"
6
- require_relative "./cleanup_footnotes"
7
- require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
9
- require_relative "./cleanup_boilerplate"
10
- require_relative "./cleanup_section"
11
- require_relative "./cleanup_terms"
12
- require_relative "./cleanup_inline"
13
- require_relative "./cleanup_amend"
14
- require "relaton_iev"
15
2
 
16
3
  module Metanorma
17
4
  module Standoc
18
5
  module Cleanup
19
6
  def asciimath2mathml(text)
20
7
  text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
21
- "<amathstem>#{HTMLEntities.new.decode($1)}</amathstem>"
8
+ "<amathstem>#{@c.decode($1)}</amathstem>"
22
9
  end
23
10
  text = Html2Doc.new({})
24
11
  .asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
@@ -60,7 +47,7 @@ module Metanorma
60
47
  def mathml_italicise(xml)
61
48
  xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
49
  "m" => MATHML_NS).each do |i|
63
- char = HTMLEntities.new.decode(i.text)
50
+ char = @c.decode(i.text)
64
51
  i["mathvariant"] = "normal" if mi_italicise?(char)
65
52
  end
66
53
  end
@@ -153,29 +153,28 @@ module Metanorma
153
153
 
154
154
  def reference_names(xmldoc)
155
155
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
156
- docid = ref.at("./docidentifier[@type = 'metanorma']") ||
157
- ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
158
- ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
159
- ref.at("./docidentifier[@primary = 'true']") ||
160
- ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
161
- ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
162
- ref.at("./docidentifier[not(@type = 'DOI')]") or next
156
+ docid = select_docid(ref) or next
163
157
  reference = format_ref(docid.children.to_xml, docid["type"])
164
158
  @anchors[ref["id"]] = { xref: reference }
165
159
  end
166
160
  end
167
161
 
162
# Picks the document identifier used to name a reference.
#
# Queries are tried in order and the first hit wins: a "metanorma"
# identifier; a primary identifier in the document language (@lang), then
# without a language, then any primary; finally a non-DOI identifier under
# the same three language conditions. Returns nil when nothing matches.
def select_docid(ref)
  [
    "./docidentifier[@type = 'metanorma']",
    "./docidentifier[@primary = 'true'][@language = '#{@lang}']",
    "./docidentifier[@primary = 'true'][not(@language)]",
    "./docidentifier[@primary = 'true']",
    "./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']",
    "./docidentifier[not(@type = 'DOI')][not(@language)]",
    "./docidentifier[not(@type = 'DOI')]",
  ].each do |query|
    hit = ref.at(query) and return hit
  end
  nil
end
171
+
168
172
  def fetch_termbase(_termbase, _id)
169
173
  ""
170
174
  end
171
175
 
172
176
  def read_local_bibitem(uri)
173
- return nil if %r{^https?://}.match?(uri)
174
-
175
- file = "#{@localdir}#{uri}.rxl"
176
- File.file?(file) or file = "#{@localdir}#{uri}.xml"
177
- File.file?(file) or return nil
178
- xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
177
+ xml = read_local_bibitem_file(uri) or return nil
179
178
  ret = xml.at("//*[local-name() = 'bibdata']") or return nil
180
179
  ret = Nokogiri::XML(ret.to_xml
181
180
  .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
@@ -186,6 +185,15 @@ module Metanorma
186
185
  ret
187
186
  end
188
187
 
188
# Loads the local file behind a citation URI as a parsed XML document.
#
# Returns nil for http(s) URIs and when no matching file exists. The file
# is looked up under @localdir, with the ".rxl" extension tried before
# ".xml", and is read as UTF-8.
def read_local_bibitem_file(uri)
  %r{^https?://}.match?(uri) and return nil

  candidates = %w(rxl xml).map { |ext| "#{@localdir}#{uri}.#{ext}" }
  path = candidates.find { |candidate| File.file?(candidate) }
  path or return nil
  Nokogiri::XML(File.read(path, encoding: "utf-8"))
end
196
+
189
197
  # if citation uri points to local file, get bibitem from it
190
198
  def fetch_local_bibitem(xmldoc)
191
199
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
@@ -206,6 +214,7 @@ module Metanorma
206
214
  def bibitem_cleanup(xmldoc)
207
215
  bibitem_nested_id(xmldoc)
208
216
  ref_dl_cleanup(xmldoc)
217
+ formattedref_spans(xmldoc)
209
218
  fetch_local_bibitem(xmldoc)
210
219
  end
211
220
  end
@@ -2,109 +2,9 @@ module Metanorma
2
2
  module Standoc
3
3
  module Cleanup
4
4
  def requirement_cleanup(xmldoc)
5
- requirement_metadata(xmldoc)
6
- requirement_inherit(xmldoc)
7
- requirement_descriptions(xmldoc)
8
- end
9
-
10
- REQRECPER = "//requirement | //recommendation | //permission".freeze
11
-
12
- def requirement_inherit(xmldoc)
13
- xmldoc.xpath(REQRECPER).each do |r|
14
- ins = requirement_inherit_insert(r)
15
- r.xpath("./*//inherit").each { |i| ins.previous = i }
16
- end
17
- end
18
-
19
- def requirement_inherit_insert(reqt)
20
- ins = reqt.at("./classification") || reqt.at(
21
- "./description | ./measurementtarget | ./specification | "\
22
- "./verification | ./import | ./description | ./component | "\
23
- "./requirement | ./recommendation | ./permission",
24
- ) and return ins
25
- requirement_inherit_insert1(reqt)
26
- end
27
-
28
- def requirement_inherit_insert1(reqt)
29
- if t = reqt.at("./title")
30
- t.next = " "
31
- t.next
32
- else
33
- if reqt.children.empty? then reqt.add_child(" ")
34
- else reqt.children.first.previous = " "
35
- end
36
- reqt.children.first
37
- end
38
- end
39
-
40
- def requirement_descriptions(xmldoc)
41
- xmldoc.xpath(REQRECPER).each do |r|
42
- r.xpath(".//p[not(./*)][normalize-space(.)='']").each(&:remove)
43
- r.children.each do |e|
44
- requirement_description_wrap(r, e)
45
- end
46
- requirement_description_cleanup1(r)
47
- end
48
- end
49
-
50
- def requirement_description_wrap(reqt, text)
51
- return if (text.element? && (reqt_subpart(text.name) ||
52
- %w(requirement recommendation
53
- permission).include?(text.name))) ||
54
- (text.text.strip.empty? && !text.at(".//xref | .//eref | .//link"))
55
-
56
- t = Nokogiri::XML::Element.new("description", reqt.document)
57
- text.before(t)
58
- t.children = text.remove
59
- end
60
-
61
- def requirement_description_cleanup1(reqt)
62
- while d = reqt.at("./description[following-sibling::*[1]"\
63
- "[self::description]]")
64
- n = d.next.remove
65
- d << n.children
66
- end
67
- reqt.xpath("./description[normalize-space(.)='']").each do |r|
68
- r.replace("\n")
69
- end
70
- end
71
-
72
- def requirement_metadata(xmldoc)
73
- xmldoc.xpath(REQRECPER).each do |r|
74
- dl = r&.at("./dl[@metadata = 'true']")&.remove or next
75
- requirement_metadata1(r, dl, r.at("./title"))
76
- end
77
- end
78
-
79
- def requirement_metadata1_tags
80
- %w(label subject inherit)
81
- end
82
-
83
- def requirement_metadata1(reqt, dlist, ins)
84
- unless ins
85
- reqt.children.first.previous = " "
86
- ins = reqt.children.first
87
- end
88
- %w(obligation model type).each do |a|
89
- dl_to_attrs(reqt, dlist, a)
90
- end
91
- requirement_metadata1_tags.each do |a|
92
- ins = dl_to_elems(ins, reqt, dlist, a)
93
- end
94
- reqt_dl_to_classif(ins, reqt, dlist)
95
- end
96
-
97
- def reqt_dl_to_classif(ins, reqt, dlist)
98
- if a = reqt.at("./classification[last()]") then ins = a end
99
- dlist.xpath("./dt[text()='classification']").each do |e|
100
- val = e.at("./following::dd/p") || e.at("./following::dd")
101
- req_classif_parse(val.text).each do |r|
102
- ins.next = "<classification><tag>#{r[0]}</tag>"\
103
- "<value>#{r[1]}</value></classification>"
104
- ins = ins.next
105
- end
106
- end
107
- ins
5
+ @reqt_models ||=
6
+ Metanorma::Requirements.new({ default: @default_requirement_model })
7
+ @reqt_models.requirement_cleanup(xmldoc)
108
8
  end
109
9
  end
110
10
  end
@@ -11,7 +11,7 @@ module Metanorma
11
11
  n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
12
12
  end
13
13
  ret = Nokogiri::XML(key.to_xml)
14
- HTMLEntities.new.decode(ret.text.downcase)
14
+ @c.decode(ret.text.downcase)
15
15
  .gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
16
16
  .gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
17
17
  .gsub(/[0-9]+/, "þ\\0")
@@ -7,7 +7,7 @@ module Metanorma
7
7
  text = text.gsub(/\s+<fn /, "<fn ")
8
8
  %w(passthrough passthrough-inline).each do |v|
9
9
  text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
10
- </#{v}>}mx) { HTMLEntities.new.decode($1) }
10
+ </#{v}>}mx) { @c.decode($1) }
11
11
  end
12
12
  text
13
13
  end
@@ -34,11 +34,13 @@ module Metanorma
34
34
  end
35
35
  end
36
36
 
37
+ IGNORE_QUOTES_ELEMENTS =
38
+ %w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
39
+
37
40
  def uninterrupt_quotes_around_xml_skip(elem)
38
41
  !(/\A['"]/.match?(elem.text) &&
39
- elem.previous.ancestors("pre, tt, sourcecode, stem, figure, bibdata,
40
- passthrough, identifer")
41
- .empty? &&
42
+ elem.previous.path.split(%r{/})[1..-2]
43
+ .intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
42
44
  ((elem.previous.text.strip.empty? &&
43
45
  !empty_tag_with_text_content?(elem.previous)) ||
44
46
  elem.previous.name == "index"))
@@ -49,7 +51,7 @@ module Metanorma
49
51
  /\S\Z/.match?(prev.text) or return
50
52
  foll = elem.at(".//following::text()[1]")
51
53
  m = /\A(["'][[:punct:]]*)(\s|\Z)/
52
- .match(HTMLEntities.new.decode(foll&.text)) or return
54
+ .match(@c.decode(foll&.text)) or return
53
55
  foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
54
56
  prev.content = "#{prev.text}#{m[1]}"
55
57
  end
@@ -74,10 +76,10 @@ module Metanorma
74
76
  empty_tag_with_text_content?(x) and prev = "dummy"
75
77
  next unless x.text?
76
78
 
77
- x.ancestors("pre, tt, sourcecode, stem, figure, bibdata, passthrough,
78
- identifier").empty? and
79
+ ancestors = x.path.split(%r{/})[1..-2]
80
+ ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
79
81
  dumb2smart_quotes1(x, prev)
80
- prev = x.text if x.ancestors("index").empty?
82
+ prev = x.text unless ancestors.include?("index")
81
83
  end
82
84
  end
83
85
 
@@ -68,10 +68,9 @@ module Metanorma
68
68
  end
69
69
 
70
70
  def xref_to_eref(elem)
71
- c = HTMLEntities.new
72
71
  elem["bibitemid"] = elem["target"]
73
72
  if ref = @anchors&.dig(elem["target"], :xref)
74
- elem["citeas"] = c.decode(ref)
73
+ elem["citeas"] = @c.decode(ref)
75
74
  else
76
75
  elem["citeas"] = ""
77
76
  xref_to_eref1(elem)