metanorma-standoc 2.1.5 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/lib/metanorma/standoc/base.rb +13 -0
  3. data/lib/metanorma/standoc/blocks.rb +26 -17
  4. data/lib/metanorma/standoc/cleanup.rb +1 -1
  5. data/lib/metanorma/standoc/cleanup_biblio.rb +210 -0
  6. data/lib/metanorma/standoc/cleanup_block.rb +6 -4
  7. data/lib/metanorma/standoc/cleanup_maths.rb +2 -15
  8. data/lib/metanorma/standoc/cleanup_ref.rb +22 -13
  9. data/lib/metanorma/standoc/cleanup_reqt.rb +3 -103
  10. data/lib/metanorma/standoc/cleanup_symbols.rb +1 -1
  11. data/lib/metanorma/standoc/cleanup_text.rb +10 -8
  12. data/lib/metanorma/standoc/cleanup_xref.rb +1 -2
  13. data/lib/metanorma/standoc/converter.rb +2 -0
  14. data/lib/metanorma/standoc/front.rb +1 -1
  15. data/lib/metanorma/standoc/front_contributor.rb +0 -10
  16. data/lib/metanorma/standoc/inline.rb +8 -4
  17. data/lib/metanorma/standoc/isodoc.rng +6 -1
  18. data/lib/metanorma/standoc/macros.rb +1 -180
  19. data/lib/metanorma/standoc/macros_inline.rb +194 -0
  20. data/lib/metanorma/standoc/ref_sect.rb +2 -2
  21. data/lib/metanorma/standoc/ref_utility.rb +1 -1
  22. data/lib/metanorma/standoc/reqt.rb +19 -75
  23. data/lib/metanorma/standoc/reqt.rng +1 -1
  24. data/lib/metanorma/standoc/section.rb +35 -3
  25. data/lib/metanorma/standoc/utils.rb +9 -43
  26. data/lib/metanorma/standoc/validate.rb +1 -69
  27. data/lib/metanorma/standoc/validate_table.rb +91 -0
  28. data/lib/metanorma/standoc/version.rb +1 -1
  29. data/metanorma-standoc.gemspec +4 -5
  30. data/spec/metanorma/{refs_dl_spec.rb → biblio_spec.rb} +90 -7
  31. data/spec/metanorma/blocks_spec.rb +31 -267
  32. data/spec/metanorma/cleanup_blocks_spec.rb +0 -171
  33. data/spec/metanorma/inline_spec.rb +4 -0
  34. data/spec/metanorma/macros_concept_spec.rb +1033 -0
  35. data/spec/metanorma/macros_spec.rb +2 -1030
  36. data/spec/metanorma/refs_spec.rb +0 -2
  37. data/spec/metanorma/reqt_spec.rb +130 -0
  38. data/spec/metanorma/section_spec.rb +5 -0
  39. data/spec/metanorma/validate_spec.rb +46 -6
  40. data/spec/vcr_cassettes/bsi16341.yml +80 -52
  41. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +94 -94
  42. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
  43. data/spec/vcr_cassettes/hide_refs.yml +70 -70
  44. data/spec/vcr_cassettes/isobib_get_123.yml +11 -11
  45. data/spec/vcr_cassettes/isobib_get_123_1.yml +23 -23
  46. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
  47. data/spec/vcr_cassettes/isobib_get_123_2.yml +22 -22
  48. data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
  49. data/spec/vcr_cassettes/isobib_get_124.yml +11 -11
  50. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +34 -64
  51. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +45 -45
  52. data/spec/vcr_cassettes/std-link.yml +12 -12
  53. metadata +15 -11
  54. data/lib/metanorma/standoc/cleanup_ref_dl.rb +0 -113
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9487b5701cc86946c27e560e244cc4f84fceb61b0a93dd271068875654eb49a0
4
- data.tar.gz: c647cf6fb530aba3bc96c9087e7a9d49408ce239a6c6aa3a4a46a0e8cf1a5763
3
+ metadata.gz: 837bde4e0b1dcb3e1eeb5ffa7c56d063456f4a4cb3666eb00c18aaf6cae7d164
4
+ data.tar.gz: 69dcdc35a59b471bb2c6ac3cfd52a84b15625a45402c66f83c9e1ec70296b4f3
5
5
  SHA512:
6
- metadata.gz: c8356539be6d28f5dc6d4d148cfcabb8ed7dbc033392544d382d36e1e392e33da6b29a64b535f6c24cdab85a4d17d1dc5698367f0edff8f45eb02b1f8ed97330
7
- data.tar.gz: 13c49550edea0d6169fef67357a5030e146cdf62fec3cb41900db7490d95c09f9dbb1c4f4b55bea4836651ba11fcaf0714b20f37fc3a96a48dd31596cf564549
6
+ metadata.gz: 88ff22adce3bd32d0c2875acf926520a4950beb6e36e3b7e70cd7ffce68c75869fac1e6449ea0b975d2c87a2cce1b2d0e2898f7d7c29d30065abf6708df863f8
7
+ data.tar.gz: 86493fb18c13786a09a019a9a184684d90c19957c4154f01f13acd4d4b5cb1e7999cc8f0d0413f0b10ea870e676ab2bbe3e78cacc848a9799e0f9b5189d1efe3
@@ -9,6 +9,7 @@ require "fileutils"
9
9
  require "metanorma-utils"
10
10
  require "isodoc/xslfo_convert"
11
11
  require_relative "render"
12
+ require "mn-requirements"
12
13
 
13
14
  module Metanorma
14
15
  module Standoc
@@ -29,6 +30,7 @@ module Metanorma
29
30
  init_vars
30
31
  init_misc(node)
31
32
  init_processing(node)
33
+ init_reqt(node)
32
34
  init_toc(node)
33
35
  init_output(node)
34
36
  init_i18n(node)
@@ -42,7 +44,10 @@ module Metanorma
42
44
  @anchors = {}
43
45
  @internal_eref_namespaces = []
44
46
  @seen_headers = []
47
+ @seen_headers_canonical = []
45
48
  @embed_hdr = []
49
+ @reqt_model = nil
50
+ @preface = true
46
51
  end
47
52
 
48
53
  def init_misc(node)
@@ -50,6 +55,7 @@ module Metanorma
50
55
  @index_terms = node.attr("index-terms")
51
56
  @boilerplateauthority = node.attr("boilerplate-authority")
52
57
  @embed_hdr = node.attr("embed_hdr")
58
+ @document_scheme = node.attr("document-scheme")
53
59
  end
54
60
 
55
61
  def init_processing(node)
@@ -62,6 +68,13 @@ module Metanorma
62
68
  @datauriimage = node.attr("data-uri-image") != "false"
63
69
  end
64
70
 
71
+ def init_reqt(node)
72
+ @default_requirement_model = (node.attr("requirements-model") ||
73
+ default_requirement_model)
74
+ @reqt_models = Metanorma::Requirements
75
+ .new({ default: @default_requirement_model })
76
+ end
77
+
65
78
  def init_toc(node)
66
79
  @htmltoclevels = node.attr("htmltoclevels")
67
80
  @doctoclevels = node.attr("doctoclevels")
@@ -34,7 +34,7 @@ module Metanorma
34
34
  # We append each contained block to its parent
35
35
  def open(node)
36
36
  role = node.role || node.attr("style")
37
- reqt_subpart(role) and return requirement_subpart(node)
37
+ reqt_subpart?(role) and return requirement_subpart(node)
38
38
  role == "form" and return form(node)
39
39
  role == "definition" and return termdefinition(node)
40
40
  result = []
@@ -88,26 +88,37 @@ module Metanorma
88
88
  end
89
89
 
90
90
  def example(node)
91
- return term_example(node) if in_terms? || node.option?("termexample")
92
-
91
+ (in_terms? || node.option?("termexample")) and return term_example(node)
93
92
  role = node.role || node.attr("style")
94
- %w(recommendation requirement permission).include?(role) and
95
- return requirement(node, role)
96
- return pseudocode_example(node) if role == "pseudocode"
97
- return svgmap_example(node) if role == "svgmap"
98
- return form(node) if role == "form"
99
- return termdefinition(node) if role == "definition"
100
- return figure_example(node) if role == "figure"
101
-
102
- reqt_subpart(role) and return requirement_subpart(node)
93
+ ret = example_to_requirement(node, role) ||
94
+ example_by_role(node, role) and return ret
95
+ reqt_subpart?(role) and return requirement_subpart(node)
103
96
  example_proper(node)
104
97
  end
105
98
 
99
+ def example_by_role(node, role)
100
+ case role
101
+ when "pseudocode" then pseudocode_example(node)
102
+ when "svgmap" then svgmap_example(node)
103
+ when "form" then form(node)
104
+ when "definition" then termdefinition(node)
105
+ when "figure" then figure_example(node)
106
+ end
107
+ end
108
+
109
+ def example_to_requirement(node, role)
110
+ return unless @reqt_models.requirement_roles.key?(role&.to_sym)
111
+
112
+ # need to call here for proper recursion ordering
113
+ select_requirement_model(node)
114
+ requirement(node,
115
+ @reqt_models.requirement_roles[role.to_sym], role)
116
+ end
117
+
106
118
  def svgmap_attrs(node)
107
119
  attr_code(id_attr(node)
108
- .merge(id: node.id,
120
+ .merge(id: node.id, number: node.attr("number"),
109
121
  unnumbered: node.option?("unnumbered") ? "true" : nil,
110
- number: node.attr("number"),
111
122
  subsequence: node.attr("subsequence"))
112
123
  .merge(keep_attrs(node)))
113
124
  end
@@ -225,7 +236,6 @@ module Metanorma
225
236
  filename: node.attr("filename"))))
226
237
  end
227
238
 
228
- # NOTE: html escaping is performed by Nokogiri
229
239
  def listing(node)
230
240
  fragment = ::Nokogiri::XML::Builder.new do |xml|
231
241
  xml.sourcecode **listing_attrs(node) do |s|
@@ -238,11 +248,10 @@ module Metanorma
238
248
  end
239
249
 
240
250
  def pass(node)
241
- c = HTMLEntities.new
242
251
  noko do |xml|
243
252
  xml.passthrough **attr_code(formats:
244
253
  node.attr("format") || "metanorma") do |p|
245
- p << c.encode(c.decode(node.content), :basic, :hexadecimal)
254
+ p << @c.encode(@c.decode(node.content), :basic, :hexadecimal)
246
255
  end
247
256
  end
248
257
  end
@@ -5,7 +5,7 @@ require_relative "./cleanup_block"
5
5
  require_relative "./cleanup_table"
6
6
  require_relative "./cleanup_footnotes"
7
7
  require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
8
+ require_relative "./cleanup_biblio"
9
9
  require_relative "./cleanup_boilerplate"
10
10
  require_relative "./cleanup_section"
11
11
  require_relative "./cleanup_terms"
@@ -0,0 +1,210 @@
1
+ require "set"
2
+ require "relaton_bib"
3
+
module Metanorma
  module Standoc
    # Bibliography clean-up: converts definition-list encoded references and
    # <span>-annotated formatted references into structured bibitem content.
    module Cleanup
      # Replace every clause flagged bibitem="true" with the bibitem XML that
      # RelatonBib::BibliographicItem.from_hash builds from the clause's
      # definition list. An explicit (non "_"-prefixed) clause id is carried
      # over onto the generated bibitem.
      #
      # @param xmldoc [Nokogiri::XML::Node] document to rewrite in place
      def ref_dl_cleanup(xmldoc)
        xmldoc.xpath("//clause[@bibitem = 'true']").each do |c|
          bib = dl_bib_extract(c) or next
          validate_ref_dl(bib, c)
          bibitemxml = RelatonBib::BibliographicItem.from_hash(bib).to_xml or next
          bibitem = Nokogiri::XML(bibitemxml)
          bibitem.root["id"] = c["id"] if c["id"] && !/^_/.match(c["id"])
          c.replace(bibitem.root)
        end
      end

      # Check that a definition-list reference has an anchor; ids starting
      # with "_" are not accepted (do not accept implicit id).
      # Logs to @log when no usable id exists; otherwise records the id in
      # @refids and runs the content checks.
      #
      # @param bib [Hash] reference data extracted from the definition list
      # @param clause [Nokogiri::XML::Node] the clause the data came from
      def validate_ref_dl(bib, clause)
        id = bib["id"]
        id ||= clause["id"] unless /^_/.match?(clause["id"])
        unless id
          @log.add("Anchors", clause,
                   "The following reference is missing an anchor:\n"\
                   "#{clause.to_xml}")
          return
        end
        @refids << id
        validate_ref_dl1(bib, id, clause)
      end

      # Log a "Bibliography" message for each of "title" and "docid" that is
      # absent from the extracted reference.
      def validate_ref_dl1(bib, id, clause)
        bib["title"] or
          @log.add("Bibliography", clause, "Reference #{id} is missing a title")
        bib["docid"] or
          @log.add("Bibliography", clause,
                   "Reference #{id} is missing a document identifier (docid)")
      end

      # Wrap the children of the first <p> under bib[tag] in a <key> element.
      #
      # @return [String, nil] nil when bib has no entry for tag
      def extract_from_p(tag, bib, key)
        return unless bib[tag]

        "<#{key}>#{bib[tag].at('p').children}</#{key}>"
      end

      # if the content is a single paragraph, replace it with its children
      # single links replaced with uri
      #
      # @param para [Nokogiri::XML::Node]
      # @return [String] serialised XML, stripped of surrounding whitespace
      def p_unwrap(para)
        elems = para.elements
        if elems.size == 1 && elems[0].name == "p"
          link_unwrap(elems[0]).children.to_xml.strip
        else
          para.to_xml.strip
        end
      end

      # When the paragraph's only child element is a <link>, replace that
      # link (in place) with its stripped target attribute.
      #
      # @return [Nokogiri::XML::Node] the same paragraph node
      def link_unwrap(para)
        elems = para.elements
        if elems.size == 1 && elems[0].name == "link"
          para.at("./link").replace(elems[0]["target"].strip)
        end
        para
      end

      # Extract the value held by a definition-list node: a nested hash when
      # it contains a <dl>, an array of strings when its sole element is an
      # ol/ul, otherwise a single string. The node is removed from its
      # document unless it contains a nested <dl>.
      #
      # @return [Hash, Array<String>, String, nil] nil for a childless node
      def dd_bib_extract(dtd)
        return nil if dtd.children.empty?

        dtd.at("./dl") and return dl_bib_extract(dtd)
        elems = dtd.remove.elements
        return p_unwrap(dtd) unless elems.size == 1 &&
          %w(ol ul).include?(elems[0].name)

        elems[0].xpath("./li").each_with_object([]) do |li, ret|
          ret << p_unwrap(li)
        end
      end

      # Store val in bib under the dot-separated key path, delegating to
      # Metanorma::Utils.set_nested_value.
      def add_to_hash(bib, key, val)
        Metanorma::Utils::set_nested_value(bib, key.split("."), val)
      end

      # definition list, with at most one level of unordered lists
      #
      # Builds a hash from the dt/dd pairs of the clause's <dl> (trailing
      # colons are stripped from dt text), then merges in subclauses titled
      # contributor, relation or series.
      #
      # @param clause [Nokogiri::XML::Node]
      # @param nested [Boolean] true when called for a subclause, in which
      #   case the clause title is not added as a bibliographic title
      # @return [Hash, nil] nil when the clause has no direct <dl> child
      def dl_bib_extract(clause, nested = false)
        dl = clause.at("./dl") or return
        key = ""
        bib = dl.xpath("./dt | ./dd").each_with_object({}) do |dtd, m|
          # a dt sets the current key; the following dd supplies its value
          (dtd.name == "dt" and key = dtd.text.sub(/:+$/, "")) or
            add_to_hash(m, key, dd_bib_extract(dtd))
        end
        clause.xpath("./clause").each do |c1|
          key = c1&.at("./title")&.text&.downcase&.strip
          next unless %w(contributor relation series).include? key

          add_to_hash(bib, key, dl_bib_extract(c1, true))
        end
        dl_bib_extract_title(bib, clause, nested)
      end

      # For a non-nested clause with a <title>, remove that title from the
      # clause and append its content to bib["title"], coercing an existing
      # single title (Hash or String) into an array first.
      #
      # @return [Hash] bib
      def dl_bib_extract_title(bib, clause, nested)
        (!nested && clause.at("./title")) or return bib
        title = clause.at("./title").remove.children.to_xml
        bib["title"] = [bib["title"]] if bib["title"].is_a?(Hash) ||
          bib["title"].is_a?(String)
        bib["title"] ||= []
        bib["title"] << title if !title.empty?
        bib
      end

      # ---

      # For each bibitem whose formattedref contains <span> markup, turn the
      # spans and existing docidentifiers into structured child elements.
      def formattedref_spans(xmldoc)
        xmldoc.xpath("//bibitem[formattedref//span]").each do |b|
          spans_to_bibitem(b, spans_preprocess(extract_content(b)))
        end
      end

      # @return [Array<Hash>] docidentifier entries followed by span entries
      def extract_content(bib)
        extract_docid(bib) + extract_spans(bib)
      end

      # Collect the spans under formattedref as {key:, type:, val:} hashes,
      # splitting the class attribute on its first "." into key and type.
      # A span of class "type" is removed; any other is replaced by its
      # children so the formatted reference keeps its text.
      # Spans without a class attribute carry no bibliographic key and are
      # left untouched (previously they raised NoMethodError on nil).
      def extract_spans(bib)
        bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
          s["class"] or next
          keys = s["class"].split(".", 2)
          m << { key: keys[0], type: keys[1],
                 val: s.children.to_xml }
          (s["class"] == "type" and s.remove) or s.replace(s.children)
        end
      end

      # Remove the bibitem's direct docidentifier children and return them
      # as {key: "docid", type:, val:} hashes.
      def extract_docid(bib)
        bib.xpath("./docidentifier").each_with_object([]) do |d, m|
          m << { key: "docid", type: d["type"], val: d.text }
          d.remove
        end
      end

      # Group the flat span list by kind.
      #
      # @param spans [Array<Hash>] entries with :key, :type and :val
      # @return [Hash] always has :contributor, :docid, :uri and :date
      #   arrays; :pubplace, :title and :type are set only when present
      def spans_preprocess(spans)
        ret = { contributor: [], docid: [], uri: [], date: [] }
        spans.each do |s|
          case s[:key]
          when "uri", "docid"
            ret[s[:key].to_sym] << { type: s[:type], val: s[:val] }
          when "pubyear" then ret[:date] << { type: "published", val: s[:val] }
          when "pubplace", "title", "type" then ret[s[:key].to_sym] = s[:val]
          when "publisher"
            ret[:contributor] << { role: "publisher", entity: "organization",
                                   name: s[:val] }
          when "surname", "initials", "givenname", "formatted-initials"
            ret[:contributor] = spans_preprocess_contrib(s, ret[:contributor])
          end
        end
        ret
      end

      # Add a name component to the last person in contrib, starting a new
      # person first when needed. "initials" is stored under
      # "formatted-initials"; note that span[:key] is rewritten in place.
      #
      # @return [Array<Hash>] contrib
      def spans_preprocess_contrib(span, contrib)
        span[:key] = "formatted-initials" if span[:key] == "initials"

        spans_preprocess_new_contrib?(span, contrib) and
          contrib << { role: span[:type] || "author", entity: "person" }
        contrib[-1][span[:key].to_sym] = span[:val]
        contrib
      end

      # A new contributor is needed when there is none yet, when the last
      # one already holds the incoming component (a surname for a surname;
      # formatted initials or a given name otherwise), or when its role
      # differs from the span's role (default "author").
      def spans_preprocess_new_contrib?(span, contrib)
        contrib.empty? ||
          (if span[:key] == "surname" then contrib[-1][:surname]
           else (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])
           end) ||
          contrib[-1][:role] != (span[:type] || "author")
      end

      # Serialise the grouped spans and append them to the bibitem in the
      # order title, uri, docidentifier, date, contributor, place; a :type
      # span sets the bibitem's type attribute.
      #
      # @param bib [Nokogiri::XML::Node] bibitem receiving the markup
      # @param spans [Hash] output of spans_preprocess
      def spans_to_bibitem(bib, spans)
        ret = +""
        spans[:title] and ret << "<title>#{spans[:title]}</title>"
        spans[:uri].each { |s| ret << span_to_docid(s, "uri") }
        spans[:docid].each { |s| ret << span_to_docid(s, "docidentifier") }
        spans[:date].each { |s| ret << span_to_docid(s, "date") }
        spans[:contributor].each { |s| ret << span_to_contrib(s) }
        # the value lives under :pubplace; reading :place always gave nil
        spans[:pubplace] and ret << "<place>#{spans[:pubplace]}</place>"
        spans[:type] and bib["type"] = spans[:type]
        bib << ret
      end

      # Render a {type:, val:} entry as a <key> element, adding the type
      # attribute only when a type is given.
      def span_to_docid(span, key)
        if span[:type]
          "<#{key} type='#{span[:type]}'>#{span[:val]}</#{key}>"
        else
          "<#{key}>#{span[:val]}</#{key}>"
        end
      end

      # Render a contributor entry (organization or person) with its role.
      def span_to_contrib(span)
        e = if span[:entity] == "organization"
              "<organization><name>#{span[:name]}</name></organization>"
            else span_to_person(span)
            end
        "<contributor><role type='#{span[:role]}'/>#{e}</contributor>"
      end

      # Render a person's name; formatted initials take precedence over the
      # given name, which is emitted as <forename>.
      def span_to_person(span)
        pre = (span[:"formatted-initials"] and
               "<formatted-initials>"\
               "#{span[:"formatted-initials"]}</formatted-initials>") ||
          "<forename>#{span[:givenname]}</forename>"
        "<person><name>#{pre}<surname>#{span[:surname]}</surname></name>"\
          "</person>"
      end
    end
  end
end
@@ -45,7 +45,8 @@ module Metanorma
45
45
 
46
46
  def figure_dl_cleanup1(xmldoc)
47
47
  q = "//figure/following-sibling::*[self::dl]"
48
- xmldoc.xpath(q).each do |s|
48
+ q1 = "//figure/figure/following-sibling::*[self::dl]"
49
+ (xmldoc.xpath(q) - xmldoc.xpath(q1)).each do |s|
49
50
  s["key"] == "true" and s.previous_element << s.remove
50
51
  end
51
52
  end
@@ -65,8 +66,9 @@ module Metanorma
65
66
  # examples containing only figures become subfigures of figures
66
67
  def subfigure_cleanup(xmldoc)
67
68
  xmldoc.xpath("//example[figure]").each do |e|
68
- next unless e.elements.map(&:name).reject do |m|
69
- %w(name figure index).include? m
69
+ next unless e.elements.reject do |m|
70
+ %w(name figure index note).include?(m.name) ||
71
+ (m.name == "dl" && m["key"] == "true")
70
72
  end.empty?
71
73
 
72
74
  e.name = "figure"
@@ -84,9 +86,9 @@ module Metanorma
84
86
 
85
87
  def figure_cleanup(xmldoc)
86
88
  figure_footnote_cleanup(xmldoc)
89
+ subfigure_cleanup(xmldoc)
87
90
  figure_dl_cleanup1(xmldoc)
88
91
  figure_dl_cleanup2(xmldoc)
89
- subfigure_cleanup(xmldoc)
90
92
  single_subfigure_cleanup(xmldoc)
91
93
  end
92
94
 
@@ -1,24 +1,11 @@
1
- require "nokogiri"
2
- require "pathname"
3
- require "html2doc"
4
1
  require "asciimath2unitsml"
5
- require_relative "./cleanup_block"
6
- require_relative "./cleanup_footnotes"
7
- require_relative "./cleanup_ref"
8
- require_relative "./cleanup_ref_dl"
9
- require_relative "./cleanup_boilerplate"
10
- require_relative "./cleanup_section"
11
- require_relative "./cleanup_terms"
12
- require_relative "./cleanup_inline"
13
- require_relative "./cleanup_amend"
14
- require "relaton_iev"
15
2
 
16
3
  module Metanorma
17
4
  module Standoc
18
5
  module Cleanup
19
6
  def asciimath2mathml(text)
20
7
  text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
21
- "<amathstem>#{HTMLEntities.new.decode($1)}</amathstem>"
8
+ "<amathstem>#{@c.decode($1)}</amathstem>"
22
9
  end
23
10
  text = Html2Doc.new({})
24
11
  .asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"])
@@ -60,7 +47,7 @@ module Metanorma
60
47
  def mathml_italicise(xml)
61
48
  xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
62
49
  "m" => MATHML_NS).each do |i|
63
- char = HTMLEntities.new.decode(i.text)
50
+ char = @c.decode(i.text)
64
51
  i["mathvariant"] = "normal" if mi_italicise?(char)
65
52
  end
66
53
  end
@@ -153,29 +153,28 @@ module Metanorma
153
153
 
154
154
  def reference_names(xmldoc)
155
155
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |ref|
156
- docid = ref.at("./docidentifier[@type = 'metanorma']") ||
157
- ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
158
- ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
159
- ref.at("./docidentifier[@primary = 'true']") ||
160
- ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
161
- ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
162
- ref.at("./docidentifier[not(@type = 'DOI')]") or next
156
+ docid = select_docid(ref) or next
163
157
  reference = format_ref(docid.children.to_xml, docid["type"])
164
158
  @anchors[ref["id"]] = { xref: reference }
165
159
  end
166
160
  end
167
161
 
162
+ def select_docid(ref)
163
+ ref.at("./docidentifier[@type = 'metanorma']") ||
164
+ ref.at("./docidentifier[@primary = 'true'][@language = '#{@lang}']") ||
165
+ ref.at("./docidentifier[@primary = 'true'][not(@language)]") ||
166
+ ref.at("./docidentifier[@primary = 'true']") ||
167
+ ref.at("./docidentifier[not(@type = 'DOI')][@language = '#{@lang}']") ||
168
+ ref.at("./docidentifier[not(@type = 'DOI')][not(@language)]") ||
169
+ ref.at("./docidentifier[not(@type = 'DOI')]")
170
+ end
171
+
168
172
  def fetch_termbase(_termbase, _id)
169
173
  ""
170
174
  end
171
175
 
172
176
  def read_local_bibitem(uri)
173
- return nil if %r{^https?://}.match?(uri)
174
-
175
- file = "#{@localdir}#{uri}.rxl"
176
- File.file?(file) or file = "#{@localdir}#{uri}.xml"
177
- File.file?(file) or return nil
178
- xml = Nokogiri::XML(File.read(file, encoding: "utf-8"))
177
+ xml = read_local_bibitem_file(uri) or return nil
179
178
  ret = xml.at("//*[local-name() = 'bibdata']") or return nil
180
179
  ret = Nokogiri::XML(ret.to_xml
181
180
  .sub(%r{(<bibdata[^>]*?) xmlns=("[^"]+"|'[^']+')}, "\\1")).root
@@ -186,6 +185,15 @@ module Metanorma
186
185
  ret
187
186
  end
188
187
 
188
+ def read_local_bibitem_file(uri)
189
+ return nil if %r{^https?://}.match?(uri)
190
+
191
+ file = "#{@localdir}#{uri}.rxl"
192
+ File.file?(file) or file = "#{@localdir}#{uri}.xml"
193
+ File.file?(file) or return nil
194
+ Nokogiri::XML(File.read(file, encoding: "utf-8"))
195
+ end
196
+
189
197
  # if citation uri points to local file, get bibitem from it
190
198
  def fetch_local_bibitem(xmldoc)
191
199
  xmldoc.xpath("//bibitem[formattedref][uri[@type = 'citation']]")
@@ -206,6 +214,7 @@ module Metanorma
206
214
  def bibitem_cleanup(xmldoc)
207
215
  bibitem_nested_id(xmldoc)
208
216
  ref_dl_cleanup(xmldoc)
217
+ formattedref_spans(xmldoc)
209
218
  fetch_local_bibitem(xmldoc)
210
219
  end
211
220
  end
@@ -2,109 +2,9 @@ module Metanorma
2
2
  module Standoc
3
3
  module Cleanup
4
4
  def requirement_cleanup(xmldoc)
5
- requirement_metadata(xmldoc)
6
- requirement_inherit(xmldoc)
7
- requirement_descriptions(xmldoc)
8
- end
9
-
10
- REQRECPER = "//requirement | //recommendation | //permission".freeze
11
-
12
- def requirement_inherit(xmldoc)
13
- xmldoc.xpath(REQRECPER).each do |r|
14
- ins = requirement_inherit_insert(r)
15
- r.xpath("./*//inherit").each { |i| ins.previous = i }
16
- end
17
- end
18
-
19
- def requirement_inherit_insert(reqt)
20
- ins = reqt.at("./classification") || reqt.at(
21
- "./description | ./measurementtarget | ./specification | "\
22
- "./verification | ./import | ./description | ./component | "\
23
- "./requirement | ./recommendation | ./permission",
24
- ) and return ins
25
- requirement_inherit_insert1(reqt)
26
- end
27
-
28
- def requirement_inherit_insert1(reqt)
29
- if t = reqt.at("./title")
30
- t.next = " "
31
- t.next
32
- else
33
- if reqt.children.empty? then reqt.add_child(" ")
34
- else reqt.children.first.previous = " "
35
- end
36
- reqt.children.first
37
- end
38
- end
39
-
40
- def requirement_descriptions(xmldoc)
41
- xmldoc.xpath(REQRECPER).each do |r|
42
- r.xpath(".//p[not(./*)][normalize-space(.)='']").each(&:remove)
43
- r.children.each do |e|
44
- requirement_description_wrap(r, e)
45
- end
46
- requirement_description_cleanup1(r)
47
- end
48
- end
49
-
50
- def requirement_description_wrap(reqt, text)
51
- return if (text.element? && (reqt_subpart(text.name) ||
52
- %w(requirement recommendation
53
- permission).include?(text.name))) ||
54
- (text.text.strip.empty? && !text.at(".//xref | .//eref | .//link"))
55
-
56
- t = Nokogiri::XML::Element.new("description", reqt.document)
57
- text.before(t)
58
- t.children = text.remove
59
- end
60
-
61
- def requirement_description_cleanup1(reqt)
62
- while d = reqt.at("./description[following-sibling::*[1]"\
63
- "[self::description]]")
64
- n = d.next.remove
65
- d << n.children
66
- end
67
- reqt.xpath("./description[normalize-space(.)='']").each do |r|
68
- r.replace("\n")
69
- end
70
- end
71
-
72
- def requirement_metadata(xmldoc)
73
- xmldoc.xpath(REQRECPER).each do |r|
74
- dl = r&.at("./dl[@metadata = 'true']")&.remove or next
75
- requirement_metadata1(r, dl, r.at("./title"))
76
- end
77
- end
78
-
79
- def requirement_metadata1_tags
80
- %w(label subject inherit)
81
- end
82
-
83
- def requirement_metadata1(reqt, dlist, ins)
84
- unless ins
85
- reqt.children.first.previous = " "
86
- ins = reqt.children.first
87
- end
88
- %w(obligation model type).each do |a|
89
- dl_to_attrs(reqt, dlist, a)
90
- end
91
- requirement_metadata1_tags.each do |a|
92
- ins = dl_to_elems(ins, reqt, dlist, a)
93
- end
94
- reqt_dl_to_classif(ins, reqt, dlist)
95
- end
96
-
97
- def reqt_dl_to_classif(ins, reqt, dlist)
98
- if a = reqt.at("./classification[last()]") then ins = a end
99
- dlist.xpath("./dt[text()='classification']").each do |e|
100
- val = e.at("./following::dd/p") || e.at("./following::dd")
101
- req_classif_parse(val.text).each do |r|
102
- ins.next = "<classification><tag>#{r[0]}</tag>"\
103
- "<value>#{r[1]}</value></classification>"
104
- ins = ins.next
105
- end
106
- end
107
- ins
5
+ @reqt_models ||=
6
+ Metanorma::Requirements.new({ default: @default_requirement_model })
7
+ @reqt_models.requirement_cleanup(xmldoc)
108
8
  end
109
9
  end
110
10
  end
@@ -11,7 +11,7 @@ module Metanorma
11
11
  n.replace(grkletters(MathML2AsciiMath.m2a(n.to_xml)))
12
12
  end
13
13
  ret = Nokogiri::XML(key.to_xml)
14
- HTMLEntities.new.decode(ret.text.downcase)
14
+ @c.decode(ret.text.downcase)
15
15
  .gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
16
16
  .gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
17
17
  .gsub(/[0-9]+/, "þ\\0")
@@ -7,7 +7,7 @@ module Metanorma
7
7
  text = text.gsub(/\s+<fn /, "<fn ")
8
8
  %w(passthrough passthrough-inline).each do |v|
9
9
  text.gsub!(%r{<#{v}\s+formats="metanorma">([^<]*)
10
- </#{v}>}mx) { HTMLEntities.new.decode($1) }
10
+ </#{v}>}mx) { @c.decode($1) }
11
11
  end
12
12
  text
13
13
  end
@@ -34,11 +34,13 @@ module Metanorma
34
34
  end
35
35
  end
36
36
 
37
+ IGNORE_QUOTES_ELEMENTS =
38
+ %w(pre tt sourcecode stem figure bibdata passthrough identifier).freeze
39
+
37
40
  def uninterrupt_quotes_around_xml_skip(elem)
38
41
  !(/\A['"]/.match?(elem.text) &&
39
- elem.previous.ancestors("pre, tt, sourcecode, stem, figure, bibdata,
40
- passthrough, identifer")
41
- .empty? &&
42
+ elem.previous.path.split(%r{/})[1..-2]
43
+ .intersection(IGNORE_QUOTES_ELEMENTS).empty? &&
42
44
  ((elem.previous.text.strip.empty? &&
43
45
  !empty_tag_with_text_content?(elem.previous)) ||
44
46
  elem.previous.name == "index"))
@@ -49,7 +51,7 @@ module Metanorma
49
51
  /\S\Z/.match?(prev.text) or return
50
52
  foll = elem.at(".//following::text()[1]")
51
53
  m = /\A(["'][[:punct:]]*)(\s|\Z)/
52
- .match(HTMLEntities.new.decode(foll&.text)) or return
54
+ .match(@c.decode(foll&.text)) or return
53
55
  foll.content = foll.text.sub(/\A(["'][[:punct:]]*)/, "")
54
56
  prev.content = "#{prev.text}#{m[1]}"
55
57
  end
@@ -74,10 +76,10 @@ module Metanorma
74
76
  empty_tag_with_text_content?(x) and prev = "dummy"
75
77
  next unless x.text?
76
78
 
77
- x.ancestors("pre, tt, sourcecode, stem, figure, bibdata, passthrough,
78
- identifier").empty? and
79
+ ancestors = x.path.split(%r{/})[1..-2]
80
+ ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? and
79
81
  dumb2smart_quotes1(x, prev)
80
- prev = x.text if x.ancestors("index").empty?
82
+ prev = x.text unless ancestors.include?("index")
81
83
  end
82
84
  end
83
85
 
@@ -68,10 +68,9 @@ module Metanorma
68
68
  end
69
69
 
70
70
  def xref_to_eref(elem)
71
- c = HTMLEntities.new
72
71
  elem["bibitemid"] = elem["target"]
73
72
  if ref = @anchors&.dig(elem["target"], :xref)
74
- elem["citeas"] = c.decode(ref)
73
+ elem["citeas"] = @c.decode(ref)
75
74
  else
76
75
  elem["citeas"] = ""
77
76
  xref_to_eref1(elem)