metanorma-standoc 2.3.9 → 2.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ require_relative "spans_to_bibitem_preprocessing"
2
+
1
3
  module Metanorma
2
4
  module Standoc
3
5
  module Cleanup
@@ -23,146 +25,6 @@ module Metanorma
23
25
  ret
24
26
  end
25
27
 
26
- def extract_spans(bib)
27
- bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
28
- s.at("./ancestor::span") and next
29
- extract_spans1(s, m)
30
- end
31
- end
32
-
33
- def extract_spans1(span, acc)
34
- keys = span["class"].split(".", 2)
35
- acc << { key: keys[0], type: keys[1],
36
- val: span.children.to_xml }
37
- (span["class"] == "type" and span.remove) or
38
- span.replace(span.children)
39
- end
40
-
41
- def extract_docid(bib)
42
- bib.xpath("./docidentifier").each_with_object([]) do |d, m|
43
- m << { key: "docid", type: d["type"], val: d.text }
44
- d.remove unless bib.at("./title")
45
- end
46
- end
47
-
48
- def empty_span_hash
49
- { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
50
- end
51
-
52
- def spans_preprocess(spans)
53
- ret = empty_span_hash
54
- spans.each { |s| span_preprocess1(s, ret) }
55
- host_rearrange(ret)
56
- end
57
-
58
- def span_preprocess1(span, ret)
59
- case span[:key]
60
- when "uri", "docid"
61
- val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
62
- ret[span[:key].to_sym] << { type: span[:type], val: val }
63
- when "date"
64
- ret[span[:key].to_sym] << { type: span[:type] || "published",
65
- val: span[:val] }
66
- when "pages", "volume", "issue"
67
- ret[:extent][span[:key].to_sym] ||= []
68
- ret[:extent][span[:key].to_sym] << span[:val]
69
- when "pubplace", "title", "type", "series"
70
- ret[span[:key].to_sym] = span[:val]
71
- when "in_title"
72
- ret[:in][:title] = span[:val]
73
- when "publisher"
74
- ret[:contrib] << { role: "publisher", entity: "organization",
75
- name: span[:val] }
76
- when "surname", "initials", "givenname", "formatted-initials"
77
- ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
78
- when "fullname"
79
- ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
80
- when "organization"
81
- ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
82
- when "in_surname", "in_initials", "in_givenname",
83
- "in_formatted-initials"
84
- ret[:in][:contrib] ||= []
85
- span[:key].sub!(/^in_/, "")
86
- ret[:in][:contrib] =
87
- spans_preprocess_contrib(span, ret[:in][:contrib])
88
- when "in_fullname"
89
- ret[:in][:contrib] ||= []
90
- span[:key].sub!(/^in_/, "")
91
- ret[:in][:contrib] =
92
- spans_preprocess_fullname(span, ret[:in][:contrib])
93
- when "in_organization"
94
- ret[:in][:contrib] ||= []
95
- span[:key].sub!(/^in_/, "")
96
- ret[:in][:contrib] =
97
- spans_preprocess_org(span, ret[:in][:contrib])
98
- else
99
- msg = "unrecognised key '#{span[:key]}' in " \
100
- "`span:#{span[:key]}[#{span[:val]}]`"
101
- @err << { msg: msg }
102
- end
103
- end
104
-
105
- def host_rearrange(ret)
106
- ret[:in][:title] or return ret
107
- ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
108
- old
109
- end
110
- %i(series).each do |k|
111
- ret[:in][k] = ret[k]
112
- ret.delete(k)
113
- end
114
- /^in/.match?(ret[:type]) and ret[:in][:type] =
115
- ret[:type].sub(/^in/, "")
116
- ret
117
- end
118
-
119
- def spans_preprocess_contrib(span, contrib)
120
- span[:key] == "initials" and span[:key] = "formatted-initials"
121
- spans_preprocess_new_contrib?(span, contrib) and
122
- contrib << { role: span[:type] || "author", entity: "person" }
123
- if multiple_givennames?(span, contrib)
124
- contrib[-1][:givenname] = [contrib[-1][:givenname],
125
- span[:val]].flatten
126
- else contrib[-1][span[:key].to_sym] = span[:val]
127
- end
128
- contrib
129
- end
130
-
131
- def spans_preprocess_new_contrib?(span, contrib)
132
- contrib.empty? ||
133
- (span[:key] == "surname" && contrib[-1][:surname]) ||
134
- contrib[-1][:role] != (span[:type] || "author")
135
- end
136
-
137
- def multiple_givennames?(span, contrib)
138
- (%w(formatted-initials givenname).include?(span[:key]) &&
139
- (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
140
- return false
141
- if contrib[-1][:"formatted-initials"]
142
- contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
143
- contrib[-1].delete(:"formatted-initials")
144
- end
145
- true
146
- end
147
-
148
- def spans_preprocess_fullname(span, contrib)
149
- name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
150
- out = { role: span[:type] || "author", entity: "person",
151
- surname: name[-1] }
152
- if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
153
- out[:"formatted-initials"] = name[0..-2].join(" ")
154
- else out[:givenname] = name[0..-2]
155
- end
156
- contrib << out
157
- contrib
158
- end
159
-
160
- def spans_preprocess_org(span, contrib)
161
- contrib << { role: span[:type] || "author", entity: "organization",
162
- name: span[:val] }
163
- contrib
164
- end
165
-
166
28
  def convert
167
29
  ret = spans_to_bibitem(@spans)
168
30
  @out = Nokogiri::XML("<bibitem>#{ret}</bibitem>").root
@@ -174,17 +36,20 @@ module Metanorma
174
36
  ret = ""
175
37
  spans[:title] and ret += "<title>#{spans[:title]}</title>"
176
38
  ret += spans_to_bibitem_docid(spans)
177
- spans[:contrib].each do |s|
178
- ret += span_to_contrib(s, spans[:title])
179
- end
180
- spans[:series] and
181
- ret += "<series><title>#{spans[:series]}</title></series>"
39
+ ret += spans_to_contribs(spans)
40
+ ret += spans_to_bibitem_edn(spans)
41
+ ret += spans_to_series(spans)
182
42
  spans[:pubplace] and ret += "<place>#{spans[:pubplace]}</place>"
183
43
  ret += spans_to_bibitem_host(spans)
184
44
  ret += spans_to_bibitem_extent(spans[:extent])
185
45
  ret
186
46
  end
187
47
 
48
+ def spans_to_series(spans)
49
+ spans[:series] or return ""
50
+ "<series><title>#{spans[:series]}</title></series>"
51
+ end
52
+
188
53
  def spans_to_bibitem_host(spans)
189
54
  spans[:in].empty? and return ""
190
55
  ret =
@@ -201,6 +66,14 @@ module Metanorma
201
66
  ret
202
67
  end
203
68
 
69
+ def spans_to_bibitem_edn(spans)
70
+ ret = ""
71
+ spans[:edition] and ret += "<edition>#{spans[:edition]}</edition>"
72
+ spans[:version] and ret += "<version>#{spans[:version]}</version>"
73
+ spans[:note] and ret += "<note>#{spans[:note]}</note>"
74
+ ret
75
+ end
76
+
204
77
  def spans_to_bibitem_extent(spans)
205
78
  ret = ""
206
79
  { volume: "volume", issue: "issue", pages: "page" }.each do |k, v|
@@ -236,6 +109,14 @@ module Metanorma
236
109
  "<date#{type}>#{val}</date>"
237
110
  end
238
111
 
112
+ def spans_to_contribs(spans)
113
+ ret = ""
114
+ spans[:contrib].each do |s|
115
+ ret += span_to_contrib(s, spans[:title])
116
+ end
117
+ ret
118
+ end
119
+
239
120
  def span_to_contrib(span, title)
240
121
  e = if span[:entity] == "organization"
241
122
  "<organization><name>#{span[:name]}</name></organization>"
@@ -0,0 +1,148 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
+ class SpansToBibitem
5
+ def extract_spans(bib)
6
+ bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
7
+ s.at("./ancestor::span") and next
8
+ extract_spans1(s, m)
9
+ end
10
+ end
11
+
12
+ def extract_spans1(span, acc)
13
+ keys = span["class"].split(".", 2)
14
+ acc << { key: keys[0], type: keys[1],
15
+ val: span.children.to_xml }
16
+ (span["class"] == "type" and span.remove) or
17
+ span.replace(span.children)
18
+ end
19
+
20
+ def extract_docid(bib)
21
+ bib.xpath("./docidentifier").each_with_object([]) do |d, m|
22
+ m << { key: "docid", type: d["type"], val: d.text }
23
+ d.remove unless bib.at("./title")
24
+ end
25
+ end
26
+
27
+ def empty_span_hash
28
+ { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
29
+ end
30
+
31
+ def spans_preprocess(spans)
32
+ ret = empty_span_hash
33
+ spans.each { |s| span_preprocess1(s, ret) }
34
+ host_rearrange(ret)
35
+ end
36
+
37
+ def span_preprocess1(span, ret)
38
+ case span[:key]
39
+ when "uri", "docid"
40
+ val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
41
+ ret[span[:key].to_sym] << { type: span[:type], val: val }
42
+ when "date"
43
+ ret[span[:key].to_sym] << { type: span[:type] || "published",
44
+ val: span[:val] }
45
+ when "pages", "volume", "issue"
46
+ ret[:extent][span[:key].to_sym] ||= []
47
+ ret[:extent][span[:key].to_sym] << span[:val]
48
+ when "pubplace", "title", "type", "series", "edition", "version",
49
+ "note"
50
+ ret[span[:key].to_sym] = span[:val]
51
+ when "in_title"
52
+ ret[:in][:title] = span[:val]
53
+ when "publisher"
54
+ ret[:contrib] << { role: "publisher", entity: "organization",
55
+ name: span[:val] }
56
+ when "surname", "initials", "givenname", "formatted-initials"
57
+ ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
58
+ when "fullname"
59
+ ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
60
+ when "organization"
61
+ ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
62
+ when "in_surname", "in_initials", "in_givenname",
63
+ "in_formatted-initials"
64
+ ret[:in][:contrib] ||= []
65
+ span[:key].sub!(/^in_/, "")
66
+ ret[:in][:contrib] =
67
+ spans_preprocess_contrib(span, ret[:in][:contrib])
68
+ when "in_fullname"
69
+ ret[:in][:contrib] ||= []
70
+ span[:key].sub!(/^in_/, "")
71
+ ret[:in][:contrib] =
72
+ spans_preprocess_fullname(span, ret[:in][:contrib])
73
+ when "in_organization"
74
+ ret[:in][:contrib] ||= []
75
+ span[:key].sub!(/^in_/, "")
76
+ ret[:in][:contrib] =
77
+ spans_preprocess_org(span, ret[:in][:contrib])
78
+ else
79
+ msg = "unrecognised key '#{span[:key]}' in " \
80
+ "`span:#{span[:key]}[#{span[:val]}]`"
81
+ @err << { msg: msg }
82
+ end
83
+ end
84
+
85
+ def host_rearrange(ret)
86
+ ret[:in][:title] or return ret
87
+ ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
88
+ old
89
+ end
90
+ %i(series).each do |k|
91
+ ret[:in][k] = ret[k]
92
+ ret.delete(k)
93
+ end
94
+ /^in/.match?(ret[:type]) and ret[:in][:type] =
95
+ ret[:type].sub(/^in/, "")
96
+ ret
97
+ end
98
+
99
+ def spans_preprocess_contrib(span, contrib)
100
+ span[:key] == "initials" and span[:key] = "formatted-initials"
101
+ spans_preprocess_new_contrib?(span, contrib) and
102
+ contrib << { role: span[:type] || "author", entity: "person" }
103
+ if multiple_givennames?(span, contrib)
104
+ contrib[-1][:givenname] = [contrib[-1][:givenname],
105
+ span[:val]].flatten
106
+ else contrib[-1][span[:key].to_sym] = span[:val]
107
+ end
108
+ contrib
109
+ end
110
+
111
+ def spans_preprocess_new_contrib?(span, contrib)
112
+ contrib.empty? ||
113
+ (span[:key] == "surname" && contrib[-1][:surname]) ||
114
+ contrib[-1][:role] != (span[:type] || "author")
115
+ end
116
+
117
+ def multiple_givennames?(span, contrib)
118
+ (%w(formatted-initials givenname).include?(span[:key]) &&
119
+ (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
120
+ return false
121
+ if contrib[-1][:"formatted-initials"]
122
+ contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
123
+ contrib[-1].delete(:"formatted-initials")
124
+ end
125
+ true
126
+ end
127
+
128
+ def spans_preprocess_fullname(span, contrib)
129
+ name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
130
+ out = { role: span[:type] || "author", entity: "person",
131
+ surname: name[-1] }
132
+ if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
133
+ out[:"formatted-initials"] = name[0..-2].join(" ")
134
+ else out[:givenname] = name[0..-2]
135
+ end
136
+ contrib << out
137
+ contrib
138
+ end
139
+
140
+ def spans_preprocess_org(span, contrib)
141
+ contrib << { role: span[:type] || "author", entity: "organization",
142
+ name: span[:val] }
143
+ contrib
144
+ end
145
+ end
146
+ end
147
+ end
148
+ end
@@ -18,7 +18,7 @@ module Metanorma
18
18
  end
19
19
 
20
20
  def noko(&block)
21
- Metanorma::Utils::noko(&block)
21
+ Metanorma::Utils::noko(@script, &block)
22
22
  end
23
23
 
24
24
  def attr_code(attributes)
@@ -68,6 +68,14 @@ module Metanorma
68
68
  para
69
69
  end
70
70
 
71
+ def xml_encode(text)
72
+ @c.encode(text, :basic, :hexadecimal)
73
+ .gsub(/&amp;gt;/, ">").gsub(/&amp;lt;/, "<").gsub(/&amp;amp;/, "&")
74
+ .gsub(/&gt;/, ">").gsub(/&lt;/, "<").gsub(/&amp;/, "&")
75
+ .gsub(/&quot;/, '"').gsub(/&#xa;/, "\n").gsub(/&amp;#/, "&#")
76
+ .gsub(/&apos;/, "'")
77
+ end
78
+
71
79
  class EmptyAttr
72
80
  def attr(_any_attribute)
73
81
  nil
@@ -53,18 +53,46 @@ module Metanorma
53
53
  table_validate(doc)
54
54
  @fatalerror += requirement_validate(doc)
55
55
  image_validate(doc)
56
+ math_validate(doc)
56
57
  @fatalerror.empty? or
57
58
  clean_abort(@fatalerror.join("\n"), doc)
58
59
  end
59
60
 
61
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
62
+
63
+ def math_validate(doc)
64
+ doc.xpath("//m:math", "m" => MATHML_NS).each do |m|
65
+ math = mathml_sanitise(m.dup)
66
+ Plurimath::Math.parse(math, "mathml").to_mathml
67
+ rescue StandardError => e
68
+ math_validate_error(math, m, e)
69
+ end
70
+ end
71
+
72
+ def mathml_sanitise(math)
73
+ math.to_xml(encoding: "US-ASCII").gsub(/ xmlns=["'][^"']+["']/, "")
74
+ .gsub(%r{<[^:/>]+:}, "<").gsub(%r{</[^:/>]+:}, "</")
75
+ #.gsub(/&#([^;]+);/) { |x| "&#x#{$1.to_i.to_s(16)};" }
76
+ end
77
+
78
+ def math_validate_error(math, elem, error)
79
+ a = elem.parent.at("./asciimath")
80
+ l = elem.parent.at("./latexmath")
81
+ orig = ""
82
+ a and orig += "\n\tAsciimath original: #{@c.decode(a.children.to_xml)}"
83
+ l and orig += "\n\tLatexmath original: #{@c.decode(l.children.to_xml)}"
84
+ @log.add("Mathematics", elem,
85
+ "Invalid MathML: #{math}\n #{error}#{orig}")
86
+ @fatalerror << "Invalid MathML: #{math}"
87
+ end
88
+
60
89
  def nested_asset_validate(doc)
61
90
  nested_asset_validate_basic(doc)
62
91
  nested_note_validate(doc)
63
92
  end
64
93
 
65
94
  def nested_asset_validate_basic(doc)
66
- a = "//formula | //example | //figure | //termnote | //termexample | " \
67
- "//table"
95
+ a = "//example | //figure | //termnote | //termexample | //table"
68
96
  doc.xpath("#{a} | //note").each do |m|
69
97
  m.xpath(a.gsub(%r{//}, ".//")).each do |n|
70
98
  nested_asset_report(m, n, doc)
@@ -82,7 +110,6 @@ module Metanorma
82
110
 
83
111
  def nested_asset_report(outer, inner, doc)
84
112
  outer.name == "figure" && inner.name == "figure" and return
85
- outer.name != "formula" && inner.name == "formula" and return
86
113
  err =
87
114
  "There is an instance of #{inner.name} nested within #{outer.name}"
88
115
  @log.add("Syntax", inner, err)
@@ -19,6 +19,6 @@ module Metanorma
19
19
  end
20
20
 
21
21
  module Standoc
22
- VERSION = "2.3.9".freeze
22
+ VERSION = "2.4.1".freeze
23
23
  end
24
24
  end
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_dependency "asciidoctor", "~> 2.0.0"
32
32
  spec.add_dependency "iev", "~> 0.3.0"
33
- spec.add_dependency "isodoc", "~> 2.4.0"
33
+ spec.add_dependency "isodoc", "~> 2.5.0"
34
34
  spec.add_dependency "metanorma"
35
35
  spec.add_dependency "metanorma-plugin-datastruct", "~> 0.2.0"
36
36
  spec.add_dependency "metanorma-plugin-lutaml"
@@ -38,8 +38,7 @@ Gem::Specification.new do |spec|
38
38
  # relaton-cli not just relaton, to avoid circular reference in metanorma
39
39
  spec.add_dependency "asciimath2unitsml", "~> 0.4.0"
40
40
  spec.add_dependency "concurrent-ruby"
41
- spec.add_dependency "latexmath"
42
- spec.add_dependency "mathml2asciimath"
41
+ spec.add_dependency "plurimath"
43
42
  spec.add_dependency "pngcheck"
44
43
  spec.add_dependency "relaton-cli", "~> 1.15.0"
45
44
  spec.add_dependency "relaton-iev", "~> 1.1.0"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-standoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.9
4
+ version: 2.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-27 00:00:00.000000000 Z
11
+ date: 2023-03-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.4.0
47
+ version: 2.5.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.4.0
54
+ version: 2.5.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metanorma
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -137,21 +137,7 @@ dependencies:
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  - !ruby/object:Gem::Dependency
140
- name: latexmath
141
- requirement: !ruby/object:Gem::Requirement
142
- requirements:
143
- - - ">="
144
- - !ruby/object:Gem::Version
145
- version: '0'
146
- type: :runtime
147
- prerelease: false
148
- version_requirements: !ruby/object:Gem::Requirement
149
- requirements:
150
- - - ">="
151
- - !ruby/object:Gem::Version
152
- version: '0'
153
- - !ruby/object:Gem::Dependency
154
- name: mathml2asciimath
140
+ name: plurimath
155
141
  requirement: !ruby/object:Gem::Requirement
156
142
  requirements:
157
143
  - - ">="
@@ -520,6 +506,7 @@ files:
520
506
  - lib/metanorma/standoc/reqt.rng
521
507
  - lib/metanorma/standoc/section.rb
522
508
  - lib/metanorma/standoc/spans_to_bibitem.rb
509
+ - lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb
523
510
  - lib/metanorma/standoc/table.rb
524
511
  - lib/metanorma/standoc/term_lookup_cleanup.rb
525
512
  - lib/metanorma/standoc/terms.rb