metanorma-standoc 2.3.8 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,5 @@
1
+ require_relative "spans_to_bibitem_preprocessing"
2
+
1
3
  module Metanorma
2
4
  module Standoc
3
5
  module Cleanup
@@ -9,153 +11,20 @@ module Metanorma
9
11
  def initialize(bib)
10
12
  @bib = bib
11
13
  @err = []
12
- @spans = spans_preprocess(extract_content(bib))
13
- end
14
-
15
- def extract_content(bib)
16
- extract_docid(bib) + extract_spans(bib)
17
- end
18
-
19
- def extract_spans(bib)
20
- bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
21
- s.at("./ancestor::span") and next
22
- extract_spans1(s, m)
23
- end
24
- end
25
-
26
- def extract_spans1(span, acc)
27
- keys = span["class"].split(".", 2)
28
- acc << { key: keys[0], type: keys[1],
29
- val: span.children.to_xml }
30
- (span["class"] == "type" and span.remove) or
31
- span.replace(span.children)
32
- end
33
-
34
- def extract_docid(bib)
35
- bib.xpath("./docidentifier").each_with_object([]) do |d, m|
36
- m << { key: "docid", type: d["type"], val: d.text }
37
- d.remove unless bib.at("./title")
38
- end
39
- end
40
-
41
- def empty_span_hash
42
- { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
43
- end
44
-
45
- def spans_preprocess(spans)
46
- ret = empty_span_hash
47
- spans.each { |s| span_preprocess1(s, ret) }
48
- host_rearrange(ret)
49
- end
50
-
51
- def span_preprocess1(span, ret)
52
- case span[:key]
53
- when "uri", "docid"
54
- val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
55
- ret[span[:key].to_sym] << { type: span[:type], val: val }
56
- when "date"
57
- ret[span[:key].to_sym] << { type: span[:type] || "published",
58
- val: span[:val] }
59
- when "pages", "volume", "issue"
60
- ret[:extent][span[:key].to_sym] ||= []
61
- ret[:extent][span[:key].to_sym] << span[:val]
62
- when "pubplace", "title", "type", "series"
63
- ret[span[:key].to_sym] = span[:val]
64
- when "in_title"
65
- ret[:in][:title] = span[:val]
66
- when "publisher"
67
- ret[:contrib] << { role: "publisher", entity: "organization",
68
- name: span[:val] }
69
- when "surname", "initials", "givenname", "formatted-initials"
70
- ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
71
- when "fullname"
72
- ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
73
- when "organization"
74
- ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
75
- when "in_surname", "in_initials", "in_givenname",
76
- "in_formatted-initials"
77
- ret[:in][:contrib] ||= []
78
- span[:key].sub!(/^in_/, "")
79
- ret[:in][:contrib] =
80
- spans_preprocess_contrib(span, ret[:in][:contrib])
81
- when "in_fullname"
82
- ret[:in][:contrib] ||= []
83
- span[:key].sub!(/^in_/, "")
84
- ret[:in][:contrib] =
85
- spans_preprocess_fullname(span, ret[:in][:contrib])
86
- when "in_organization"
87
- ret[:in][:contrib] ||= []
88
- span[:key].sub!(/^in_/, "")
89
- ret[:in][:contrib] =
90
- spans_preprocess_org(span, ret[:in][:contrib])
91
- else
92
- msg = "unrecognised key '#{span[:key]}' in " \
93
- "`span:#{span[:key]}[#{span[:val]}]`"
94
- @err << { msg: msg }
95
- end
14
+ @spans = spans_preprocess(extract_spans(bib))
15
+ ids = spans_preprocess(extract_docid(bib))
16
+ @spans[:docid] = override_docids(ids[:docid], @spans[:docid])
96
17
  end
97
18
 
98
- def host_rearrange(ret)
99
- ret[:in][:title] or return ret
100
- ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
101
- old
19
+ def override_docids(old, new)
20
+ ret = new
21
+ keys = new.map { |a| a[:type] }
22
+ old.each do |e|
23
+ keys.include?(e[:type]) or ret << e
102
24
  end
103
- %i(series).each do |k|
104
- ret[:in][k] = ret[k]
105
- ret.delete(k)
106
- end
107
- /^in/.match?(ret[:type]) and ret[:in][:type] =
108
- ret[:type].sub(/^in/, "")
109
25
  ret
110
26
  end
111
27
 
112
- def spans_preprocess_contrib(span, contrib)
113
- span[:key] == "initials" and span[:key] = "formatted-initials"
114
- spans_preprocess_new_contrib?(span, contrib) and
115
- contrib << { role: span[:type] || "author", entity: "person" }
116
- if multiple_givennames?(span, contrib)
117
- contrib[-1][:givenname] = [contrib[-1][:givenname],
118
- span[:val]].flatten
119
- else contrib[-1][span[:key].to_sym] = span[:val]
120
- end
121
- contrib
122
- end
123
-
124
- def spans_preprocess_new_contrib?(span, contrib)
125
- contrib.empty? ||
126
- (span[:key] == "surname" && contrib[-1][:surname]) ||
127
- contrib[-1][:role] != (span[:type] || "author")
128
- end
129
-
130
- def multiple_givennames?(span, contrib)
131
- (%w(formatted-initials givenname).include?(span[:key]) &&
132
- (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
133
- return false
134
- if contrib[-1][:"formatted-initials"]
135
- contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
136
- contrib[-1].delete(:"formatted-initials")
137
- end
138
- true
139
- end
140
-
141
- def spans_preprocess_fullname(span, contrib)
142
- name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
143
- out = { role: span[:type] || "author", entity: "person",
144
- surname: name[-1] }
145
- if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
146
- out[:"formatted-initials"] = name[0..-2].join(" ")
147
- else out[:givenname] = name[0..-2]
148
- end
149
- contrib << out
150
- contrib
151
- end
152
-
153
- def spans_preprocess_org(span, contrib)
154
- contrib << { role: span[:type] || "author", entity: "organization",
155
- name: span[:val] }
156
- contrib
157
- end
158
-
159
28
  def convert
160
29
  ret = spans_to_bibitem(@spans)
161
30
  @out = Nokogiri::XML("<bibitem>#{ret}</bibitem>").root
@@ -167,17 +36,20 @@ module Metanorma
167
36
  ret = ""
168
37
  spans[:title] and ret += "<title>#{spans[:title]}</title>"
169
38
  ret += spans_to_bibitem_docid(spans)
170
- spans[:contrib].each do |s|
171
- ret += span_to_contrib(s, spans[:title])
172
- end
173
- spans[:series] and
174
- ret += "<series><title>#{spans[:series]}</title></series>"
39
+ ret += spans_to_contribs(spans)
40
+ ret += spans_to_bibitem_edn(spans)
41
+ ret += spans_to_series(spans)
175
42
  spans[:pubplace] and ret += "<place>#{spans[:pubplace]}</place>"
176
43
  ret += spans_to_bibitem_host(spans)
177
44
  ret += spans_to_bibitem_extent(spans[:extent])
178
45
  ret
179
46
  end
180
47
 
48
+ def spans_to_series(spans)
49
+ spans[:series] or return ""
50
+ "<series><title>#{spans[:series]}</title></series>"
51
+ end
52
+
181
53
  def spans_to_bibitem_host(spans)
182
54
  spans[:in].empty? and return ""
183
55
  ret =
@@ -194,6 +66,14 @@ module Metanorma
194
66
  ret
195
67
  end
196
68
 
69
+ def spans_to_bibitem_edn(spans)
70
+ ret = ""
71
+ spans[:edition] and ret += "<edition>#{spans[:edition]}</edition>"
72
+ spans[:version] and ret += "<version>#{spans[:version]}</version>"
73
+ spans[:note] and ret += "<note>#{spans[:note]}</note>"
74
+ ret
75
+ end
76
+
197
77
  def spans_to_bibitem_extent(spans)
198
78
  ret = ""
199
79
  { volume: "volume", issue: "issue", pages: "page" }.each do |k, v|
@@ -229,6 +109,14 @@ module Metanorma
229
109
  "<date#{type}>#{val}</date>"
230
110
  end
231
111
 
112
+ def spans_to_contribs(spans)
113
+ ret = ""
114
+ spans[:contrib].each do |s|
115
+ ret += span_to_contrib(s, spans[:title])
116
+ end
117
+ ret
118
+ end
119
+
232
120
  def span_to_contrib(span, title)
233
121
  e = if span[:entity] == "organization"
234
122
  "<organization><name>#{span[:name]}</name></organization>"
@@ -0,0 +1,148 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
+ class SpansToBibitem
5
+ def extract_spans(bib)
6
+ bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
7
+ s.at("./ancestor::span") and next
8
+ extract_spans1(s, m)
9
+ end
10
+ end
11
+
12
+ def extract_spans1(span, acc)
13
+ keys = span["class"].split(".", 2)
14
+ acc << { key: keys[0], type: keys[1],
15
+ val: span.children.to_xml }
16
+ (span["class"] == "type" and span.remove) or
17
+ span.replace(span.children)
18
+ end
19
+
20
+ def extract_docid(bib)
21
+ bib.xpath("./docidentifier").each_with_object([]) do |d, m|
22
+ m << { key: "docid", type: d["type"], val: d.text }
23
+ d.remove unless bib.at("./title")
24
+ end
25
+ end
26
+
27
+ def empty_span_hash
28
+ { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
29
+ end
30
+
31
+ def spans_preprocess(spans)
32
+ ret = empty_span_hash
33
+ spans.each { |s| span_preprocess1(s, ret) }
34
+ host_rearrange(ret)
35
+ end
36
+
37
+ def span_preprocess1(span, ret)
38
+ case span[:key]
39
+ when "uri", "docid"
40
+ val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
41
+ ret[span[:key].to_sym] << { type: span[:type], val: val }
42
+ when "date"
43
+ ret[span[:key].to_sym] << { type: span[:type] || "published",
44
+ val: span[:val] }
45
+ when "pages", "volume", "issue"
46
+ ret[:extent][span[:key].to_sym] ||= []
47
+ ret[:extent][span[:key].to_sym] << span[:val]
48
+ when "pubplace", "title", "type", "series", "edition", "version",
49
+ "note"
50
+ ret[span[:key].to_sym] = span[:val]
51
+ when "in_title"
52
+ ret[:in][:title] = span[:val]
53
+ when "publisher"
54
+ ret[:contrib] << { role: "publisher", entity: "organization",
55
+ name: span[:val] }
56
+ when "surname", "initials", "givenname", "formatted-initials"
57
+ ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
58
+ when "fullname"
59
+ ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
60
+ when "organization"
61
+ ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
62
+ when "in_surname", "in_initials", "in_givenname",
63
+ "in_formatted-initials"
64
+ ret[:in][:contrib] ||= []
65
+ span[:key].sub!(/^in_/, "")
66
+ ret[:in][:contrib] =
67
+ spans_preprocess_contrib(span, ret[:in][:contrib])
68
+ when "in_fullname"
69
+ ret[:in][:contrib] ||= []
70
+ span[:key].sub!(/^in_/, "")
71
+ ret[:in][:contrib] =
72
+ spans_preprocess_fullname(span, ret[:in][:contrib])
73
+ when "in_organization"
74
+ ret[:in][:contrib] ||= []
75
+ span[:key].sub!(/^in_/, "")
76
+ ret[:in][:contrib] =
77
+ spans_preprocess_org(span, ret[:in][:contrib])
78
+ else
79
+ msg = "unrecognised key '#{span[:key]}' in " \
80
+ "`span:#{span[:key]}[#{span[:val]}]`"
81
+ @err << { msg: msg }
82
+ end
83
+ end
84
+
85
+ def host_rearrange(ret)
86
+ ret[:in][:title] or return ret
87
+ ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
88
+ old
89
+ end
90
+ %i(series).each do |k|
91
+ ret[:in][k] = ret[k]
92
+ ret.delete(k)
93
+ end
94
+ /^in/.match?(ret[:type]) and ret[:in][:type] =
95
+ ret[:type].sub(/^in/, "")
96
+ ret
97
+ end
98
+
99
+ def spans_preprocess_contrib(span, contrib)
100
+ span[:key] == "initials" and span[:key] = "formatted-initials"
101
+ spans_preprocess_new_contrib?(span, contrib) and
102
+ contrib << { role: span[:type] || "author", entity: "person" }
103
+ if multiple_givennames?(span, contrib)
104
+ contrib[-1][:givenname] = [contrib[-1][:givenname],
105
+ span[:val]].flatten
106
+ else contrib[-1][span[:key].to_sym] = span[:val]
107
+ end
108
+ contrib
109
+ end
110
+
111
+ def spans_preprocess_new_contrib?(span, contrib)
112
+ contrib.empty? ||
113
+ (span[:key] == "surname" && contrib[-1][:surname]) ||
114
+ contrib[-1][:role] != (span[:type] || "author")
115
+ end
116
+
117
+ def multiple_givennames?(span, contrib)
118
+ (%w(formatted-initials givenname).include?(span[:key]) &&
119
+ (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
120
+ return false
121
+ if contrib[-1][:"formatted-initials"]
122
+ contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
123
+ contrib[-1].delete(:"formatted-initials")
124
+ end
125
+ true
126
+ end
127
+
128
+ def spans_preprocess_fullname(span, contrib)
129
+ name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
130
+ out = { role: span[:type] || "author", entity: "person",
131
+ surname: name[-1] }
132
+ if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
133
+ out[:"formatted-initials"] = name[0..-2].join(" ")
134
+ else out[:givenname] = name[0..-2]
135
+ end
136
+ contrib << out
137
+ contrib
138
+ end
139
+
140
+ def spans_preprocess_org(span, contrib)
141
+ contrib << { role: span[:type] || "author", entity: "organization",
142
+ name: span[:val] }
143
+ contrib
144
+ end
145
+ end
146
+ end
147
+ end
148
+ end
@@ -68,6 +68,13 @@ module Metanorma
68
68
  para
69
69
  end
70
70
 
71
+ def xml_encode(text)
72
+ @c.encode(text, :basic, :hexadecimal)
73
+ .gsub(/&amp;gt;/, ">").gsub(/&amp;lt;/, "<").gsub(/&amp;amp;/, "&")
74
+ .gsub(/&gt;/, ">").gsub(/&lt;/, "<").gsub(/&amp;/, "&")
75
+ .gsub(/&quot;/, '"').gsub(/&#xa;/, "\n").gsub(/&amp;#/, "&#")
76
+ end
77
+
71
78
  class EmptyAttr
72
79
  def attr(_any_attribute)
73
80
  nil
@@ -63,8 +63,7 @@ module Metanorma
63
63
  end
64
64
 
65
65
  def nested_asset_validate_basic(doc)
66
- a = "//formula | //example | //figure | //termnote | //termexample | " \
67
- "//table"
66
+ a = "//example | //figure | //termnote | //termexample | //table"
68
67
  doc.xpath("#{a} | //note").each do |m|
69
68
  m.xpath(a.gsub(%r{//}, ".//")).each do |n|
70
69
  nested_asset_report(m, n, doc)
@@ -82,7 +81,6 @@ module Metanorma
82
81
 
83
82
  def nested_asset_report(outer, inner, doc)
84
83
  outer.name == "figure" && inner.name == "figure" and return
85
- outer.name != "formula" && inner.name == "formula" and return
86
84
  err =
87
85
  "There is an instance of #{inner.name} nested within #{outer.name}"
88
86
  @log.add("Syntax", inner, err)
@@ -97,18 +95,6 @@ module Metanorma
97
95
  @fatalerror << err2
98
96
  end
99
97
 
100
- def norm_ref_validate(doc)
101
- found = false
102
- doc.xpath("//references[@normative = 'true']/bibitem").each do |b|
103
- docid = b.at("./docidentifier[@type = 'metanorma']") or next
104
- /^\[\d+\]$/.match?(docid.text) or next
105
- @log.add("Bibliography", b,
106
- "Numeric reference in normative references")
107
- found = true
108
- end
109
- found and @fatalerror << "Numeric reference in normative references"
110
- end
111
-
112
98
  def concept_validate(doc, tag, refterm)
113
99
  found = false
114
100
  concept_validate_ids(doc)
@@ -40,7 +40,7 @@ module Metanorma
40
40
  end
41
41
 
42
42
  def hanging_para_style(root)
43
- root.xpath("//clause | //annex | //foreword | //introduction | "\
43
+ root.xpath("//clause | //annex | //foreword | //introduction | " \
44
44
  "//acknowledgements").each do |c|
45
45
  next unless c.at("./clause")
46
46
  next if c.elements.reject do |n|
@@ -50,6 +50,18 @@ module Metanorma
50
50
  style_warning(c, "Hanging paragraph in clause")
51
51
  end
52
52
  end
53
+
54
+ def norm_ref_validate(doc)
55
+ found = false
56
+ doc.xpath("//references[@normative = 'true']/bibitem").each do |b|
57
+ docid = b.at("./docidentifier[@type = 'metanorma']") or next
58
+ /^\[\d+\]$/.match?(docid.text) or next
59
+ @log.add("Bibliography", b,
60
+ "Numeric reference in normative references")
61
+ found = true
62
+ end
63
+ found and @fatalerror << "Numeric reference in normative references"
64
+ end
53
65
  end
54
66
  end
55
67
  end
@@ -19,6 +19,6 @@ module Metanorma
19
19
  end
20
20
 
21
21
  module Standoc
22
- VERSION = "2.3.8".freeze
22
+ VERSION = "2.4.0".freeze
23
23
  end
24
24
  end
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_dependency "asciidoctor", "~> 2.0.0"
32
32
  spec.add_dependency "iev", "~> 0.3.0"
33
- spec.add_dependency "isodoc", "~> 2.4.0"
33
+ spec.add_dependency "isodoc", "~> 2.5.0"
34
34
  spec.add_dependency "metanorma"
35
35
  spec.add_dependency "metanorma-plugin-datastruct", "~> 0.2.0"
36
36
  spec.add_dependency "metanorma-plugin-lutaml"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-standoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.8
4
+ version: 2.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-07 00:00:00.000000000 Z
11
+ date: 2023-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.4.0
47
+ version: 2.5.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.4.0
54
+ version: 2.5.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metanorma
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -520,6 +520,7 @@ files:
520
520
  - lib/metanorma/standoc/reqt.rng
521
521
  - lib/metanorma/standoc/section.rb
522
522
  - lib/metanorma/standoc/spans_to_bibitem.rb
523
+ - lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb
523
524
  - lib/metanorma/standoc/table.rb
524
525
  - lib/metanorma/standoc/term_lookup_cleanup.rb
525
526
  - lib/metanorma/standoc/terms.rb