metanorma-standoc 2.3.8 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,5 @@
1
+ require_relative "spans_to_bibitem_preprocessing"
2
+
1
3
  module Metanorma
2
4
  module Standoc
3
5
  module Cleanup
@@ -9,153 +11,20 @@ module Metanorma
9
11
  def initialize(bib)
10
12
  @bib = bib
11
13
  @err = []
12
- @spans = spans_preprocess(extract_content(bib))
13
- end
14
-
15
- def extract_content(bib)
16
- extract_docid(bib) + extract_spans(bib)
17
- end
18
-
19
- def extract_spans(bib)
20
- bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
21
- s.at("./ancestor::span") and next
22
- extract_spans1(s, m)
23
- end
24
- end
25
-
26
- def extract_spans1(span, acc)
27
- keys = span["class"].split(".", 2)
28
- acc << { key: keys[0], type: keys[1],
29
- val: span.children.to_xml }
30
- (span["class"] == "type" and span.remove) or
31
- span.replace(span.children)
32
- end
33
-
34
- def extract_docid(bib)
35
- bib.xpath("./docidentifier").each_with_object([]) do |d, m|
36
- m << { key: "docid", type: d["type"], val: d.text }
37
- d.remove unless bib.at("./title")
38
- end
39
- end
40
-
41
- def empty_span_hash
42
- { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
43
- end
44
-
45
- def spans_preprocess(spans)
46
- ret = empty_span_hash
47
- spans.each { |s| span_preprocess1(s, ret) }
48
- host_rearrange(ret)
49
- end
50
-
51
- def span_preprocess1(span, ret)
52
- case span[:key]
53
- when "uri", "docid"
54
- val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
55
- ret[span[:key].to_sym] << { type: span[:type], val: val }
56
- when "date"
57
- ret[span[:key].to_sym] << { type: span[:type] || "published",
58
- val: span[:val] }
59
- when "pages", "volume", "issue"
60
- ret[:extent][span[:key].to_sym] ||= []
61
- ret[:extent][span[:key].to_sym] << span[:val]
62
- when "pubplace", "title", "type", "series"
63
- ret[span[:key].to_sym] = span[:val]
64
- when "in_title"
65
- ret[:in][:title] = span[:val]
66
- when "publisher"
67
- ret[:contrib] << { role: "publisher", entity: "organization",
68
- name: span[:val] }
69
- when "surname", "initials", "givenname", "formatted-initials"
70
- ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
71
- when "fullname"
72
- ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
73
- when "organization"
74
- ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
75
- when "in_surname", "in_initials", "in_givenname",
76
- "in_formatted-initials"
77
- ret[:in][:contrib] ||= []
78
- span[:key].sub!(/^in_/, "")
79
- ret[:in][:contrib] =
80
- spans_preprocess_contrib(span, ret[:in][:contrib])
81
- when "in_fullname"
82
- ret[:in][:contrib] ||= []
83
- span[:key].sub!(/^in_/, "")
84
- ret[:in][:contrib] =
85
- spans_preprocess_fullname(span, ret[:in][:contrib])
86
- when "in_organization"
87
- ret[:in][:contrib] ||= []
88
- span[:key].sub!(/^in_/, "")
89
- ret[:in][:contrib] =
90
- spans_preprocess_org(span, ret[:in][:contrib])
91
- else
92
- msg = "unrecognised key '#{span[:key]}' in " \
93
- "`span:#{span[:key]}[#{span[:val]}]`"
94
- @err << { msg: msg }
95
- end
14
+ @spans = spans_preprocess(extract_spans(bib))
15
+ ids = spans_preprocess(extract_docid(bib))
16
+ @spans[:docid] = override_docids(ids[:docid], @spans[:docid])
96
17
  end
97
18
 
98
- def host_rearrange(ret)
99
- ret[:in][:title] or return ret
100
- ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
101
- old
19
+ def override_docids(old, new)
20
+ ret = new
21
+ keys = new.map { |a| a[:type] }
22
+ old.each do |e|
23
+ keys.include?(e[:type]) or ret << e
102
24
  end
103
- %i(series).each do |k|
104
- ret[:in][k] = ret[k]
105
- ret.delete(k)
106
- end
107
- /^in/.match?(ret[:type]) and ret[:in][:type] =
108
- ret[:type].sub(/^in/, "")
109
25
  ret
110
26
  end
111
27
 
112
- def spans_preprocess_contrib(span, contrib)
113
- span[:key] == "initials" and span[:key] = "formatted-initials"
114
- spans_preprocess_new_contrib?(span, contrib) and
115
- contrib << { role: span[:type] || "author", entity: "person" }
116
- if multiple_givennames?(span, contrib)
117
- contrib[-1][:givenname] = [contrib[-1][:givenname],
118
- span[:val]].flatten
119
- else contrib[-1][span[:key].to_sym] = span[:val]
120
- end
121
- contrib
122
- end
123
-
124
- def spans_preprocess_new_contrib?(span, contrib)
125
- contrib.empty? ||
126
- (span[:key] == "surname" && contrib[-1][:surname]) ||
127
- contrib[-1][:role] != (span[:type] || "author")
128
- end
129
-
130
- def multiple_givennames?(span, contrib)
131
- (%w(formatted-initials givenname).include?(span[:key]) &&
132
- (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
133
- return false
134
- if contrib[-1][:"formatted-initials"]
135
- contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
136
- contrib[-1].delete(:"formatted-initials")
137
- end
138
- true
139
- end
140
-
141
- def spans_preprocess_fullname(span, contrib)
142
- name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
143
- out = { role: span[:type] || "author", entity: "person",
144
- surname: name[-1] }
145
- if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
146
- out[:"formatted-initials"] = name[0..-2].join(" ")
147
- else out[:givenname] = name[0..-2]
148
- end
149
- contrib << out
150
- contrib
151
- end
152
-
153
- def spans_preprocess_org(span, contrib)
154
- contrib << { role: span[:type] || "author", entity: "organization",
155
- name: span[:val] }
156
- contrib
157
- end
158
-
159
28
  def convert
160
29
  ret = spans_to_bibitem(@spans)
161
30
  @out = Nokogiri::XML("<bibitem>#{ret}</bibitem>").root
@@ -167,17 +36,20 @@ module Metanorma
167
36
  ret = ""
168
37
  spans[:title] and ret += "<title>#{spans[:title]}</title>"
169
38
  ret += spans_to_bibitem_docid(spans)
170
- spans[:contrib].each do |s|
171
- ret += span_to_contrib(s, spans[:title])
172
- end
173
- spans[:series] and
174
- ret += "<series><title>#{spans[:series]}</title></series>"
39
+ ret += spans_to_contribs(spans)
40
+ ret += spans_to_bibitem_edn(spans)
41
+ ret += spans_to_series(spans)
175
42
  spans[:pubplace] and ret += "<place>#{spans[:pubplace]}</place>"
176
43
  ret += spans_to_bibitem_host(spans)
177
44
  ret += spans_to_bibitem_extent(spans[:extent])
178
45
  ret
179
46
  end
180
47
 
48
+ def spans_to_series(spans)
49
+ spans[:series] or return ""
50
+ "<series><title>#{spans[:series]}</title></series>"
51
+ end
52
+
181
53
  def spans_to_bibitem_host(spans)
182
54
  spans[:in].empty? and return ""
183
55
  ret =
@@ -194,6 +66,14 @@ module Metanorma
194
66
  ret
195
67
  end
196
68
 
69
+ def spans_to_bibitem_edn(spans)
70
+ ret = ""
71
+ spans[:edition] and ret += "<edition>#{spans[:edition]}</edition>"
72
+ spans[:version] and ret += "<version>#{spans[:version]}</version>"
73
+ spans[:note] and ret += "<note>#{spans[:note]}</note>"
74
+ ret
75
+ end
76
+
197
77
  def spans_to_bibitem_extent(spans)
198
78
  ret = ""
199
79
  { volume: "volume", issue: "issue", pages: "page" }.each do |k, v|
@@ -229,6 +109,14 @@ module Metanorma
229
109
  "<date#{type}>#{val}</date>"
230
110
  end
231
111
 
112
+ def spans_to_contribs(spans)
113
+ ret = ""
114
+ spans[:contrib].each do |s|
115
+ ret += span_to_contrib(s, spans[:title])
116
+ end
117
+ ret
118
+ end
119
+
232
120
  def span_to_contrib(span, title)
233
121
  e = if span[:entity] == "organization"
234
122
  "<organization><name>#{span[:name]}</name></organization>"
@@ -0,0 +1,148 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
+ class SpansToBibitem
5
+ def extract_spans(bib)
6
+ bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
7
+ s.at("./ancestor::span") and next
8
+ extract_spans1(s, m)
9
+ end
10
+ end
11
+
12
+ def extract_spans1(span, acc)
13
+ keys = span["class"].split(".", 2)
14
+ acc << { key: keys[0], type: keys[1],
15
+ val: span.children.to_xml }
16
+ (span["class"] == "type" and span.remove) or
17
+ span.replace(span.children)
18
+ end
19
+
20
+ def extract_docid(bib)
21
+ bib.xpath("./docidentifier").each_with_object([]) do |d, m|
22
+ m << { key: "docid", type: d["type"], val: d.text }
23
+ d.remove unless bib.at("./title")
24
+ end
25
+ end
26
+
27
+ def empty_span_hash
28
+ { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
29
+ end
30
+
31
+ def spans_preprocess(spans)
32
+ ret = empty_span_hash
33
+ spans.each { |s| span_preprocess1(s, ret) }
34
+ host_rearrange(ret)
35
+ end
36
+
37
+ def span_preprocess1(span, ret)
38
+ case span[:key]
39
+ when "uri", "docid"
40
+ val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
41
+ ret[span[:key].to_sym] << { type: span[:type], val: val }
42
+ when "date"
43
+ ret[span[:key].to_sym] << { type: span[:type] || "published",
44
+ val: span[:val] }
45
+ when "pages", "volume", "issue"
46
+ ret[:extent][span[:key].to_sym] ||= []
47
+ ret[:extent][span[:key].to_sym] << span[:val]
48
+ when "pubplace", "title", "type", "series", "edition", "version",
49
+ "note"
50
+ ret[span[:key].to_sym] = span[:val]
51
+ when "in_title"
52
+ ret[:in][:title] = span[:val]
53
+ when "publisher"
54
+ ret[:contrib] << { role: "publisher", entity: "organization",
55
+ name: span[:val] }
56
+ when "surname", "initials", "givenname", "formatted-initials"
57
+ ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
58
+ when "fullname"
59
+ ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
60
+ when "organization"
61
+ ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
62
+ when "in_surname", "in_initials", "in_givenname",
63
+ "in_formatted-initials"
64
+ ret[:in][:contrib] ||= []
65
+ span[:key].sub!(/^in_/, "")
66
+ ret[:in][:contrib] =
67
+ spans_preprocess_contrib(span, ret[:in][:contrib])
68
+ when "in_fullname"
69
+ ret[:in][:contrib] ||= []
70
+ span[:key].sub!(/^in_/, "")
71
+ ret[:in][:contrib] =
72
+ spans_preprocess_fullname(span, ret[:in][:contrib])
73
+ when "in_organization"
74
+ ret[:in][:contrib] ||= []
75
+ span[:key].sub!(/^in_/, "")
76
+ ret[:in][:contrib] =
77
+ spans_preprocess_org(span, ret[:in][:contrib])
78
+ else
79
+ msg = "unrecognised key '#{span[:key]}' in " \
80
+ "`span:#{span[:key]}[#{span[:val]}]`"
81
+ @err << { msg: msg }
82
+ end
83
+ end
84
+
85
+ def host_rearrange(ret)
86
+ ret[:in][:title] or return ret
87
+ ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
88
+ old
89
+ end
90
+ %i(series).each do |k|
91
+ ret[:in][k] = ret[k]
92
+ ret.delete(k)
93
+ end
94
+ /^in/.match?(ret[:type]) and ret[:in][:type] =
95
+ ret[:type].sub(/^in/, "")
96
+ ret
97
+ end
98
+
99
+ def spans_preprocess_contrib(span, contrib)
100
+ span[:key] == "initials" and span[:key] = "formatted-initials"
101
+ spans_preprocess_new_contrib?(span, contrib) and
102
+ contrib << { role: span[:type] || "author", entity: "person" }
103
+ if multiple_givennames?(span, contrib)
104
+ contrib[-1][:givenname] = [contrib[-1][:givenname],
105
+ span[:val]].flatten
106
+ else contrib[-1][span[:key].to_sym] = span[:val]
107
+ end
108
+ contrib
109
+ end
110
+
111
+ def spans_preprocess_new_contrib?(span, contrib)
112
+ contrib.empty? ||
113
+ (span[:key] == "surname" && contrib[-1][:surname]) ||
114
+ contrib[-1][:role] != (span[:type] || "author")
115
+ end
116
+
117
+ def multiple_givennames?(span, contrib)
118
+ (%w(formatted-initials givenname).include?(span[:key]) &&
119
+ (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
120
+ return false
121
+ if contrib[-1][:"formatted-initials"]
122
+ contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
123
+ contrib[-1].delete(:"formatted-initials")
124
+ end
125
+ true
126
+ end
127
+
128
+ def spans_preprocess_fullname(span, contrib)
129
+ name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
130
+ out = { role: span[:type] || "author", entity: "person",
131
+ surname: name[-1] }
132
+ if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
133
+ out[:"formatted-initials"] = name[0..-2].join(" ")
134
+ else out[:givenname] = name[0..-2]
135
+ end
136
+ contrib << out
137
+ contrib
138
+ end
139
+
140
+ def spans_preprocess_org(span, contrib)
141
+ contrib << { role: span[:type] || "author", entity: "organization",
142
+ name: span[:val] }
143
+ contrib
144
+ end
145
+ end
146
+ end
147
+ end
148
+ end
@@ -68,6 +68,13 @@ module Metanorma
68
68
  para
69
69
  end
70
70
 
71
+ def xml_encode(text)
72
+ @c.encode(text, :basic, :hexadecimal)
73
+ .gsub(/&amp;gt;/, ">").gsub(/&amp;lt;/, "<").gsub(/&amp;amp;/, "&")
74
+ .gsub(/&gt;/, ">").gsub(/&lt;/, "<").gsub(/&amp;/, "&")
75
+ .gsub(/&quot;/, '"').gsub(/&#xa;/, "\n").gsub(/&amp;#/, "&#")
76
+ end
77
+
71
78
  class EmptyAttr
72
79
  def attr(_any_attribute)
73
80
  nil
@@ -63,8 +63,7 @@ module Metanorma
63
63
  end
64
64
 
65
65
  def nested_asset_validate_basic(doc)
66
- a = "//formula | //example | //figure | //termnote | //termexample | " \
67
- "//table"
66
+ a = "//example | //figure | //termnote | //termexample | //table"
68
67
  doc.xpath("#{a} | //note").each do |m|
69
68
  m.xpath(a.gsub(%r{//}, ".//")).each do |n|
70
69
  nested_asset_report(m, n, doc)
@@ -82,7 +81,6 @@ module Metanorma
82
81
 
83
82
  def nested_asset_report(outer, inner, doc)
84
83
  outer.name == "figure" && inner.name == "figure" and return
85
- outer.name != "formula" && inner.name == "formula" and return
86
84
  err =
87
85
  "There is an instance of #{inner.name} nested within #{outer.name}"
88
86
  @log.add("Syntax", inner, err)
@@ -97,18 +95,6 @@ module Metanorma
97
95
  @fatalerror << err2
98
96
  end
99
97
 
100
- def norm_ref_validate(doc)
101
- found = false
102
- doc.xpath("//references[@normative = 'true']/bibitem").each do |b|
103
- docid = b.at("./docidentifier[@type = 'metanorma']") or next
104
- /^\[\d+\]$/.match?(docid.text) or next
105
- @log.add("Bibliography", b,
106
- "Numeric reference in normative references")
107
- found = true
108
- end
109
- found and @fatalerror << "Numeric reference in normative references"
110
- end
111
-
112
98
  def concept_validate(doc, tag, refterm)
113
99
  found = false
114
100
  concept_validate_ids(doc)
@@ -40,7 +40,7 @@ module Metanorma
40
40
  end
41
41
 
42
42
  def hanging_para_style(root)
43
- root.xpath("//clause | //annex | //foreword | //introduction | "\
43
+ root.xpath("//clause | //annex | //foreword | //introduction | " \
44
44
  "//acknowledgements").each do |c|
45
45
  next unless c.at("./clause")
46
46
  next if c.elements.reject do |n|
@@ -50,6 +50,18 @@ module Metanorma
50
50
  style_warning(c, "Hanging paragraph in clause")
51
51
  end
52
52
  end
53
+
54
+ def norm_ref_validate(doc)
55
+ found = false
56
+ doc.xpath("//references[@normative = 'true']/bibitem").each do |b|
57
+ docid = b.at("./docidentifier[@type = 'metanorma']") or next
58
+ /^\[\d+\]$/.match?(docid.text) or next
59
+ @log.add("Bibliography", b,
60
+ "Numeric reference in normative references")
61
+ found = true
62
+ end
63
+ found and @fatalerror << "Numeric reference in normative references"
64
+ end
53
65
  end
54
66
  end
55
67
  end
@@ -19,6 +19,6 @@ module Metanorma
19
19
  end
20
20
 
21
21
  module Standoc
22
- VERSION = "2.3.8".freeze
22
+ VERSION = "2.4.0".freeze
23
23
  end
24
24
  end
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_dependency "asciidoctor", "~> 2.0.0"
32
32
  spec.add_dependency "iev", "~> 0.3.0"
33
- spec.add_dependency "isodoc", "~> 2.4.0"
33
+ spec.add_dependency "isodoc", "~> 2.5.0"
34
34
  spec.add_dependency "metanorma"
35
35
  spec.add_dependency "metanorma-plugin-datastruct", "~> 0.2.0"
36
36
  spec.add_dependency "metanorma-plugin-lutaml"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-standoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.8
4
+ version: 2.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-07 00:00:00.000000000 Z
11
+ date: 2023-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.4.0
47
+ version: 2.5.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.4.0
54
+ version: 2.5.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metanorma
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -520,6 +520,7 @@ files:
520
520
  - lib/metanorma/standoc/reqt.rng
521
521
  - lib/metanorma/standoc/section.rb
522
522
  - lib/metanorma/standoc/spans_to_bibitem.rb
523
+ - lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb
523
524
  - lib/metanorma/standoc/table.rb
524
525
  - lib/metanorma/standoc/term_lookup_cleanup.rb
525
526
  - lib/metanorma/standoc/terms.rb