metanorma-standoc 2.3.9 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,5 @@
1
+ require_relative "spans_to_bibitem_preprocessing"
2
+
1
3
  module Metanorma
2
4
  module Standoc
3
5
  module Cleanup
@@ -23,146 +25,6 @@ module Metanorma
23
25
  ret
24
26
  end
25
27
 
26
- def extract_spans(bib)
27
- bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
28
- s.at("./ancestor::span") and next
29
- extract_spans1(s, m)
30
- end
31
- end
32
-
33
- def extract_spans1(span, acc)
34
- keys = span["class"].split(".", 2)
35
- acc << { key: keys[0], type: keys[1],
36
- val: span.children.to_xml }
37
- (span["class"] == "type" and span.remove) or
38
- span.replace(span.children)
39
- end
40
-
41
- def extract_docid(bib)
42
- bib.xpath("./docidentifier").each_with_object([]) do |d, m|
43
- m << { key: "docid", type: d["type"], val: d.text }
44
- d.remove unless bib.at("./title")
45
- end
46
- end
47
-
48
- def empty_span_hash
49
- { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
50
- end
51
-
52
- def spans_preprocess(spans)
53
- ret = empty_span_hash
54
- spans.each { |s| span_preprocess1(s, ret) }
55
- host_rearrange(ret)
56
- end
57
-
58
- def span_preprocess1(span, ret)
59
- case span[:key]
60
- when "uri", "docid"
61
- val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
62
- ret[span[:key].to_sym] << { type: span[:type], val: val }
63
- when "date"
64
- ret[span[:key].to_sym] << { type: span[:type] || "published",
65
- val: span[:val] }
66
- when "pages", "volume", "issue"
67
- ret[:extent][span[:key].to_sym] ||= []
68
- ret[:extent][span[:key].to_sym] << span[:val]
69
- when "pubplace", "title", "type", "series"
70
- ret[span[:key].to_sym] = span[:val]
71
- when "in_title"
72
- ret[:in][:title] = span[:val]
73
- when "publisher"
74
- ret[:contrib] << { role: "publisher", entity: "organization",
75
- name: span[:val] }
76
- when "surname", "initials", "givenname", "formatted-initials"
77
- ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
78
- when "fullname"
79
- ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
80
- when "organization"
81
- ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
82
- when "in_surname", "in_initials", "in_givenname",
83
- "in_formatted-initials"
84
- ret[:in][:contrib] ||= []
85
- span[:key].sub!(/^in_/, "")
86
- ret[:in][:contrib] =
87
- spans_preprocess_contrib(span, ret[:in][:contrib])
88
- when "in_fullname"
89
- ret[:in][:contrib] ||= []
90
- span[:key].sub!(/^in_/, "")
91
- ret[:in][:contrib] =
92
- spans_preprocess_fullname(span, ret[:in][:contrib])
93
- when "in_organization"
94
- ret[:in][:contrib] ||= []
95
- span[:key].sub!(/^in_/, "")
96
- ret[:in][:contrib] =
97
- spans_preprocess_org(span, ret[:in][:contrib])
98
- else
99
- msg = "unrecognised key '#{span[:key]}' in " \
100
- "`span:#{span[:key]}[#{span[:val]}]`"
101
- @err << { msg: msg }
102
- end
103
- end
104
-
105
- def host_rearrange(ret)
106
- ret[:in][:title] or return ret
107
- ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
108
- old
109
- end
110
- %i(series).each do |k|
111
- ret[:in][k] = ret[k]
112
- ret.delete(k)
113
- end
114
- /^in/.match?(ret[:type]) and ret[:in][:type] =
115
- ret[:type].sub(/^in/, "")
116
- ret
117
- end
118
-
119
- def spans_preprocess_contrib(span, contrib)
120
- span[:key] == "initials" and span[:key] = "formatted-initials"
121
- spans_preprocess_new_contrib?(span, contrib) and
122
- contrib << { role: span[:type] || "author", entity: "person" }
123
- if multiple_givennames?(span, contrib)
124
- contrib[-1][:givenname] = [contrib[-1][:givenname],
125
- span[:val]].flatten
126
- else contrib[-1][span[:key].to_sym] = span[:val]
127
- end
128
- contrib
129
- end
130
-
131
- def spans_preprocess_new_contrib?(span, contrib)
132
- contrib.empty? ||
133
- (span[:key] == "surname" && contrib[-1][:surname]) ||
134
- contrib[-1][:role] != (span[:type] || "author")
135
- end
136
-
137
- def multiple_givennames?(span, contrib)
138
- (%w(formatted-initials givenname).include?(span[:key]) &&
139
- (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
140
- return false
141
- if contrib[-1][:"formatted-initials"]
142
- contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
143
- contrib[-1].delete(:"formatted-initials")
144
- end
145
- true
146
- end
147
-
148
- def spans_preprocess_fullname(span, contrib)
149
- name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
150
- out = { role: span[:type] || "author", entity: "person",
151
- surname: name[-1] }
152
- if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
153
- out[:"formatted-initials"] = name[0..-2].join(" ")
154
- else out[:givenname] = name[0..-2]
155
- end
156
- contrib << out
157
- contrib
158
- end
159
-
160
- def spans_preprocess_org(span, contrib)
161
- contrib << { role: span[:type] || "author", entity: "organization",
162
- name: span[:val] }
163
- contrib
164
- end
165
-
166
28
  def convert
167
29
  ret = spans_to_bibitem(@spans)
168
30
  @out = Nokogiri::XML("<bibitem>#{ret}</bibitem>").root
@@ -174,17 +36,20 @@ module Metanorma
174
36
  ret = ""
175
37
  spans[:title] and ret += "<title>#{spans[:title]}</title>"
176
38
  ret += spans_to_bibitem_docid(spans)
177
- spans[:contrib].each do |s|
178
- ret += span_to_contrib(s, spans[:title])
179
- end
180
- spans[:series] and
181
- ret += "<series><title>#{spans[:series]}</title></series>"
39
+ ret += spans_to_contribs(spans)
40
+ ret += spans_to_bibitem_edn(spans)
41
+ ret += spans_to_series(spans)
182
42
  spans[:pubplace] and ret += "<place>#{spans[:pubplace]}</place>"
183
43
  ret += spans_to_bibitem_host(spans)
184
44
  ret += spans_to_bibitem_extent(spans[:extent])
185
45
  ret
186
46
  end
187
47
 
48
+ def spans_to_series(spans)
49
+ spans[:series] or return ""
50
+ "<series><title>#{spans[:series]}</title></series>"
51
+ end
52
+
188
53
  def spans_to_bibitem_host(spans)
189
54
  spans[:in].empty? and return ""
190
55
  ret =
@@ -201,6 +66,14 @@ module Metanorma
201
66
  ret
202
67
  end
203
68
 
69
+ def spans_to_bibitem_edn(spans)
70
+ ret = ""
71
+ spans[:edition] and ret += "<edition>#{spans[:edition]}</edition>"
72
+ spans[:version] and ret += "<version>#{spans[:version]}</version>"
73
+ spans[:note] and ret += "<note>#{spans[:note]}</note>"
74
+ ret
75
+ end
76
+
204
77
  def spans_to_bibitem_extent(spans)
205
78
  ret = ""
206
79
  { volume: "volume", issue: "issue", pages: "page" }.each do |k, v|
@@ -236,6 +109,14 @@ module Metanorma
236
109
  "<date#{type}>#{val}</date>"
237
110
  end
238
111
 
112
+ def spans_to_contribs(spans)
113
+ ret = ""
114
+ spans[:contrib].each do |s|
115
+ ret += span_to_contrib(s, spans[:title])
116
+ end
117
+ ret
118
+ end
119
+
239
120
  def span_to_contrib(span, title)
240
121
  e = if span[:entity] == "organization"
241
122
  "<organization><name>#{span[:name]}</name></organization>"
@@ -0,0 +1,148 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
+ class SpansToBibitem
5
+ def extract_spans(bib)
6
+ bib.xpath("./formattedref//span").each_with_object([]) do |s, m|
7
+ s.at("./ancestor::span") and next
8
+ extract_spans1(s, m)
9
+ end
10
+ end
11
+
12
+ def extract_spans1(span, acc)
13
+ keys = span["class"].split(".", 2)
14
+ acc << { key: keys[0], type: keys[1],
15
+ val: span.children.to_xml }
16
+ (span["class"] == "type" and span.remove) or
17
+ span.replace(span.children)
18
+ end
19
+
20
+ def extract_docid(bib)
21
+ bib.xpath("./docidentifier").each_with_object([]) do |d, m|
22
+ m << { key: "docid", type: d["type"], val: d.text }
23
+ d.remove unless bib.at("./title")
24
+ end
25
+ end
26
+
27
+ def empty_span_hash
28
+ { contrib: [], docid: [], uri: [], date: [], extent: {}, in: {} }
29
+ end
30
+
31
+ def spans_preprocess(spans)
32
+ ret = empty_span_hash
33
+ spans.each { |s| span_preprocess1(s, ret) }
34
+ host_rearrange(ret)
35
+ end
36
+
37
+ def span_preprocess1(span, ret)
38
+ case span[:key]
39
+ when "uri", "docid"
40
+ val = link_unwrap(Nokogiri::XML.fragment(span[:val])).to_xml
41
+ ret[span[:key].to_sym] << { type: span[:type], val: val }
42
+ when "date"
43
+ ret[span[:key].to_sym] << { type: span[:type] || "published",
44
+ val: span[:val] }
45
+ when "pages", "volume", "issue"
46
+ ret[:extent][span[:key].to_sym] ||= []
47
+ ret[:extent][span[:key].to_sym] << span[:val]
48
+ when "pubplace", "title", "type", "series", "edition", "version",
49
+ "note"
50
+ ret[span[:key].to_sym] = span[:val]
51
+ when "in_title"
52
+ ret[:in][:title] = span[:val]
53
+ when "publisher"
54
+ ret[:contrib] << { role: "publisher", entity: "organization",
55
+ name: span[:val] }
56
+ when "surname", "initials", "givenname", "formatted-initials"
57
+ ret[:contrib] = spans_preprocess_contrib(span, ret[:contrib])
58
+ when "fullname"
59
+ ret[:contrib] = spans_preprocess_fullname(span, ret[:contrib])
60
+ when "organization"
61
+ ret[:contrib] = spans_preprocess_org(span, ret[:contrib])
62
+ when "in_surname", "in_initials", "in_givenname",
63
+ "in_formatted-initials"
64
+ ret[:in][:contrib] ||= []
65
+ span[:key].sub!(/^in_/, "")
66
+ ret[:in][:contrib] =
67
+ spans_preprocess_contrib(span, ret[:in][:contrib])
68
+ when "in_fullname"
69
+ ret[:in][:contrib] ||= []
70
+ span[:key].sub!(/^in_/, "")
71
+ ret[:in][:contrib] =
72
+ spans_preprocess_fullname(span, ret[:in][:contrib])
73
+ when "in_organization"
74
+ ret[:in][:contrib] ||= []
75
+ span[:key].sub!(/^in_/, "")
76
+ ret[:in][:contrib] =
77
+ spans_preprocess_org(span, ret[:in][:contrib])
78
+ else
79
+ msg = "unrecognised key '#{span[:key]}' in " \
80
+ "`span:#{span[:key]}[#{span[:val]}]`"
81
+ @err << { msg: msg }
82
+ end
83
+ end
84
+
85
+ def host_rearrange(ret)
86
+ ret[:in][:title] or return ret
87
+ ret[:in].merge!(empty_span_hash, { type: "misc" }) do |_, old, _|
88
+ old
89
+ end
90
+ %i(series).each do |k|
91
+ ret[:in][k] = ret[k]
92
+ ret.delete(k)
93
+ end
94
+ /^in/.match?(ret[:type]) and ret[:in][:type] =
95
+ ret[:type].sub(/^in/, "")
96
+ ret
97
+ end
98
+
99
+ def spans_preprocess_contrib(span, contrib)
100
+ span[:key] == "initials" and span[:key] = "formatted-initials"
101
+ spans_preprocess_new_contrib?(span, contrib) and
102
+ contrib << { role: span[:type] || "author", entity: "person" }
103
+ if multiple_givennames?(span, contrib)
104
+ contrib[-1][:givenname] = [contrib[-1][:givenname],
105
+ span[:val]].flatten
106
+ else contrib[-1][span[:key].to_sym] = span[:val]
107
+ end
108
+ contrib
109
+ end
110
+
111
+ def spans_preprocess_new_contrib?(span, contrib)
112
+ contrib.empty? ||
113
+ (span[:key] == "surname" && contrib[-1][:surname]) ||
114
+ contrib[-1][:role] != (span[:type] || "author")
115
+ end
116
+
117
+ def multiple_givennames?(span, contrib)
118
+ (%w(formatted-initials givenname).include?(span[:key]) &&
119
+ (contrib[-1][:"formatted-initials"] || contrib[-1][:givenname])) or
120
+ return false
121
+ if contrib[-1][:"formatted-initials"]
122
+ contrib[-1][:givenname] = contrib[-1][:"formatted-initials"]
123
+ contrib[-1].delete(:"formatted-initials")
124
+ end
125
+ true
126
+ end
127
+
128
+ def spans_preprocess_fullname(span, contrib)
129
+ name = span[:val].gsub(/\.(?=\p{Alpha})/, ". ").split(/ /)
130
+ out = { role: span[:type] || "author", entity: "person",
131
+ surname: name[-1] }
132
+ if name.size > 1 && name[0..-2].all? { |x| /\.$/.match?(x) }
133
+ out[:"formatted-initials"] = name[0..-2].join(" ")
134
+ else out[:givenname] = name[0..-2]
135
+ end
136
+ contrib << out
137
+ contrib
138
+ end
139
+
140
+ def spans_preprocess_org(span, contrib)
141
+ contrib << { role: span[:type] || "author", entity: "organization",
142
+ name: span[:val] }
143
+ contrib
144
+ end
145
+ end
146
+ end
147
+ end
148
+ end
@@ -68,6 +68,13 @@ module Metanorma
68
68
  para
69
69
  end
70
70
 
71
+ def xml_encode(text)
72
+ @c.encode(text, :basic, :hexadecimal)
73
+ .gsub(/&amp;gt;/, ">").gsub(/&amp;lt;/, "<").gsub(/&amp;amp;/, "&")
74
+ .gsub(/&gt;/, ">").gsub(/&lt;/, "<").gsub(/&amp;/, "&")
75
+ .gsub(/&quot;/, '"').gsub(/&#xa;/, "\n").gsub(/&amp;#/, "&#")
76
+ end
77
+
71
78
  class EmptyAttr
72
79
  def attr(_any_attribute)
73
80
  nil
@@ -63,8 +63,7 @@ module Metanorma
63
63
  end
64
64
 
65
65
  def nested_asset_validate_basic(doc)
66
- a = "//formula | //example | //figure | //termnote | //termexample | " \
67
- "//table"
66
+ a = "//example | //figure | //termnote | //termexample | //table"
68
67
  doc.xpath("#{a} | //note").each do |m|
69
68
  m.xpath(a.gsub(%r{//}, ".//")).each do |n|
70
69
  nested_asset_report(m, n, doc)
@@ -82,7 +81,6 @@ module Metanorma
82
81
 
83
82
  def nested_asset_report(outer, inner, doc)
84
83
  outer.name == "figure" && inner.name == "figure" and return
85
- outer.name != "formula" && inner.name == "formula" and return
86
84
  err =
87
85
  "There is an instance of #{inner.name} nested within #{outer.name}"
88
86
  @log.add("Syntax", inner, err)
@@ -19,6 +19,6 @@ module Metanorma
19
19
  end
20
20
 
21
21
  module Standoc
22
- VERSION = "2.3.9".freeze
22
+ VERSION = "2.4.0".freeze
23
23
  end
24
24
  end
@@ -30,7 +30,7 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_dependency "asciidoctor", "~> 2.0.0"
32
32
  spec.add_dependency "iev", "~> 0.3.0"
33
- spec.add_dependency "isodoc", "~> 2.4.0"
33
+ spec.add_dependency "isodoc", "~> 2.5.0"
34
34
  spec.add_dependency "metanorma"
35
35
  spec.add_dependency "metanorma-plugin-datastruct", "~> 0.2.0"
36
36
  spec.add_dependency "metanorma-plugin-lutaml"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metanorma-standoc
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.9
4
+ version: 2.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-27 00:00:00.000000000 Z
11
+ date: 2023-03-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciidoctor
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 2.4.0
47
+ version: 2.5.0
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 2.4.0
54
+ version: 2.5.0
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: metanorma
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -520,6 +520,7 @@ files:
520
520
  - lib/metanorma/standoc/reqt.rng
521
521
  - lib/metanorma/standoc/section.rb
522
522
  - lib/metanorma/standoc/spans_to_bibitem.rb
523
+ - lib/metanorma/standoc/spans_to_bibitem_preprocessing.rb
523
524
  - lib/metanorma/standoc/table.rb
524
525
  - lib/metanorma/standoc/term_lookup_cleanup.rb
525
526
  - lib/metanorma/standoc/terms.rb