metanorma-standoc 3.0.4 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/metanorma/standoc/base.rb +5 -7
- data/lib/metanorma/standoc/basicdoc.rng +14 -8
- data/lib/metanorma/standoc/biblio-standoc.rng +37 -7
- data/lib/metanorma/standoc/biblio.rng +30 -18
- data/lib/metanorma/standoc/blocks.rb +24 -3
- data/lib/metanorma/standoc/cleanup.rb +1 -0
- data/lib/metanorma/standoc/cleanup_bibdata.rb +17 -5
- data/lib/metanorma/standoc/cleanup_inline.rb +3 -0
- data/lib/metanorma/standoc/cleanup_maths.rb +3 -5
- data/lib/metanorma/standoc/cleanup_review.rb +76 -0
- data/lib/metanorma/standoc/cleanup_section.rb +1 -30
- data/lib/metanorma/standoc/cleanup_text.rb +11 -31
- data/lib/metanorma/standoc/cleanup_xref.rb +2 -37
- data/lib/metanorma/standoc/front.rb +6 -3
- data/lib/metanorma/standoc/init.rb +1 -0
- data/lib/metanorma/standoc/inline.rb +0 -1
- data/lib/metanorma/standoc/isodoc.rng +115 -96
- data/lib/metanorma/standoc/lists.rb +1 -0
- data/lib/metanorma/standoc/macros_inline.rb +3 -1
- data/lib/metanorma/standoc/macros_note.rb +0 -1
- data/lib/metanorma/standoc/ref.rb +4 -26
- data/lib/metanorma/standoc/regex.rb +78 -0
- data/lib/metanorma/standoc/reqt.rng +7 -6
- data/lib/metanorma/standoc/terms.rb +2 -9
- data/lib/metanorma/standoc/utils.rb +19 -9
- data/lib/metanorma/standoc/validate.rb +2 -46
- data/lib/metanorma/standoc/validate_schema.rb +104 -0
- data/lib/metanorma/standoc/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 982e6533abd03671b4c76ba32204915843cdc0c644b06ec0e8a34798a5d50d1d
|
4
|
+
data.tar.gz: c12cb3a77574fef2d8aa219dae15470ed14ee50e3d7ca55c527f54dc73242a4e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eec8efe148aeef9f65007f135aed6ad0a91c7c8c0a94749d75c1946d53e0877a3c8840c32679e20d01048fee434d35949453dcfd7335417f049f225eab31afdf
|
7
|
+
data.tar.gz: 8e2dc0a69568e2f460dcf39fa081756ed0d5610b00c02516c02ffd14bb84682a03dc07a29dc261651f65576175c36561d53a80cea8889c9519b8053c3051c612
|
@@ -11,30 +11,28 @@ require_relative "localbib"
|
|
11
11
|
require_relative "init"
|
12
12
|
require "mn-requirements"
|
13
13
|
|
14
|
-
require 'set'
|
15
|
-
|
16
14
|
module Asciidoctor
|
17
15
|
module Compliance
|
18
|
-
ADMONITION_STYLES.add(
|
16
|
+
ADMONITION_STYLES.add("EDITOR") unless ADMONITION_STYLES.include?("EDITOR")
|
19
17
|
end
|
20
18
|
end
|
21
19
|
|
22
20
|
module Metanorma
|
23
21
|
module Standoc
|
24
22
|
module Base
|
25
|
-
#XML_ROOT_TAG = "standard-document".freeze
|
26
|
-
#XML_NAMESPACE = "https://www.metanorma.org/ns/standoc".freeze
|
23
|
+
# XML_ROOT_TAG = "standard-document".freeze
|
24
|
+
# XML_NAMESPACE = "https://www.metanorma.org/ns/standoc".freeze
|
27
25
|
FONTS_MANIFEST = "fonts-manifest".freeze
|
28
26
|
|
29
27
|
attr_accessor :log
|
30
28
|
|
31
29
|
def xml_root_tag
|
32
|
-
#self.class::XML_ROOT_TAG
|
30
|
+
# self.class::XML_ROOT_TAG
|
33
31
|
"metanorma"
|
34
32
|
end
|
35
33
|
|
36
34
|
def xml_namespace
|
37
|
-
#self.class::XML_NAMESPACE
|
35
|
+
# self.class::XML_NAMESPACE
|
38
36
|
"https://www.metanorma.org/ns/standoc"
|
39
37
|
end
|
40
38
|
|
@@ -1701,16 +1701,22 @@ which can be bookmarks as well as block or section references</a:documentation>
|
|
1701
1701
|
<a:documentation>Inline reference to a paragraph or paragraphs, appearing as a footnote.
|
1702
1702
|
The target of a footnote is the location it is embedded in within the text</a:documentation>
|
1703
1703
|
<element name="fn">
|
1704
|
-
<
|
1705
|
-
|
1706
|
-
</attribute>
|
1707
|
-
<oneOrMore>
|
1708
|
-
<ref name="paragraph">
|
1709
|
-
<a:documentation>The content of the footnote</a:documentation>
|
1710
|
-
</ref>
|
1711
|
-
</oneOrMore>
|
1704
|
+
<ref name="FnAttributes"/>
|
1705
|
+
<ref name="FnBody"/>
|
1712
1706
|
</element>
|
1713
1707
|
</define>
|
1708
|
+
<define name="FnBody">
|
1709
|
+
<oneOrMore>
|
1710
|
+
<ref name="paragraph">
|
1711
|
+
<a:documentation>The content of the footnote</a:documentation>
|
1712
|
+
</ref>
|
1713
|
+
</oneOrMore>
|
1714
|
+
</define>
|
1715
|
+
<define name="FnAttributes">
|
1716
|
+
<attribute name="reference">
|
1717
|
+
<a:documentation>The number of the footnote, used to identify it visually</a:documentation>
|
1718
|
+
</attribute>
|
1719
|
+
</define>
|
1714
1720
|
<define name="callout">
|
1715
1721
|
<a:documentation>Inline reference to a paragraph or paragraphs, appearing as annotation of source code</a:documentation>
|
1716
1722
|
<element name="callout">
|
@@ -1,13 +1,14 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
2
|
<grammar xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0" xmlns="http://relaxng.org/ns/structure/1.0">
|
3
|
-
<!--
|
4
|
-
Add-ons to biblio.rnc for standoc model: defines the extension point BibDataExtensionType
|
5
|
-
of relaton
|
6
|
-
|
7
|
-
Specialisations as for biblio.rnc. Extension point can be redefined completely for a flavour of standoc
|
8
|
-
(SDO); but other elements in Bibdata can only be extended (more specialised vocabularies for Bibdata)
|
9
|
-
-->
|
10
3
|
<include href="biblio.rng">
|
4
|
+
<!-- ALERT: we cannot have comments on root element, as they intervene with https://github.com/metanorma/metanorma/issues/437 fix -->
|
5
|
+
<!--
|
6
|
+
Add-ons to biblio.rnc for standoc model: defines the extension point BibDataExtensionType
|
7
|
+
of relaton
|
8
|
+
|
9
|
+
Specialisations as for biblio.rnc. Extension point can be redefined completely for a flavour of standoc
|
10
|
+
(SDO); but other elements in Bibdata can only be extended (more specialised vocabularies for Bibdata)
|
11
|
+
-->
|
11
12
|
<define name="BibData">
|
12
13
|
<a:documentation>The bibliographic description of a standardisation document</a:documentation>
|
13
14
|
<ref name="StandardBibliographicItem"/>
|
@@ -91,6 +92,9 @@ a standards definition organization</a:documentation>
|
|
91
92
|
<a:documentation>Representation of the identifier for the standardisation document, giving its individual semantic components</a:documentation>
|
92
93
|
</ref>
|
93
94
|
</zeroOrMore>
|
95
|
+
<ref name="DocumentImages">
|
96
|
+
<a:documentation>Coverpage and other images to be rendered with document</a:documentation>
|
97
|
+
</ref>
|
94
98
|
</define>
|
95
99
|
<define name="doctype">
|
96
100
|
<a:documentation>Classification of the standardisation document</a:documentation>
|
@@ -268,6 +272,32 @@ and not those document components</a:documentation>
|
|
268
272
|
</optional>
|
269
273
|
</element>
|
270
274
|
</define>
|
275
|
+
<define name="DocumentImages">
|
276
|
+
<zeroOrMore>
|
277
|
+
<element name="coverpage-image">
|
278
|
+
<a:documentation>Images to be displayed on the coverpage of the document</a:documentation>
|
279
|
+
<ref name="image-no-id"/>
|
280
|
+
</element>
|
281
|
+
</zeroOrMore>
|
282
|
+
<zeroOrMore>
|
283
|
+
<element name="innercoverpage-image">
|
284
|
+
<a:documentation>Images to be displayed on the inner coverpage of the document</a:documentation>
|
285
|
+
<ref name="image-no-id"/>
|
286
|
+
</element>
|
287
|
+
</zeroOrMore>
|
288
|
+
<zeroOrMore>
|
289
|
+
<element name="tocside-image">
|
290
|
+
<a:documentation>Images to be displayed on the Table of Contents page of the document</a:documentation>
|
291
|
+
<ref name="image-no-id"/>
|
292
|
+
</element>
|
293
|
+
</zeroOrMore>
|
294
|
+
<zeroOrMore>
|
295
|
+
<element name="backpage-image">
|
296
|
+
<a:documentation>Images to be displayed on the backpage of the document</a:documentation>
|
297
|
+
<ref name="image-no-id"/>
|
298
|
+
</element>
|
299
|
+
</zeroOrMore>
|
300
|
+
</define>
|
271
301
|
<define name="StandardBibliographicItem">
|
272
302
|
<ref name="BibliographicItem"/>
|
273
303
|
<zeroOrMore>
|
@@ -1,23 +1,25 @@
|
|
1
1
|
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<!--
|
3
|
-
instantiations of this grammar may replace leaf strings
|
4
|
-
with more elaborated types; e.g. title (text) replaced with
|
5
|
-
title-main, title-intro, title-part; type replaced with
|
6
|
-
enum.
|
7
|
-
|
8
|
-
some renaming at leaf nodes is permissible
|
9
|
-
|
10
|
-
obligations can change both from optional to mandatory,
|
11
|
-
and from mandatory to optional; optional elements may
|
12
|
-
be omitted; freely positioned alternatives may be replaced
|
13
|
-
with strict ordering
|
14
|
-
|
15
|
-
DO NOT introduce a namespace here. We do not want a distinct namespace
|
16
|
-
for these elements, and a distinct namespace for any grammar inheriting
|
17
|
-
these elements; we just want one namespace for any child grammars
|
18
|
-
of this.
|
19
|
-
-->
|
20
2
|
<grammar xmlns:a="http://relaxng.org/ns/compatibility/annotations/1.0" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
|
3
|
+
<!--
|
4
|
+
ALERT: we cannot have comments on root element, as they intervene with https://github.com/metanorma/metanorma/issues/437 fix
|
5
|
+
|
6
|
+
Instantiations of this grammar may replace leaf strings
|
7
|
+
with more elaborated types; e.g. title (text) replaced with
|
8
|
+
title-main, title-intro, title-part; type replaced with
|
9
|
+
enum.
|
10
|
+
|
11
|
+
Some renaming at leaf nodes is permissible
|
12
|
+
|
13
|
+
Obligations can change both from optional to mandatory,
|
14
|
+
and from mandatory to optional; optional elements may
|
15
|
+
be omitted; freely positioned alternatives may be replaced
|
16
|
+
with strict ordering
|
17
|
+
|
18
|
+
DO NOT introduce a namespace here. We do not want a distinct namespace
|
19
|
+
for these elements, and a distinct namespace for any grammar inheriting
|
20
|
+
these elements; we just want one namespace for any child grammars
|
21
|
+
of this.
|
22
|
+
-->
|
21
23
|
<!--
|
22
24
|
https://www.myintervals.com/blog/2009/05/20/iso-8601-date-validation-that-doesnt-suck/
|
23
25
|
iso8601date = xsd:string { pattern = "([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?" }
|
@@ -1241,6 +1243,11 @@ Refer to `BibliographicItem` for definitions</a:documentation>
|
|
1241
1243
|
</define>
|
1242
1244
|
<define name="formattedref">
|
1243
1245
|
<element name="formattedref">
|
1246
|
+
<optional>
|
1247
|
+
<attribute name="format">
|
1248
|
+
<a:documentation>format of formatted reference; Metanorma assumes references are formatted as Metanorma XML</a:documentation>
|
1249
|
+
</attribute>
|
1250
|
+
</optional>
|
1244
1251
|
<oneOrMore>
|
1245
1252
|
<ref name="TextElement"/>
|
1246
1253
|
</oneOrMore>
|
@@ -1812,6 +1819,11 @@ May be used to differentiate rendering of notes in bibliographies</a:documentati
|
|
1812
1819
|
<a:documentation>Abstract of bibliographic item</a:documentation>
|
1813
1820
|
<element name="abstract">
|
1814
1821
|
<ref name="LocalizedStringAttributes"/>
|
1822
|
+
<optional>
|
1823
|
+
<attribute name="format">
|
1824
|
+
<a:documentation>What format the formatted abstract is in. In Metanorma, assumed to be Metanorma XML</a:documentation>
|
1825
|
+
</attribute>
|
1826
|
+
</optional>
|
1815
1827
|
<choice>
|
1816
1828
|
<oneOrMore>
|
1817
1829
|
<ref name="BasicBlockNoId">
|
@@ -211,13 +211,34 @@ module Metanorma
|
|
211
211
|
end
|
212
212
|
|
213
213
|
def pass(node)
|
214
|
+
format = node.attr("format") || "metanorma"
|
214
215
|
noko do |xml|
|
215
|
-
xml.passthrough **attr_code(formats:
|
216
|
-
|
217
|
-
p <<
|
216
|
+
xml.passthrough **attr_code(formats: format) do |p|
|
217
|
+
content = @c.encode(node.content, :basic, :hexadecimal)
|
218
|
+
p << content
|
219
|
+
format == "metanorma" and
|
220
|
+
passthrough_validate(node, node.content, content)
|
218
221
|
end
|
219
222
|
end
|
220
223
|
end
|
224
|
+
|
225
|
+
PASSTHROUGH_ERR = <<~ERRMSG.freeze
|
226
|
+
This is not valid Metanorma XML. If you intended a different format, such as HTML, you need to specify `format=` on the pass markup;
|
227
|
+
refer to https://www.metanorma.org/author/topics/blocks/passthroughs/
|
228
|
+
ERRMSG
|
229
|
+
|
230
|
+
# need to validate Metanorma XML before it passes to textcleanup,
|
231
|
+
# where passthrough wrapper and escaped tags are removed:
|
232
|
+
# <passthrough format="metanorma><tag></passthrough> => <tag>
|
233
|
+
# Do not treat not well-formed XML as invalid,
|
234
|
+
# as it may be fragment, e.g. unterminated start of element markup
|
235
|
+
def passthrough_validate(node, content, encoded_content)
|
236
|
+
valid, = validate_document_fragment(content.dup)
|
237
|
+
err =
|
238
|
+
"Invalid passthrough content: #{encoded_content}\n#{PASSTHROUGH_ERR}"
|
239
|
+
!valid and
|
240
|
+
@log.add("Metanorma XML Syntax", node, err, severity: 0)
|
241
|
+
end
|
221
242
|
end
|
222
243
|
end
|
223
244
|
end
|
@@ -58,7 +58,7 @@ module Metanorma
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
def indirect_eref_to_xref(eref, ident)
|
61
|
+
def indirect_eref_to_xref(eref, ident, id_map=nil)
|
62
62
|
loc = eref.at("./localityStack[locality[@type = 'anchor']]") ||
|
63
63
|
eref.at("./locality[@type = 'anchor']")
|
64
64
|
loc = loc&.remove&.text || ident
|
@@ -66,18 +66,30 @@ module Metanorma
|
|
66
66
|
eref.delete("bibitemid")
|
67
67
|
eref.delete("citeas")
|
68
68
|
eref["target"] = loc
|
69
|
-
|
69
|
+
if id_map
|
70
|
+
return if id_map.has_key?(loc)
|
71
|
+
else
|
72
|
+
eref.document.at("//*[@id = '#{loc}']") and return
|
73
|
+
end
|
70
74
|
eref.children = %(** Missing target #{loc})
|
71
75
|
eref["target"] = ident
|
72
76
|
end
|
73
77
|
|
74
78
|
def resolve_local_indirect_erefs(xmldoc, refs, prefix)
|
79
|
+
# Pre-index elements by ID
|
80
|
+
id_map = xmldoc.xpath("//*[@id]").each_with_object({}) do |node, map|
|
81
|
+
map[node["id"]] = node
|
82
|
+
end
|
83
|
+
|
84
|
+
# Pre-index all <eref> elements by bibitemid
|
85
|
+
eref_map = xmldoc.xpath("//eref[@bibitemid]").group_by { |e| e["bibitemid"] }
|
86
|
+
|
75
87
|
refs.each_with_object([]) do |r, m|
|
76
88
|
id = r.sub(/^#{prefix}_/, "")
|
77
|
-
n =
|
89
|
+
n = id_map[id]
|
78
90
|
if n&.at("./ancestor-or-self::*[@type = '#{prefix}']")
|
79
|
-
|
80
|
-
indirect_eref_to_xref(e, id)
|
91
|
+
eref_map[r]&.each do |e|
|
92
|
+
indirect_eref_to_xref(e, id, id_map)
|
81
93
|
end
|
82
94
|
else m << r
|
83
95
|
end
|
@@ -196,6 +196,9 @@ module Metanorma
|
|
196
196
|
p.name = "passthrough"
|
197
197
|
p.children = select_odd_chars(p.children.to_xml)
|
198
198
|
end
|
199
|
+
doc.xpath("//passthrough[@format = 'metanorma']").each do |p|
|
200
|
+
p.replace(p.children)
|
201
|
+
end
|
199
202
|
doc.xpath("//identifier").each do |p|
|
200
203
|
p.children = select_odd_chars(p.children.to_xml)
|
201
204
|
end
|
@@ -26,9 +26,6 @@ module Metanorma
|
|
26
26
|
asciimath2mathml_err(elem.to_xml, e)
|
27
27
|
end
|
28
28
|
|
29
|
-
# https://medium.com/@rickwang_wxc/in-ruby-given-a-string-detect-if-it-is-valid-numeric-c58275eace60
|
30
|
-
NUMERIC_REGEX = %r{^((\+|-)?\d*\.?\d+)([eE](\+|-){1}\d+)?$}
|
31
|
-
|
32
29
|
MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
|
33
30
|
|
34
31
|
def asciimath_parse(expr, elem)
|
@@ -169,8 +166,7 @@ module Metanorma
|
|
169
166
|
return
|
170
167
|
end
|
171
168
|
f == "default" or return f
|
172
|
-
if @numberfmt_default.empty?
|
173
|
-
"notation='basic'"
|
169
|
+
if @numberfmt_default.empty? then "notation='basic'"
|
174
170
|
else @numberfmt_default&.map { |k, v| "#{k}='#{v}'" }&.join(",")
|
175
171
|
end
|
176
172
|
end
|
@@ -191,6 +187,8 @@ module Metanorma
|
|
191
187
|
.each { |x| mathml_number_format(x) }
|
192
188
|
mathml_unitsml(xmldoc)
|
193
189
|
end
|
190
|
+
|
191
|
+
include ::Metanorma::Standoc::Regex
|
194
192
|
end
|
195
193
|
end
|
196
194
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Metanorma
|
2
|
+
module Standoc
|
3
|
+
module Cleanup
|
4
|
+
def review_cleanup(xmldoc)
|
5
|
+
reviews = xmldoc.xpath("//review")
|
6
|
+
reviews.empty? and return
|
7
|
+
ctr = xmldoc.root.add_child("<review-container/>").first
|
8
|
+
reviews.each do |r|
|
9
|
+
review_set_location(r)
|
10
|
+
ctr << r
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def review_insert_bookmark(review, id)
|
15
|
+
parent = review.parent
|
16
|
+
children = parent.children
|
17
|
+
index = children.index(review)
|
18
|
+
x = find_review_sibling(children, index, :previous) ||
|
19
|
+
find_review_sibling(children, index, :following)
|
20
|
+
ins = x || review.before("<p> </p>").previous.at(".//text()")
|
21
|
+
ins.previous = "<bookmark id='#{id}'/>"
|
22
|
+
end
|
23
|
+
|
24
|
+
# we know node is a block: dig for a place bookmark can go
|
25
|
+
def available_bookmark_destination(node)
|
26
|
+
ret = case node.name
|
27
|
+
when "title", "name", "p" then node
|
28
|
+
when "sourcecode" then node.at(".//name")
|
29
|
+
when "admonition", "note", "example", "li", "quote", "dt", "dd",
|
30
|
+
"permission", "requirement", "recommendation"
|
31
|
+
node.at(".//p | .//name") || node
|
32
|
+
when "formula"
|
33
|
+
node.at(".//p | .//name | .//dt")
|
34
|
+
when "ol", "ul" then node.at(".//p | .//name") || node.at("./li")
|
35
|
+
when "dl" then node.at(".//p | .//name") || node.at("./dt | ./dd")
|
36
|
+
when "table" then node.at(".//td[text()] | .//th[text()]")
|
37
|
+
end or return nil
|
38
|
+
first_non_stem_text(ret)
|
39
|
+
end
|
40
|
+
|
41
|
+
def first_non_stem_text(ret)
|
42
|
+
first_non_stem_text = nil
|
43
|
+
ret.traverse do |n|
|
44
|
+
if n.text? && n.ancestors("stem").empty? && !n.text.strip.empty?
|
45
|
+
first_non_stem_text = n
|
46
|
+
break
|
47
|
+
end
|
48
|
+
end
|
49
|
+
first_non_stem_text
|
50
|
+
end
|
51
|
+
|
52
|
+
def find_review_sibling(children, index, direction = :previous)
|
53
|
+
range = if direction == :previous then (index - 1).downto(0)
|
54
|
+
else (index + 1).upto(children.size - 1)
|
55
|
+
end
|
56
|
+
range.each do |i|
|
57
|
+
node = children[i]
|
58
|
+
if node.element? && !node.text.empty? && node.text.strip != "" &&
|
59
|
+
ret = available_bookmark_destination(node)
|
60
|
+
return ret
|
61
|
+
end
|
62
|
+
end
|
63
|
+
nil
|
64
|
+
end
|
65
|
+
|
66
|
+
def review_set_location(review)
|
67
|
+
unless review["from"]
|
68
|
+
id = "_#{UUIDTools::UUID.random_create}"
|
69
|
+
review_insert_bookmark(review, id)
|
70
|
+
review["from"] = id
|
71
|
+
end
|
72
|
+
review["to"] ||= review["from"]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
@@ -96,9 +96,7 @@ module Metanorma
|
|
96
96
|
y.name == "annex" || !y.ancestors("annex").empty? and next
|
97
97
|
y.wrap("<annex/>")
|
98
98
|
y.parent["id"] = "_#{UUIDTools::UUID.random_create}"
|
99
|
-
%w(obligation language script).each
|
100
|
-
y.parent[w] = y[w]
|
101
|
-
end
|
99
|
+
%w(obligation language script).each { |w| y.parent[w] = y[w] }
|
102
100
|
end
|
103
101
|
end
|
104
102
|
|
@@ -208,33 +206,6 @@ module Metanorma
|
|
208
206
|
ins.previous = x.remove
|
209
207
|
end
|
210
208
|
end
|
211
|
-
|
212
|
-
def insert_before(xmldoc, xpath)
|
213
|
-
unless ins = xmldoc.at(xpath).children.first
|
214
|
-
xmldoc.at(xpath) << " "
|
215
|
-
ins = xmldoc.at(xpath).children.first
|
216
|
-
end
|
217
|
-
ins
|
218
|
-
end
|
219
|
-
|
220
|
-
def review_cleanup(xmldoc)
|
221
|
-
reviews = xmldoc.xpath("//review")
|
222
|
-
reviews.empty? and return
|
223
|
-
ctr = xmldoc.root.add_child("<review-container/>").first
|
224
|
-
reviews.each do |r|
|
225
|
-
review_set_location(r)
|
226
|
-
ctr << r
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
def review_set_location(review)
|
231
|
-
unless review["from"]
|
232
|
-
id = "_#{UUIDTools::UUID.random_create}"
|
233
|
-
review.previous = "<bookmark id='#{id}'/>"
|
234
|
-
review["from"] = id
|
235
|
-
end
|
236
|
-
review["to"] ||= review["from"]
|
237
|
-
end
|
238
209
|
end
|
239
210
|
end
|
240
211
|
end
|
@@ -16,16 +16,6 @@ module Metanorma
|
|
16
16
|
!path.intersection(ancestors).empty?
|
17
17
|
end
|
18
18
|
|
19
|
-
def linebreak_cleanup(xmldoc)
|
20
|
-
xmldoc.traverse do |x|
|
21
|
-
x.text? && x.text.include?("\n") or next
|
22
|
-
ancestor_include?(x, PRESERVE_LINEBREAK_ELEMENTS) and next
|
23
|
-
ancestor_include?(x, STRIP_LINEBREAK_ELEMENTS) or next
|
24
|
-
x.replace(Metanorma::Utils
|
25
|
-
.line_sanitise(x.text.lines.map(&:rstrip)).join)
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
19
|
# process example/p, example/sourcecode, not example on its own:
|
30
20
|
# this is about stripping lines for blocks containing inline elems & text
|
31
21
|
def linebreak_cleanup(xmldoc)
|
@@ -57,11 +47,15 @@ module Metanorma
|
|
57
47
|
|
58
48
|
def gather_text_for_linebreak_cleanup(block)
|
59
49
|
x = block.xpath(".//text()").map do |e|
|
60
|
-
{ elem: e, text: e.text,
|
50
|
+
{ elem: e, text: e.text, stem: ancestor_include?(e, %w(stem)),
|
61
51
|
skip: ancestor_include?(e, PRESERVE_LINEBREAK_ELEMENTS) }
|
62
52
|
end
|
63
53
|
x.empty? and return x
|
64
54
|
x.each { |e| e[:skip] ||= !e[:text].include?("\n") }
|
55
|
+
x.each_with_index do |e, i|
|
56
|
+
# do not treat stem linebreaks as meaningful
|
57
|
+
e[:skip] ||= x[i + 1]&.dig(:stem)
|
58
|
+
end
|
65
59
|
x[-1][:last] = true
|
66
60
|
x
|
67
61
|
end
|
@@ -71,6 +65,9 @@ module Metanorma
|
|
71
65
|
if @smartquotes then smartquotes_cleanup1(xmldoc)
|
72
66
|
else dumbquote_cleanup(xmldoc)
|
73
67
|
end
|
68
|
+
xmldoc.xpath("//passthrough[@formats = 'straightquotes']").each do |x|
|
69
|
+
x.replace(x.children)
|
70
|
+
end
|
74
71
|
end
|
75
72
|
|
76
73
|
def smartquotes_cleanup1(xmldoc)
|
@@ -91,7 +88,7 @@ module Metanorma
|
|
91
88
|
# "abc<tag/>", def => "abc",<tag/> def
|
92
89
|
# TODO?
|
93
90
|
def uninterrupt_quotes_around_xml1(xmldoc)
|
94
|
-
|
91
|
+
xmldoc.xpath("//text()[preceding-sibling::*[1]]").each do |n|
|
95
92
|
uninterrupt_quotes_around_xml_skip(n) and next
|
96
93
|
uninterrupt_quotes_around_xml1(n.previous)
|
97
94
|
end
|
@@ -102,7 +99,7 @@ module Metanorma
|
|
102
99
|
identifier metanorma-extension).freeze
|
103
100
|
|
104
101
|
PRESERVE_LINEBREAK_ELEMENTS =
|
105
|
-
%w(pre sourcecode passthrough metanorma-extension).freeze
|
102
|
+
%w(pre sourcecode passthrough metanorma-extension stem).freeze
|
106
103
|
|
107
104
|
STRIP_LINEBREAK_ELEMENTS =
|
108
105
|
%w(title name variant-title figure example review admonition
|
@@ -157,29 +154,12 @@ module Metanorma
|
|
157
154
|
block?(x) and prev = ""
|
158
155
|
empty_tag_with_text_content?(x) and prev = "dummy"
|
159
156
|
x.text? or next
|
160
|
-
|
161
|
-
# ancestors = x.path.gsub(/\[\d+\]/, "").split(%r{/})[1..-2]
|
162
|
-
# ancestors.intersection(IGNORE_QUOTES_ELEMENTS).empty? or next
|
163
157
|
ancestor_include?(x, IGNORE_QUOTES_ELEMENTS) and next
|
164
158
|
dumb2smart_quotes1(x, prev)
|
165
159
|
prev = x.text
|
166
160
|
end
|
167
161
|
end
|
168
162
|
|
169
|
-
def dumb2smart_quotesx(xmldoc)
|
170
|
-
# TODO?>
|
171
|
-
prev = ""
|
172
|
-
xmldoc.xpath("//* | //text()").each do |x|
|
173
|
-
x.is_a?(Nokogiri::XML::Node) or next
|
174
|
-
block?(x) and prev = ""
|
175
|
-
empty_tag_with_text_content?(x) and prev = "dummy"
|
176
|
-
x.text? or next
|
177
|
-
ancestor_include?(x, IGNORE_QUOTES_ELEMENTS) and next
|
178
|
-
dumb2smart_quotes1(x, prev)
|
179
|
-
prev = x.text
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
163
|
def dumb2smart_quotes1(curr, prev)
|
184
164
|
/[-'"(<>]|\.\.|\dx/.match?(curr.text) or return
|
185
165
|
|
@@ -190,7 +170,7 @@ ancestor_include?(x, IGNORE_QUOTES_ELEMENTS) and next
|
|
190
170
|
|
191
171
|
def dumbquote_cleanup(xmldoc)
|
192
172
|
xmldoc.traverse do |n|
|
193
|
-
next unless n.text? &&
|
173
|
+
next unless n.text? && n.text.include?("\u2019")
|
194
174
|
|
195
175
|
n.replace(@c.encode(
|
196
176
|
@c.decode(n.text)
|
@@ -1,33 +1,6 @@
|
|
1
1
|
module Metanorma
|
2
2
|
module Standoc
|
3
3
|
module Cleanup
|
4
|
-
# extending localities to cover ISO referencing
|
5
|
-
CONN_REGEX_STR = "(?<conn>and|or|from|to)!".freeze
|
6
|
-
|
7
|
-
LOCALITIES = "section|clause|part|paragraph|chapter|page|line|" \
|
8
|
-
"table|annex|figure|example|note|formula|list|time|anchor|" \
|
9
|
-
"locality:[^ \\t\\n\\r:,;=]+".freeze
|
10
|
-
|
11
|
-
LOCALITY_REGEX_STR = <<~REGEXP.freeze
|
12
|
-
^((#{CONN_REGEX_STR})?
|
13
|
-
(?<locality>#{LOCALITIES})(\\s+|=)
|
14
|
-
(?<ref>[^"][^ \\t\\n,:;-]*|"[^"]+")
|
15
|
-
(-(?<to>[^"][^ \\t\\n,:;-]*|"[^"]"))?|
|
16
|
-
(?<locality2>whole|title|locality:[^ \\t\\n\\r:,;=]+))(?<punct>[,:;]?)\\s*
|
17
|
-
(?<text>.*)$
|
18
|
-
REGEXP
|
19
|
-
|
20
|
-
def to_regex(str)
|
21
|
-
Regexp.new(str.gsub(/\s/, ""), Regexp::IGNORECASE | Regexp::MULTILINE)
|
22
|
-
end
|
23
|
-
|
24
|
-
LOCALITY_REGEX_VALUE_ONLY_STR = <<~REGEXP.freeze
|
25
|
-
^(?<conn0>(#{CONN_REGEX_STR}))
|
26
|
-
(?!whole|title|locality:)
|
27
|
-
(?<value>[^=,;:\\t\\n\\r]+)
|
28
|
-
(?<punct>[,;\\t\\n\\r]|$)
|
29
|
-
REGEXP
|
30
|
-
|
31
4
|
def tq(text)
|
32
5
|
text.sub(/^"/, "").sub(/"$/, "")
|
33
6
|
end
|
@@ -43,16 +16,6 @@ module Metanorma
|
|
43
16
|
d.children.empty? and d.remove
|
44
17
|
end
|
45
18
|
|
46
|
-
LOCALITY_REGEX_STR_TRIPLEDASH = <<~REGEXP.freeze
|
47
|
-
^(?<locality>(#{CONN_REGEX_STR})?
|
48
|
-
(#{LOCALITIES})(\\s+|=))
|
49
|
-
(?<ref>[^"][^ \\t\\n,:;-]*
|
50
|
-
-[^ \\t\\n,:;"-]+
|
51
|
-
-[^ \\t\\n,:;"]+)
|
52
|
-
(?<text>[,:;]?\\s*
|
53
|
-
.*)$
|
54
|
-
REGEXP
|
55
|
-
|
56
19
|
# treat n-n-n locality as "n-n-n", do not parse as a range
|
57
20
|
def locality_normalise(text)
|
58
21
|
re = to_regex(LOCALITY_REGEX_STR_TRIPLEDASH)
|
@@ -261,6 +224,8 @@ module Metanorma
|
|
261
224
|
extract_localities(x)
|
262
225
|
end
|
263
226
|
end
|
227
|
+
|
228
|
+
include ::Metanorma::Standoc::Regex
|
264
229
|
end
|
265
230
|
end
|
266
231
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
require "date"
|
2
|
-
require "htmlentities"
|
3
2
|
require "pathname"
|
4
3
|
require_relative "./front_contributor"
|
5
4
|
require "isoics"
|
@@ -13,7 +12,7 @@ module Metanorma
|
|
13
12
|
end
|
14
13
|
|
15
14
|
def metadata_id_build(node)
|
16
|
-
part, subpart = node&.attr("partnumber")&.split(
|
15
|
+
part, subpart = node&.attr("partnumber")&.split("-")
|
17
16
|
id = node.attr("docnumber") || ""
|
18
17
|
id += "-#{part}" if part
|
19
18
|
id += "-#{subpart}" if subpart
|
@@ -189,9 +188,12 @@ module Metanorma
|
|
189
188
|
metadata_flavor(node, ext)
|
190
189
|
metadata_committee(node, ext)
|
191
190
|
metadata_ics(node, ext)
|
191
|
+
structured_id(node, ext)
|
192
192
|
metadata_coverpage_images(node, ext)
|
193
193
|
end
|
194
194
|
|
195
|
+
def structured_id(node, xml); end
|
196
|
+
|
195
197
|
def metadata_doctype(node, xml)
|
196
198
|
xml.doctype doctype(node)
|
197
199
|
end
|
@@ -218,7 +220,8 @@ module Metanorma
|
|
218
220
|
at = { language: lang, format: "text/plain" }
|
219
221
|
xml.title **attr_code(at) do |t|
|
220
222
|
title = Metanorma::Utils::asciidoc_sub(
|
221
|
-
node.attr("title") || node.attr("title-en") || node.attr("doctitle")
|
223
|
+
node.attr("title") || node.attr("title-en") || node.attr("doctitle"),
|
224
|
+
)
|
222
225
|
t << title
|
223
226
|
end
|
224
227
|
end
|