metanorma-standoc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. checksums.yaml +7 -0
  2. data/.gitattributes +4 -0
  3. data/.gitignore +11 -0
  4. data/.hound.yml +3 -0
  5. data/.oss-guides.rubocop.yml +1077 -0
  6. data/.rubocop.ribose.yml +66 -0
  7. data/.rubocop.tb.yml +650 -0
  8. data/.rubocop.yml +15 -0
  9. data/.travis.yml +21 -0
  10. data/CODE_OF_CONDUCT.md +46 -0
  11. data/Gemfile +7 -0
  12. data/LICENSE +25 -0
  13. data/Makefile +39 -0
  14. data/README.adoc +9 -0
  15. data/Rakefile +6 -0
  16. data/bin/rspec +18 -0
  17. data/docs/customisation.adoc +178 -0
  18. data/docs/guidance.adoc +436 -0
  19. data/docs/htmloutput.adoc +115 -0
  20. data/docs/quickstart.adoc +375 -0
  21. data/lib/asciidoctor/standoc/base.rb +198 -0
  22. data/lib/asciidoctor/standoc/biblio.rng +836 -0
  23. data/lib/asciidoctor/standoc/blocks.rb +190 -0
  24. data/lib/asciidoctor/standoc/cleanup.rb +247 -0
  25. data/lib/asciidoctor/standoc/cleanup_block.rb +193 -0
  26. data/lib/asciidoctor/standoc/cleanup_footnotes.rb +78 -0
  27. data/lib/asciidoctor/standoc/cleanup_ref.rb +125 -0
  28. data/lib/asciidoctor/standoc/converter.rb +55 -0
  29. data/lib/asciidoctor/standoc/front.rb +121 -0
  30. data/lib/asciidoctor/standoc/inline.rb +134 -0
  31. data/lib/asciidoctor/standoc/isodoc.rng +1059 -0
  32. data/lib/asciidoctor/standoc/lists.rb +87 -0
  33. data/lib/asciidoctor/standoc/macros.rb +95 -0
  34. data/lib/asciidoctor/standoc/ref.rb +187 -0
  35. data/lib/asciidoctor/standoc/section.rb +159 -0
  36. data/lib/asciidoctor/standoc/table.rb +61 -0
  37. data/lib/asciidoctor/standoc/utils.rb +121 -0
  38. data/lib/asciidoctor/standoc/validate.rb +65 -0
  39. data/lib/asciidoctor/standoc/validate_section.rb +42 -0
  40. data/lib/asciidoctor/standoc/version.rb +5 -0
  41. data/lib/metanorma-standoc.rb +9 -0
  42. data/lib/metanorma/standoc.rb +7 -0
  43. data/lib/metanorma/standoc/processor.rb +40 -0
  44. data/metanorma-standoc.gemspec +47 -0
  45. data/spec/asciidoctor-standoc/base_spec.rb +271 -0
  46. data/spec/asciidoctor-standoc/blocks_spec.rb +469 -0
  47. data/spec/asciidoctor-standoc/cleanup_spec.rb +760 -0
  48. data/spec/asciidoctor-standoc/inline_spec.rb +162 -0
  49. data/spec/asciidoctor-standoc/isobib_cache_spec.rb +332 -0
  50. data/spec/asciidoctor-standoc/lists_spec.rb +190 -0
  51. data/spec/asciidoctor-standoc/macros_spec.rb +111 -0
  52. data/spec/asciidoctor-standoc/refs_spec.rb +606 -0
  53. data/spec/asciidoctor-standoc/section_spec.rb +310 -0
  54. data/spec/asciidoctor-standoc/table_spec.rb +307 -0
  55. data/spec/asciidoctor-standoc/validate_spec.rb +133 -0
  56. data/spec/assets/header.html +7 -0
  57. data/spec/assets/html.css +2 -0
  58. data/spec/assets/htmlcover.html +4 -0
  59. data/spec/assets/htmlintro.html +5 -0
  60. data/spec/assets/i18n.yaml +2 -0
  61. data/spec/assets/iso.headless.html +33 -0
  62. data/spec/assets/iso.xml +8 -0
  63. data/spec/assets/rice_image1.png +0 -0
  64. data/spec/assets/scripts.html +3 -0
  65. data/spec/assets/std.css +2 -0
  66. data/spec/assets/word.css +2 -0
  67. data/spec/assets/wordcover.html +3 -0
  68. data/spec/assets/wordintro.html +4 -0
  69. data/spec/examples/103_01_02.html +247 -0
  70. data/spec/examples/english.yaml +69 -0
  71. data/spec/examples/iso_123_.xml +45 -0
  72. data/spec/examples/iso_123_all_parts.xml +45 -0
  73. data/spec/examples/iso_123_no_year_note.xml +46 -0
  74. data/spec/examples/iso_124_.xml +41 -0
  75. data/spec/examples/iso_216_.xml +47 -0
  76. data/spec/examples/iso_iec_12382_.xml +48 -0
  77. data/spec/examples/rice.adoc +715 -0
  78. data/spec/examples/rice.preview.html +1877 -0
  79. data/spec/examples/rice.sh +4 -0
  80. data/spec/examples/rice_images/rice_image1.png +0 -0
  81. data/spec/examples/rice_images/rice_image2.png +0 -0
  82. data/spec/examples/rice_images/rice_image3_1.png +0 -0
  83. data/spec/examples/rice_images/rice_image3_2.png +0 -0
  84. data/spec/examples/rice_images/rice_image3_3.png +0 -0
  85. data/spec/metanorma/processor_spec.rb +70 -0
  86. data/spec/spec_helper.rb +198 -0
  87. metadata +370 -0
@@ -0,0 +1,78 @@
1
+ require "date"
2
+ require "nokogiri"
3
+ require "htmlentities"
4
+ require "json"
5
+ require "pathname"
6
+ require "open-uri"
7
+ require "pp"
8
+
9
+ module Asciidoctor
10
+ module Standoc
11
+ module Cleanup
12
# Include footnotes inside figure.
#
# While some <p> is the first following sibling of a <figure> and has an <fn>
# as its first child element, that <fn> is appended to the element preceding
# the <p>, and the <p> itself is removed. The query is repeated until it no
# longer matches anything.
#
# @param xmldoc XML document (responds to #xpath)
def figure_footnote_cleanup(xmldoc)
  query = "//figure/following-sibling::*[1][self::p and *[1][self::fn]]"
  loop do
    trailing = xmldoc.xpath(query)
    break if trailing.empty?

    trailing.each do |para|
      para.previous_element << para.first_element_child.remove
      para.remove
    end
  end
end
25
+
26
# Labels a single footnote that sits inside a table or figure.
#
# A footnote whose text has been seen before reuses the number recorded for
# that text; otherwise the counter is incremented and recorded. The number is
# written to the "reference" attribute as a letter (1 => "a", 2 => "b", ...)
# and the footnote is flagged with a "table" attribute.
#
# @param fn footnote node (responds to #text and #[]=)
# @param i [Integer] count of distinct footnotes so far
# @param seen [Hash] footnote text => assigned number
# @return [Array] the updated [i, seen]
def table_footnote_renumber1(fn, i, seen)
  number = seen[fn.text]
  unless number
    i += 1
    number = i
    seen[fn.text] = number
  end
  fn["reference"] = ("a".ord + number - 1).chr
  fn["table"] = true
  [i, seen]
end
37
+
38
# Relabels the footnotes of every table and figure in the document.
#
# Each table/figure gets its own counter and its own text => number map, which
# are threaded through table_footnote_renumber1 for every <fn> inside it.
#
# @param xmldoc XML document (responds to #xpath)
def table_footnote_renumber(xmldoc)
  xmldoc.xpath("//table | //figure").each do |container|
    container.xpath(".//fn").reduce([0, {}]) do |(count, labels), fn|
      table_footnote_renumber1(fn, count, labels)
    end
  end
end
47
+
48
# Numbers a single footnote that is not flagged as a table/figure footnote.
#
# Footnotes carrying a "table" attribute are left untouched. Otherwise a
# footnote whose text was seen before reuses that number, a new text advances
# the counter, and the number is written to "reference" as a decimal string.
#
# @param fn footnote node (responds to #text, #[] and #[]=)
# @param i [Integer] count of distinct footnotes so far
# @param seen [Hash] footnote text => assigned number
# @return [Array] the updated [i, seen]
def other_footnote_renumber1(fn, i, seen)
  return [i, seen] if fn["table"]

  number = seen[fn.text]
  unless number
    i += 1
    number = i
    seen[fn.text] = number
  end
  fn["reference"] = number.to_s
  [i, seen]
end
60
+
61
# Numbers every <fn> in the document with one document-wide counter, by
# passing each through other_footnote_renumber1 together with the running
# counter and the text => number map.
#
# @param xmldoc XML document (responds to #xpath)
def other_footnote_renumber(xmldoc)
  count = 0
  numbers = {}
  xmldoc.xpath("//fn").each do |footnote|
    count, numbers = other_footnote_renumber1(footnote, count, numbers)
  end
end
68
+
69
# Renumbers all footnotes: table/figure footnotes first, then the remaining
# ones, and finally strips the "table" attribute from every <fn> (the first
# pass sets it and the second pass reads it).
#
# @param xmldoc XML document (responds to #xpath)
def footnote_renumber(xmldoc)
  table_footnote_renumber(xmldoc)
  other_footnote_renumber(xmldoc)
  xmldoc.xpath("//fn").each { |footnote| footnote.delete("table") }
end
76
+ end
77
+ end
78
+ end
@@ -0,0 +1,125 @@
1
+ module Asciidoctor
2
+ module Standoc
3
+ module Cleanup
4
# extending localities to cover ISO referencing
#
# Source text for LOCALITY_RE, spread over several lines for legibility; all
# whitespace is stripped from it before it is compiled. At the start of a line
# it matches either
#   <locality> (whitespace or "=") <ref> [ "-" <to> ]
# or a bare <locality2> ("whole" or "locality:..."), followed by an optional
# "," or ":" and optional whitespace; the remainder is captured as <text>.
# <ref> and <to> are each either a bare token or a double-quoted string.
#
# The quoted form of <to> now accepts one or more characters between the
# quotes, like <ref>; it previously accepted exactly one, so the upper bound
# of a range such as "3.1"-"3.5" was never recognised.
#
# NOTE(review): because whitespace is stripped before compilation, the literal
# space inside the [^ ...] character classes is lost, so a bare token is not
# terminated by a space. Confirm whether that is intended.
LOCALITY_REGEX_STR = <<~REGEXP.freeze
  ^((?<locality>section|clause|part|paragraph|chapter|page|
                table|annex|figure|example|note|formula|
                locality:[^ \\t\\n\\r:,]+)(\\s+|=)
         (?<ref>[^"][^ \\t\\n,:-]*|"[^"]+")
           (-(?<to>[^"][^ \\t\\n,:-]*|"[^"]+"))?|
    (?<locality2>whole|locality:[^ \\t\\n\\r:,]+))[,:]?\\s*
   (?<text>.*)$
REGEXP
LOCALITY_RE = Regexp.new(LOCALITY_REGEX_STR.gsub(/\s/, ""),
                         Regexp::IGNORECASE | Regexp::MULTILINE)
16
+
17
# Removes a double quote at the start of a line and a double quote at the end
# of a line from the string (at most one of each).
#
# @param x [String]
# @return [String] a new string without the surrounding quotes
def tq(x)
  without_opening = x.sub(/^"/, "")
  without_opening.sub(/"$/, "")
end
20
+
21
# Converts leading locality expressions in a node's text into <locality>
# children.
#
# The node's first child is detached and its text is matched repeatedly
# against LOCALITY_RE. Every match appends a <locality type='...'> element,
# with <referenceFrom>/<referenceTo> when the match carries them (unquoted via
# tq), and matching continues on the captured remainder. Whatever is left is
# appended back to the node.
#
# @param x node with at least one child (responds to #children, #add_child)
def extract_localities(x)
  remainder = x.children.first.remove.text
  loop do
    match = LOCALITY_RE.match(remainder)
    break unless match

    from = match[:ref] ? "<referenceFrom>#{tq match[:ref]}</referenceFrom>" : ""
    to = match[:to] ? "<referenceTo>#{tq match[:to]}</referenceTo>" : ""
    type = (match[:locality] || match[:locality2])&.downcase
    x.add_child("<locality type='#{type}'>#{from}#{to}</locality>")
    remainder = match[:text]
  end
  x.add_child(remainder)
end
32
+
33
# Rewrites the attributes of a cross-reference so it points at a bibliographic
# item.
#
# "target" is copied to "bibitemid" and then deleted. "citeas" is set to the
# :xref entry recorded in @anchors for the target; when there is none, a
# warning is emitted and "citeas" receives warn's return value. If the node
# has children, localities are extracted from them.
#
# @param x node (responds to #[], #[]=, #delete, #children)
def xref_to_eref(x)
  target = x["target"]
  x["bibitemid"] = target
  x["citeas"] = @anchors&.dig(target, :xref) ||
    warn("ISO: #{target} is not a real reference!")
  x.delete("target")
  return if x.children.empty?

  extract_localities(x)
end
40
+
41
# Sorts every <xref> into one of two kinds: those whose target is a known
# reference id (per refid?) are renamed to "eref" and passed to xref_to_eref;
# all others just lose their "type" attribute.
#
# @param xmldoc XML document (responds to #xpath)
def xref_cleanup(xmldoc)
  xmldoc.xpath("//xref").each do |xref|
    unless refid? xref["target"]
      xref.delete("type")
      next
    end
    xref.name = "eref"
    xref_to_eref(xref)
  end
end
51
+
52
# Allows us to deal with doc relation localities, temporarily stashed to
# "bpart": localities are extracted inside each <relation>/<bpart>, and the
# <bpart> wrapper is then replaced by its own children.
#
# @param xmldoc XML document (responds to #xpath)
def bpart_cleanup(xmldoc)
  xmldoc.xpath("//relation/bpart").each do |bpart|
    extract_localities(bpart)
    contents = bpart.children
    bpart.replace(contents)
  end
end
60
+
61
# Applies xref_to_eref to every <source> that is a child of <quote> or of
# <terms>.
#
# @param xmldoc XML document (responds to #xpath)
def quotesource_cleanup(xmldoc)
  sources = xmldoc.xpath("//quote/source | //terms/source")
  sources.each { |source| xref_to_eref(source) }
end
66
+
67
# Fills in "citeas" on every <origin> from the :xref entry that @anchors holds
# for its "bibitemid"; when there is none, a warning is emitted and "citeas"
# receives warn's return value. Localities are then extracted from any
# <origin> that has children.
#
# @param xmldoc XML document (responds to #xpath)
def origin_cleanup(xmldoc)
  xmldoc.xpath("//origin").each do |origin|
    bibitemid = origin["bibitemid"]
    origin["citeas"] = @anchors&.dig(bibitemid, :xref) ||
      warn("ISO: #{bibitemid} is not a real reference!")
    next if origin.children.empty?

    extract_localities(origin)
  end
end
74
+
75
# Move ref before p: every <ref> that is a child of a <p> is detached and
# re-inserted as the sibling immediately preceding that <p>.
#
# @param xmldoc XML document (responds to #xpath)
def ref_cleanup(xmldoc)
  xmldoc.xpath("//p/ref").each do |ref|
    # The receiver (the parent) is evaluated before the argument, so the
    # parent is looked up while the ref is still attached.
    ref.parent.previous = ref.remove
  end
end
82
+
83
# Strips the normative references section down to its title and bibliographic
# items: in the <references> whose title is exactly "Normative References",
# every child element other than <title> and <bibitem> is removed. Does
# nothing when no such section exists.
#
# @param xmldoc XML document (responds to #at)
def normref_cleanup(xmldoc)
  normrefs = xmldoc.at("//references[title = 'Normative References']")
  return unless normrefs

  normrefs.elements.each do |child|
    child.remove unless ["title", "bibitem"].include?(child.name)
  end
end
90
+
91
# Formats a document identifier for citation.
#
# Identifiers of ISO/IEC publications are returned untouched. For anything
# else, an identifier made of digits only is wrapped in square brackets;
# every other identifier is returned as is.
#
# @param ref [String] document identifier
# @param isopub truthy when the item is published by ISO or IEC
# @return [String]
def format_ref(ref, isopub)
  if !isopub && /^\d+$/.match(ref) && !/^\[.*\]$/.match(ref)
    "[#{ref}]"
  else
    ref
  end
end
96
+
97
# XPath, relative to a bibliographic item, selecting the organization of a
# contributor with the "publisher" role when that organization is ISO or IEC
# (matched by abbreviation or by full name).
ISO_PUBLISHER_XPATH = [
  "./contributor[role/@type = 'publisher']/",
  "organization[abbreviation = 'ISO' or abbreviation = 'IEC' or ",
  "name = 'International Organization for Standardization' or ",
  "name = 'International Electrotechnical Commission']",
].join.freeze
102
+
103
# Renders a date element as text.
#
# If the element has an <on> child, its text is the result. Otherwise the
# result is the <from> text followed by "&ndash;" and, when present, the <to>
# text.
#
# @param date date node (responds to #at)
# @return [String]
def date_range(date)
  from, to, on = %w[./from ./to ./on].map { |path| date.at(path) }
  return on.text if on

  "#{from.text}&ndash;#{to&.text}"
end
112
+
113
# Records the citation label of every top-level bibliographic item.
#
# For each <bibitem> that is not nested in another <bibitem>, the label is the
# docidentifier text run through format_ref, with ":" and the published date
# (via date_range) appended when a published date exists. The label is stored
# in @anchors under the item's id as { xref: label }.
#
# @param xmldoc XML document (responds to #xpath)
def reference_names(xmldoc)
  xmldoc.xpath("//bibitem[not(ancestor::bibitem)]").each do |bibitem|
    iso_publisher = bibitem.at(ISO_PUBLISHER_XPATH)
    label = format_ref(bibitem.at("./docidentifier").text, iso_publisher)
    published = bibitem.at("./date[@type = 'published']")
    label += ":#{date_range(published)}" if published
    @anchors[bibitem["id"]] = { xref: label }
  end
end
123
+ end
124
+ end
125
+ end
@@ -0,0 +1,55 @@
1
+ require "asciidoctor"
2
+ require "asciidoctor/standoc/version"
3
+ require "asciidoctor/standoc/base"
4
+ require "asciidoctor/standoc/front"
5
+ require "asciidoctor/standoc/lists"
6
+ require "asciidoctor/standoc/ref"
7
+ require "asciidoctor/standoc/inline"
8
+ require "asciidoctor/standoc/blocks"
9
+ require "asciidoctor/standoc/section"
10
+ require "asciidoctor/standoc/table"
11
+ require "asciidoctor/standoc/validate"
12
+ require "asciidoctor/standoc/utils"
13
+ require "asciidoctor/standoc/cleanup"
14
+ require_relative "./macros.rb"
15
+
16
+ module Asciidoctor
17
+ module Standoc
18
# A {Converter} implementation that generates Standoc output, and a document
# schema encapsulation of the document for validation
class Converter
  include ::Asciidoctor::Converter
  include ::Asciidoctor::Writer

  # Standoc mixins; included one at a time so their relative precedence stays
  # exactly as listed.
  include ::Asciidoctor::Standoc::Base
  include ::Asciidoctor::Standoc::Front
  include ::Asciidoctor::Standoc::Lists
  include ::Asciidoctor::Standoc::Inline
  include ::Asciidoctor::Standoc::Blocks
  include ::Asciidoctor::Standoc::Section
  include ::Asciidoctor::Standoc::Table
  include ::Asciidoctor::Standoc::Utils
  include ::Asciidoctor::Standoc::Cleanup
  include ::Asciidoctor::Standoc::Validate

  register_for "standoc"

  $xreftext = {}

  # Sets up the converter with "html" as base backend and ".xml" as the
  # output file suffix.
  def initialize(backend, opts)
    super
    basebackend "html"
    outfilesuffix ".xml"
  end

  alias_method :embedded, :content
  alias_method :verse, :quote

  # Node types that are all handled by :skip.
  %i[audio video inline_button inline_kbd inline_menu inline_image].each do |node_type|
    alias_method node_type, :skip
  end
end
54
+ end
55
+ end
@@ -0,0 +1,121 @@
1
+ require "date"
2
+ require "nokogiri"
3
+ require "htmlentities"
4
+ require "json"
5
+ require "pathname"
6
+ require "open-uri"
7
+ require "pp"
8
+
9
+ module Asciidoctor
10
+ module Standoc
11
+ module Front
12
# Emits <docidentifier> containing a project number element.
#
# The "partnumber" attribute is split on "-" into part and subpart, which
# become attributes of the project number (via attr_code); its content is the
# "docnumber" attribute.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_id(node, xml)
  part, subpart = node&.attr("partnumber")&.split(/-/)
  xml.docidentifier do |docid|
    docid.project_number(node.attr("docnumber"),
                         **attr_code(part: part, subpart: subpart))
  end
end
19
+
20
# Emits <version> with an edition, revision_date and draft child, in that
# order, for whichever of the "edition", "revdate" and "draft" attributes are
# set.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_version(node, xml)
  children = { edition: "edition", revision_date: "revdate", draft: "draft" }
  xml.version do |version|
    children.each do |element, attribute|
      value = node.attr(attribute)
      version.send(element, value) if value
    end
  end
end
27
+
28
# Emits one committee element.
#
# The element name is compname with "-" turned into "_"; its content is the
# attribute called compname, and its number/type attributes come from the
# "<compname>-number" and "<compname>-type" attributes (via attr_code).
#
# @param compname [String] attribute name, e.g. "technical-committee"
# @param node source of attributes (responds to #attr)
# @param out XML builder receiving the element
def committee_component(compname, node, out)
  element = compname.tr("-", "_")
  attributes = attr_code(number: node.attr("#{compname}-number"),
                         type: node.attr("#{compname}-type"))
  out.send(element, node.attr(compname), **attributes)
end
33
+
34
# Fills an organization element with its name.
#
# @param org XML builder positioned inside the organization element
# @param orgname [String] organization name
def organization(org, orgname)
  org.name(orgname)
end
37
+
38
# Emits one <contributor> with role type "author" per publisher.
#
# Publishers come from the comma-separated "publisher" attribute; nothing is
# emitted when the attribute is not set.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_author(node, xml)
  publishers = node.attr("publisher")
  return unless publishers

  publishers.split(/,[ ]?/).each do |publisher|
    xml.contributor do |contributor|
      contributor.role(type: "author")
      contributor.organization { |org| organization(org, publisher) }
    end
  end
end
47
+
48
# Emits one <contributor> with role type "publisher" per publisher.
#
# Publishers come from the comma-separated "publisher" attribute; nothing is
# emitted when the attribute is not set.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_publisher(node, xml)
  publishers = node.attr("publisher")
  return unless publishers

  publishers.split(/,[ ]?/).each do |publisher|
    xml.contributor do |contributor|
      contributor.role(type: "publisher")
      contributor.organization { |org| organization(org, publisher) }
    end
  end
end
57
+
58
# Emits one <copyright> per publisher.
#
# Each carries a "from" year (the "copyright-year" attribute, or the current
# year) and, when the publisher name contains at least one ASCII letter, an
# owner organization. Without a "publisher" attribute a single blank
# publisher is assumed, which yields one <copyright> without an owner.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_copyright(node, xml)
  publishers = node.attr("publisher") || " "
  publishers.split(/,[ ]?/).each do |publisher|
    xml.copyright do |copyright|
      copyright.from(node.attr("copyright-year") || Date.today.year)
      if publisher.match(/[A-Za-z]/)
        copyright.owner do |owner|
          owner.organization { |org| organization(org, publisher) }
        end
      end
    end
  end
end
69
+
70
# Emits <status format="plain"> holding the "status" attribute, or
# "published" when that attribute is not set.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_status(node, xml)
  xml.status(format: "plain") do |status|
    status << (node.attr("status") || "published")
  end
end
75
+
76
# Emits <editorialgroup> containing the "technical-committee" component.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_committee(node, xml)
  xml.editorialgroup do |group|
    committee_component("technical-committee", node, group)
  end
end
81
+
82
# Emits one <ics> element with a <code> child per entry of the
# comma-separated "library-ics" attribute. Nothing is emitted when the
# attribute is not set.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata_ics(node, xml)
  codes = node.attr("library-ics")
  return codes unless codes # hand back the falsy attribute value itself

  codes.split(/,\s*/).each do |code|
    xml.ics { |ics_element| ics_element.code code }
  end
end
90
+
91
# Emits the document metadata in a fixed order: title, identifier, authors,
# publishers, language (default "en"), script (default "Latn"), status,
# copyright, committee and ICS codes.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def metadata(node, xml)
  title(node, xml)
  metadata_id(node, xml)
  metadata_author(node, xml)
  metadata_publisher(node, xml)

  language = node.attr("language") || "en"
  xml.language(language)
  script = node.attr("script") || "Latn"
  xml.script(script)

  metadata_status(node, xml)
  metadata_copyright(node, xml)
  metadata_committee(node, xml)
  metadata_ics(node, xml)
end
103
+
104
# Runs a raw attribute value through Asciidoctor's substitutions.
#
# The string is parsed as a standalone document, and the substitutions of its
# first block are applied to that block's source.
#
# @param x [String, nil] text to convert
# @return [String, nil] nil for nil input; "" for empty or whitespace-only
#   input; otherwise the substituted source of the first block
def asciidoc_sub(x)
  return nil if x.nil?
  # Blank input leaves the parsed document without a first block, which
  # previously ended in NoMethodError on nil; there is nothing to convert.
  return "" if x.strip.empty?

  d = Asciidoctor::Document.new(x.lines.entries, { header_footer: false })
  b = d.parse.blocks.first
  b.apply_subs(b.source)
end
110
+
111
# Emits a <title> for each supported language (currently only "en"), with
# format "text/plain", holding the "title" attribute after asciidoc_sub.
#
# @param node source of attributes (responds to #attr)
# @param xml XML builder
def title(node, xml)
  ["en"].each do |lang|
    attributes = attr_code(language: lang, format: "text/plain")
    xml.title(**attributes) { |t| t << asciidoc_sub(node.attr("title")) }
  end
end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,134 @@
1
+ require "asciidoctor/extensions"
2
+ require "htmlentities"
3
+
4
+ module Asciidoctor
5
+ module Standoc
6
+ module Inline
7
# Tells whether x is among the ids collected in @refids.
#
# @param x id to look up
# @return [Boolean]
def refid?(x)
  @refids.include?(x)
end
10
+
11
# Dispatches an inline anchor to the handler for its type (:ref, :xref, :link
# or :bibref). Any other type yields nil.
#
# @param node inline anchor node (responds to #type)
def inline_anchor(node)
  case node.type
  when :ref then inline_anchor_ref(node)
  when :xref then inline_anchor_xref(node)
  when :link then inline_anchor_link(node)
  when :bibref then inline_anchor_bibref(node)
  end
end
23
+
24
# Renders an anchor as an empty <bookmark> whose id is the node's id.
#
# @param node inline anchor node (responds to #id)
# @return [String] the fragments produced by noko, concatenated
def inline_anchor_ref(node)
  fragments = noko do |xml|
    xml.bookmark(nil, **attr_code(id: node.id))
  end
  fragments.join
end
29
+
30
# Renders a cross-reference as <xref target=... type=...>.
#
# Link text of the form "fn" or "fn: text" makes the type "footnote" and the
# content the text after the colon (nil for a bare "fn"); any other text makes
# the type "inline" and is used as the content. The target loses a leading
# "#", and anything from a later "#" onwards, along with a ".xml" suffix
# directly in front of that "#".
#
# @param node inline anchor node (responds to #text and #target)
# @return [String] the fragments produced by noko, concatenated
def inline_anchor_xref(node)
  footnote = /^fn(:\s*(?<text>.*))?$/.match(node.text)
  type, contents =
    if footnote.nil?
      ["inline", node.text]
    else
      ["footnote", footnote[:text]]
    end
  target = node.target.gsub(/^#/, "").gsub(%r{(.)(\.xml)?#.*$}, "\\1")
  fragments = noko do |xml|
    xml.xref(**attr_code(target: target, type: type)) { |x| x << contents }
  end
  fragments.join
end
41
+
42
# Renders a hyperlink as <link target=...>.
#
# The content is the link text, or empty when the text merely repeats the
# target (ignoring a leading "mailto:" on the target).
#
# @param node inline anchor node (responds to #text and #target)
# @return [String] the fragments produced by noko, concatenated
def inline_anchor_link(node)
  bare_target = node.target.gsub(%r{^mailto:}, "")
  contents = bare_target == node.text ? "" : node.text
  fragments = noko do |xml|
    xml.link(**attr_code(target: node.target)) { |link| link << contents }
  end
  fragments.join
end
52
+
53
# Renders a bibliographic anchor as <ref id=...> and records its target in
# @refids.
#
# The content is the node text, or nil when the text equals the target.
#
# @param node inline anchor node (responds to #text and #target)
# @return [String] the fragments produced by noko, concatenated
def inline_anchor_bibref(node)
  contents = node.text unless node.target == node.text
  @refids << node.target
  fragments = noko do |xml|
    xml.ref(**attr_code(id: node.target)) { |ref| ref << contents }
  end
  fragments.join
end
63
+
64
# Renders a callout marker as <callout> containing the node text.
#
# @param node inline node (responds to #text)
# @return [String] the fragments produced by noko, concatenated
def inline_callout(node)
  fragments = noko { |xml| xml.callout node.text }
  fragments.join
end
69
+
70
# Renders a footnote as <fn reference=N><p>text</p></fn>, where N is
# @fn_number after being incremented for this footnote.
#
# @param node inline node (responds to #text)
# @return [String] the fragments produced by noko, joined with newlines
def inline_footnote(node)
  fragments = noko do |xml|
    @fn_number += 1
    xml.fn(reference: @fn_number) do |fn|
      fn.p { |para| para << node.text }
    end
  end
  fragments.join("\n")
end
78
+
79
# Renders a hard line break: the node text followed by <br>.
#
# @param node inline node (responds to #text)
# @return [String] the fragments produced by noko, joined with newlines
def inline_break(node)
  fragments = noko do |xml|
    xml << node.text
    xml.br
  end
  fragments.join("\n")
end
85
+
86
# Renders a page break as <pagebreak>; the node itself is not consulted.
#
# @return [String] the fragments produced by noko, joined with newlines
def page_break(_node)
  fragments = noko do |xml|
    xml.pagebreak
  end
  fragments.join("\n")
end
89
+
90
# Renders a thematic break as <hr>; the node itself is not consulted.
#
# @return [String] the fragments produced by noko, joined with newlines
def thematic_break(_node)
  fragments = noko do |xml|
    xml.hr
  end
  fragments.join("\n")
end
93
+
94
# Emits a <stem> element for a maths expression.
#
# Text containing an opening <math> tag (optionally namespace-prefixed and
# with attributes, either literal or written with &lt;/&gt;) is emitted with
# type "MathML": it is entity-encoded with HTMLEntities, after which the
# encoded forms of <, > and & (singly or doubly escaped) are turned back into
# the plain characters. Any other text is emitted unchanged with type
# "AsciiMath".
#
# @param text [String] the expression
# @param xml XML builder
def stem_parse(text, xml)
  mathml_tag = /&lt;([^:>&]+:)?math(\s+[^>&]+)?&gt; |
                <([^:>&]+:)?math(\s+[^>&]+)?>/x
  return xml.stem(text, type: "AsciiMath") unless mathml_tag.match text

  # Applied strictly in this order: doubly escaped forms first.
  unescapes = [[/&amp;gt;/, ">"], [/\&amp;lt;/, "<"], [/&amp;amp;/, "&"],
               [/&gt;/, ">"], [/&lt;/, "<"], [/&amp;/, "&"]]
  encoded = HTMLEntities.new.encode(text, :basic, :hexadecimal)
  math = unescapes.reduce(encoded) do |str, (pattern, plain)|
    str.gsub(pattern, plain)
  end
  xml.stem(math, type: "MathML")
end
105
+
106
# Renders inline formatted text.
#
# The node type selects the markup: emphasis/strong/monospaced/superscript/
# subscript map to <em>/<strong>/<tt>/<sup>/<sub>; :double and :single wrap
# the text in straight quotes; :asciimath goes through stem_parse. For any
# other type the role decides: "alt", "deprecated" and "domain" (legacy, now
# handled by macros) wrap the text in <admitted>/<deprecates>/<domain>;
# "strike" and "smallcap" produce elements of the same name; anything else
# emits the text as is.
#
# @param node inline node (responds to #type, #text and #role)
# @return [String] the fragments produced by noko, concatenated
def inline_quoted(node)
  type_tags = { emphasis: :em, strong: :strong, monospaced: :tt,
                superscript: :sup, subscript: :sub }
  wrapping_roles = { "alt" => :admitted, "deprecated" => :deprecates,
                     "domain" => :domain }
  text_roles = { "strike" => :strike, "smallcap" => :smallcap }
  fragments = noko do |xml|
    type = node.type
    if type_tags.key?(type)
      xml.send(type_tags[type], node.text)
    elsif type == :double
      xml << "\"#{node.text}\""
    elsif type == :single
      xml << "'#{node.text}'"
    elsif type == :asciimath
      stem_parse(node.text, xml)
    elsif wrapping_roles.key?(node.role)
      xml.send(wrapping_roles[node.role]) { |a| a << node.text }
    elsif text_roles.key?(node.role)
      xml.send(text_roles[node.role], node.text)
    else
      xml << node.text
    end
  end
  fragments.join
end
132
+ end
133
+ end
134
+ end