metanorma-standoc 1.11.3 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (150) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +3 -31
  3. data/.gitignore +23 -0
  4. data/Gemfile +0 -1
  5. data/lib/asciidoctor/standoc/base.rb +2 -145
  6. data/lib/asciidoctor/standoc/blocks.rb +2 -238
  7. data/lib/asciidoctor/standoc/blocks_notes.rb +2 -100
  8. data/lib/asciidoctor/standoc/cleanup.rb +2 -208
  9. data/lib/asciidoctor/standoc/cleanup_amend.rb +2 -53
  10. data/lib/asciidoctor/standoc/cleanup_block.rb +2 -172
  11. data/lib/asciidoctor/standoc/cleanup_boilerplate.rb +2 -212
  12. data/lib/asciidoctor/standoc/cleanup_footnotes.rb +2 -108
  13. data/lib/asciidoctor/standoc/cleanup_image.rb +2 -69
  14. data/lib/asciidoctor/standoc/cleanup_inline.rb +2 -189
  15. data/lib/asciidoctor/standoc/cleanup_maths.rb +2 -221
  16. data/lib/asciidoctor/standoc/cleanup_ref.rb +2 -169
  17. data/lib/asciidoctor/standoc/cleanup_ref_dl.rb +2 -103
  18. data/lib/asciidoctor/standoc/cleanup_reqt.rb +2 -110
  19. data/lib/asciidoctor/standoc/cleanup_section.rb +2 -184
  20. data/lib/asciidoctor/standoc/cleanup_section_names.rb +2 -91
  21. data/lib/asciidoctor/standoc/cleanup_symbols.rb +2 -47
  22. data/lib/asciidoctor/standoc/cleanup_table.rb +2 -67
  23. data/lib/asciidoctor/standoc/cleanup_terms.rb +2 -139
  24. data/lib/asciidoctor/standoc/cleanup_terms_designations.rb +2 -192
  25. data/lib/asciidoctor/standoc/cleanup_text.rb +2 -95
  26. data/lib/asciidoctor/standoc/cleanup_toc.rb +3 -0
  27. data/lib/asciidoctor/standoc/cleanup_xref.rb +2 -106
  28. data/lib/asciidoctor/standoc/converter.rb +2 -123
  29. data/lib/asciidoctor/standoc/datamodel/attributes_table_preprocessor.rb +2 -56
  30. data/lib/asciidoctor/standoc/datamodel/diagram_preprocessor.rb +2 -102
  31. data/lib/asciidoctor/standoc/datamodel/plantuml_renderer.rb +3 -404
  32. data/lib/asciidoctor/standoc/deprecated.rb +5 -0
  33. data/lib/asciidoctor/standoc/front.rb +2 -219
  34. data/lib/asciidoctor/standoc/front_contributor.rb +2 -191
  35. data/lib/asciidoctor/standoc/inline.rb +2 -231
  36. data/lib/asciidoctor/standoc/lists.rb +2 -119
  37. data/lib/asciidoctor/standoc/macros.rb +2 -203
  38. data/lib/asciidoctor/standoc/macros_form.rb +2 -62
  39. data/lib/asciidoctor/standoc/macros_note.rb +2 -44
  40. data/lib/asciidoctor/standoc/macros_plantuml.rb +2 -112
  41. data/lib/asciidoctor/standoc/macros_terms.rb +2 -180
  42. data/lib/asciidoctor/standoc/ref.rb +2 -251
  43. data/lib/asciidoctor/standoc/ref_sect.rb +2 -153
  44. data/lib/asciidoctor/standoc/ref_utility.rb +2 -0
  45. data/lib/asciidoctor/standoc/render.rb +2 -116
  46. data/lib/asciidoctor/standoc/reqt.rb +2 -89
  47. data/lib/asciidoctor/standoc/section.rb +2 -194
  48. data/lib/asciidoctor/standoc/table.rb +2 -84
  49. data/lib/asciidoctor/standoc/term_lookup_cleanup.rb +2 -178
  50. data/lib/asciidoctor/standoc/terms.rb +2 -153
  51. data/lib/asciidoctor/standoc/utils.rb +2 -100
  52. data/lib/asciidoctor/standoc/validate.rb +2 -157
  53. data/lib/asciidoctor/standoc/validate_section.rb +2 -54
  54. data/lib/isodoc/html/htmlstyle.css +44 -29
  55. data/lib/isodoc/html/htmlstyle.scss +17 -12
  56. data/lib/metanorma/standoc/base.rb +163 -0
  57. data/lib/{asciidoctor → metanorma}/standoc/basicdoc.rng +0 -0
  58. data/lib/{asciidoctor → metanorma}/standoc/biblio.rng +2 -2
  59. data/lib/metanorma/standoc/blocks.rb +239 -0
  60. data/lib/metanorma/standoc/blocks_notes.rb +101 -0
  61. data/lib/metanorma/standoc/cleanup.rb +157 -0
  62. data/lib/metanorma/standoc/cleanup_amend.rb +54 -0
  63. data/lib/metanorma/standoc/cleanup_block.rb +173 -0
  64. data/lib/metanorma/standoc/cleanup_boilerplate.rb +213 -0
  65. data/lib/metanorma/standoc/cleanup_footnotes.rb +109 -0
  66. data/lib/metanorma/standoc/cleanup_image.rb +70 -0
  67. data/lib/metanorma/standoc/cleanup_inline.rb +190 -0
  68. data/lib/metanorma/standoc/cleanup_maths.rb +222 -0
  69. data/lib/metanorma/standoc/cleanup_ref.rb +170 -0
  70. data/lib/metanorma/standoc/cleanup_ref_dl.rb +104 -0
  71. data/lib/metanorma/standoc/cleanup_reqt.rb +111 -0
  72. data/lib/metanorma/standoc/cleanup_section.rb +212 -0
  73. data/lib/metanorma/standoc/cleanup_section_names.rb +92 -0
  74. data/lib/metanorma/standoc/cleanup_symbols.rb +48 -0
  75. data/lib/metanorma/standoc/cleanup_table.rb +68 -0
  76. data/lib/metanorma/standoc/cleanup_terms.rb +140 -0
  77. data/lib/metanorma/standoc/cleanup_terms_designations.rb +199 -0
  78. data/lib/metanorma/standoc/cleanup_text.rb +74 -0
  79. data/lib/metanorma/standoc/cleanup_toc.rb +98 -0
  80. data/lib/metanorma/standoc/cleanup_xref.rb +107 -0
  81. data/lib/metanorma/standoc/converter.rb +126 -0
  82. data/lib/metanorma/standoc/datamodel/attributes_table_preprocessor.rb +57 -0
  83. data/lib/metanorma/standoc/datamodel/diagram_preprocessor.rb +103 -0
  84. data/lib/metanorma/standoc/datamodel/plantuml_renderer.rb +409 -0
  85. data/lib/metanorma/standoc/front.rb +224 -0
  86. data/lib/metanorma/standoc/front_contributor.rb +192 -0
  87. data/lib/metanorma/standoc/inline.rb +232 -0
  88. data/lib/{asciidoctor → metanorma}/standoc/isodoc.rng +104 -3
  89. data/lib/metanorma/standoc/lists.rb +120 -0
  90. data/lib/metanorma/standoc/macros.rb +205 -0
  91. data/lib/metanorma/standoc/macros_embed.rb +72 -0
  92. data/lib/metanorma/standoc/macros_form.rb +63 -0
  93. data/lib/metanorma/standoc/macros_note.rb +45 -0
  94. data/lib/metanorma/standoc/macros_plantuml.rb +113 -0
  95. data/lib/metanorma/standoc/macros_terms.rb +194 -0
  96. data/lib/metanorma/standoc/ref.rb +243 -0
  97. data/lib/metanorma/standoc/ref_sect.rb +153 -0
  98. data/lib/{asciidoctor/standoc/ref_date_id.rb → metanorma/standoc/ref_utility.rb} +43 -5
  99. data/lib/metanorma/standoc/render.rb +115 -0
  100. data/lib/metanorma/standoc/reqt.rb +90 -0
  101. data/lib/{asciidoctor → metanorma}/standoc/reqt.rng +0 -0
  102. data/lib/metanorma/standoc/section.rb +209 -0
  103. data/lib/metanorma/standoc/table.rb +85 -0
  104. data/lib/metanorma/standoc/term_lookup_cleanup.rb +179 -0
  105. data/lib/metanorma/standoc/terms.rb +160 -0
  106. data/lib/metanorma/standoc/utils.rb +101 -0
  107. data/lib/metanorma/standoc/validate.rb +158 -0
  108. data/lib/metanorma/standoc/validate_section.rb +55 -0
  109. data/lib/metanorma/standoc/version.rb +1 -1
  110. data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/model_representation.adoc.erb +0 -0
  111. data/lib/{asciidoctor → metanorma}/standoc/views/datamodel/plantuml_representation.adoc.erb +0 -0
  112. data/lib/metanorma-standoc.rb +1 -1
  113. data/metanorma-standoc.gemspec +1 -1
  114. data/spec/assets/a1.adoc +8 -0
  115. data/spec/assets/a2.adoc +8 -0
  116. data/spec/assets/a3.adoc +9 -0
  117. data/spec/assets/a4.adoc +4 -0
  118. data/spec/{asciidoctor → metanorma}/base_spec.rb +499 -407
  119. data/spec/{asciidoctor → metanorma}/blank_spec.rb +1 -1
  120. data/spec/{asciidoctor → metanorma}/blocks_spec.rb +1 -1
  121. data/spec/{asciidoctor → metanorma}/cleanup_blocks_spec.rb +1 -1
  122. data/spec/{asciidoctor → metanorma}/cleanup_sections_spec.rb +1 -1
  123. data/spec/{asciidoctor → metanorma}/cleanup_spec.rb +5 -5
  124. data/spec/{asciidoctor → metanorma}/cleanup_terms_spec.rb +227 -119
  125. data/spec/{asciidoctor → metanorma}/datamodel/attributes_table_preprocessor_spec.rb +1 -1
  126. data/spec/{asciidoctor → metanorma}/datamodel/diagram_preprocessor_spec.rb +1 -1
  127. data/spec/{asciidoctor → metanorma}/inline_spec.rb +170 -1
  128. data/spec/{asciidoctor → metanorma}/isobib_cache_spec.rb +1 -1
  129. data/spec/{asciidoctor → metanorma}/lists_spec.rb +1 -1
  130. data/spec/{asciidoctor → metanorma}/macros_json2text_spec.rb +0 -0
  131. data/spec/{asciidoctor → metanorma}/macros_plantuml_spec.rb +3 -3
  132. data/spec/{asciidoctor → metanorma}/macros_spec.rb +97 -6
  133. data/spec/{asciidoctor → metanorma}/macros_yaml2text_spec.rb +0 -0
  134. data/spec/metanorma/refs_dl_spec.rb +863 -0
  135. data/spec/{asciidoctor → metanorma}/refs_spec.rb +522 -15
  136. data/spec/{asciidoctor → metanorma}/section_spec.rb +59 -1
  137. data/spec/{asciidoctor → metanorma}/table_spec.rb +1 -1
  138. data/spec/{asciidoctor → metanorma}/validate_spec.rb +2 -2
  139. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec.yml +46 -46
  140. data/spec/vcr_cassettes/dated_iso_ref_joint_iso_iec1.yml +12 -12
  141. data/spec/vcr_cassettes/hide_refs.yml +599 -0
  142. data/spec/vcr_cassettes/isobib_get_123.yml +12 -12
  143. data/spec/vcr_cassettes/isobib_get_123_1.yml +24 -24
  144. data/spec/vcr_cassettes/isobib_get_123_1_fr.yml +35 -35
  145. data/spec/vcr_cassettes/isobib_get_123_2001.yml +13 -13
  146. data/spec/vcr_cassettes/isobib_get_124.yml +10 -10
  147. data/spec/vcr_cassettes/rfcbib_get_rfc8341.yml +18 -18
  148. data/spec/vcr_cassettes/separates_iev_citations_by_top_level_clause.yml +46 -46
  149. metadata +88 -32
  150. data/spec/asciidoctor/refs_dl_spec.rb +0 -864
@@ -0,0 +1,48 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
# Build the sort key for a symbol entry.
# Indices sort after letter but before any following
# letter (x, x_m, x_1, xa); we use colon to force that sort order.
# Numbers sort *after* letters; we use thorn to force that sort order.
def symbol_key(sym)
  copy = sym.dup
  # MathML is flattened to AsciiMath, with Greek letter names turned
  # into entity references
  copy.traverse do |node|
    next unless node.name == "math"

    node.replace(grkletters(MathML2AsciiMath.m2a(node.to_xml)))
  end
  plain = Nokogiri::XML(copy.to_xml).text.downcase
  decoded = HTMLEntities.new.decode(plain)
  stripped = decoded.gsub(/[\[\]{}<>()]/, "").gsub(/\s/m, "")
  marked = stripped.gsub(/[[:punct:]]|[_^]/, ":\\0").gsub(/`/, "")
  marked.gsub(/[0-9]+/, "þ\\0")
end
19
+
20
# Wrap every stand-alone Greek letter name in +text+ (matched
# case-insensitively, on word boundaries) as an entity reference,
# e.g. "alpha" => "&alpha;".
def grkletters(text)
  names = %w(alpha beta gamma delta epsilon zeta eta theta iota kappa
             lambda mu nu xi omicron pi rho sigma tau upsilon phi chi
             psi omega)
  text.gsub(/\b(#{names.join('|')})\b/i) { "&#{Regexp.last_match(1)};" }
end
25
+
26
# Detach every dt/dd child of +dlist+ and return an array of hashes,
# one per dt: { dt: node, key: sort key, dd: node }. Each dd is attached
# to the most recently seen dt.
def extract_symbols_list(dlist)
  dlist.xpath("./dt | ./dd").each_with_object([]) do |node, entries|
    if node.name == "dt"
      entries << { dt: node.remove, key: symbol_key(node) }
    else
      entries.last[:dd] = node.remove
    end
  end
end
37
+
38
# Sort the entries of every definitions/dl by their symbol key and
# rewrite the list in that order. Returns +docxml+.
def symbols_cleanup(docxml)
  docxml.xpath("//definitions/dl").each do |dl|
    entries = extract_symbols_list(dl)
    # `x <=> y || tiebreak` never reaches the tiebreak, because <=>
    # yields 0 (truthy) for equal keys. Sort on [key, original position]
    # so that entries with equal keys keep their document order.
    sorted = entries.each_with_index
      .sort_by { |entry, idx| [entry[:key], idx] }
      .map(&:first)
    dl.children = sorted.map { |d| d[:dt].to_s + d[:dd].to_s }.join("\n")
  end
  docxml
end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,68 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
# Move a dl flagged key="true" that immediately follows a table
# into that table.
def dl1_table_cleanup(xmldoc)
  xmldoc.xpath("//table/following-sibling::*[1][self::dl]").each do |dl|
    next unless dl["key"] == "true"

    table = dl.previous_element
    table << dl.remove
  end
end
10
+
11
# move Key dl after table footer: a paragraph reading "Key" right after
# a table, followed by a dl, marks that dl as the table key; the dl is
# moved into the table and the paragraph dropped
def dl2_table_cleanup(xmldoc)
  xmldoc.xpath("//table/following-sibling::*[1][self::p]").each do |para|
    next unless /^\s*key[^a-z]*$/i.match?(para.text)

    dl = para.next_element
    next unless dl&.name == "dl"

    dl["key"] = "true"
    para.previous_element << dl.remove
    para.remove
  end
end
22
+
23
# Return the thead of +table+, creating an empty one if absent: after
# the table name when there is one, otherwise as the first child.
def insert_thead(table)
  existing = table.at("./thead")
  return existing if existing

  anchor = table.at("./name")
  if anchor
    anchor.add_next_sibling("<thead/>").first
  else
    table.children.first.add_previous_sibling("<thead/>").first
  end
end
33
+
34
# For every table carrying a headerrows attribute, move leading tbody
# rows into thead until thead holds that many rows, turn the thead
# cells into th, and drop the attribute.
def header_rows_cleanup(xmldoc)
  xmldoc.xpath("//table[@headerrows]").each do |table|
    thead = insert_thead(table)
    missing = table["headerrows"].to_i - thead.xpath("./tr").size
    missing.times do
      # headerrows may exceed the rows actually present in tbody;
      # stop promoting rows instead of failing on nil
      row = table.at("./tbody/tr") or break
      row.parent = thead
    end
    thead.xpath(".//td").each { |cell| cell.name = "th" }
    table.delete("headerrows")
  end
end
45
+
46
# Run the table cleanup passes in order: key definition lists (both
# forms), trailing notes, then header rows.
def table_cleanup(xmldoc)
  dl1_table_cleanup(xmldoc)
  dl2_table_cleanup(xmldoc)
  notes_table_cleanup(xmldoc)
  header_rows_cleanup(xmldoc)
end
52
+
53
# move notes into table: repeatedly absorb the note that directly
# follows a table (unless it is marked keep-separate="true"), until no
# such note remains
def notes_table_cleanup(xmldoc)
  query = "//table/following-sibling::*[1]"\
          "[self::note[not(@keep-separate = 'true')]]"
  loop do
    notes = xmldoc.xpath(query)
    break if notes.empty?

    notes.each do |note|
      note.delete("keep-separate")
      note.previous_element << note.remove
    end
  end
end
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,140 @@
1
+ require_relative "term_lookup_cleanup"
2
+ require_relative "cleanup_terms_designations"
3
+
4
+ module Metanorma
5
+ module Standoc
6
+ module Cleanup
7
# Lift each domain element out of its enclosing paragraph, placing it
# after the paragraph's previous sibling; the paragraph is removed if
# it is left with only whitespace.
def termdomain_cleanup(xmldoc)
  xmldoc.xpath("//p/domain").each do |domain|
    para = domain.parent
    para.previous.next = domain.remove
    para.remove if para.text.strip.empty?
  end
end
15
+
16
# Within each term, move the last domain/subject child to sit directly
# before the term's definition, when there is one.
def termdomain1_cleanup(xmldoc)
  xmldoc.xpath("//term").each do |term|
    marker = term.xpath("./domain | ./subject").last
    next unless marker

    definition = marker.at("../definition")
    definition.previous = marker.remove if definition
  end
end
22
+
23
# Definition passes, in order: synthesise missing definition elements,
# wrap their content as verbal / non-verbal, then merge adjacent
# verbal + non-verbal definitions.
def termdefinition_cleanup(xmldoc)
  generate_termdefinitions(xmldoc)
  split_termdefinitions(xmldoc)
  alternate_termdefinitions(xmldoc)
end
28
+
29
# XPath union of the child blocks of a term that count as definition
# content (definition lists flagged as metadata are excluded).
TERMDEF_BLOCKS =
  ["./p", "./ol", "./dl[not(@metadata = 'true')]", "./ul", "./figure",
   "./formula", "./table"].join(" | ").freeze
32
+
33
# For every term lacking a definition element, create one at the
# position of the first definition block and move all of the term's
# definition blocks into it.
def generate_termdefinitions(xmldoc)
  xmldoc.xpath("//term[not(definition)]").each do |term|
    lead = term.at(TERMDEF_BLOCKS)
    next unless lead

    definition = Nokogiri::XML::Element.new("definition", xmldoc)
    # swap the new element in where the first block stood, then
    # re-home that block inside it
    lead.replace(definition)
    definition << lead.remove
    term.xpath(TERMDEF_BLOCKS).each { |block| definition << block.remove }
  end
end
44
+
45
# Wrap the content of each definition: as verbal-definition when it has
# a p, ol, dl or ul child, otherwise as non-verbal-representation.
def split_termdefinitions(xmldoc)
  xmldoc.xpath("//definition").each do |defn|
    wrapper = if defn.at("./p | ./ol | ./dl | ./ul")
                "verbal-definition"
              else
                "non-verbal-representation"
              end
    defn.children = "<#{wrapper}>#{defn.children}</#{wrapper}>"
  end
end
55
+
56
# When a purely verbal definition is immediately followed by an element
# holding only a non-verbal representation, fold that representation
# into the first definition and drop the second element.
def alternate_termdefinitions(xmldoc)
  xmldoc.xpath("//term").each do |term|
    term.xpath("./definition").each do |defn|
      follower = defn.next_element or next
      verbal = defn.at("./verbal-definition") or next
      next if defn.at("./non-verbal-representation")
      next if follower.at("./verbal-definition")

      nonverbal = follower.at("./non-verbal-representation") or next
      verbal.next = nonverbal.remove
      follower.remove
    end
  end
end
70
+
71
# Move every termdocsource element to just before the first
# preface/sections element of the document.
def termdocsource_cleanup(xmldoc)
  anchor = xmldoc.at("//preface | //sections")
  xmldoc.xpath("//termdocsource").each do |source|
    anchor.previous = source.remove
  end
end
75
+
76
# Rename terms elements that contain nested terms to clause, then
# re-append each term's notes, examples, sources and nested terms so
# that they end up last, in that order.
def term_children_cleanup(xmldoc)
  xmldoc.xpath("//terms[terms]").each { |group| group.name = "clause" }
  xmldoc.xpath("//term").each do |term|
    %w(termnote termexample termsource term).each do |tag|
      term.xpath("./#{tag}").each { |child| term << child.remove }
    end
  end
end
84
+
85
# For terms that have an origin/termref but no definition, insert the
# result of fetch_termbase (called with the termref's base attribute
# and text) before the origin element.
def termdef_from_termbase(xmldoc)
  xmldoc.xpath("//term").each do |term|
    ref = term.at("./origin/termref")
    next unless ref
    next if term.at("./definition")

    term.at("./origin").previous = fetch_termbase(ref["base"], ref.text)
  end
end
92
+
93
# termnote / termexample elements found outside any term are renamed
# to plain note / example.
def termnote_example_cleanup(xmldoc)
  %w(note example).each do |kind|
    stray = xmldoc.xpath("//term#{kind}[not(ancestor::term)]")
    stray.each { |node| node.name = kind }
  end
end
100
+
101
# Entry point for term cleanup; the passes below run in this fixed
# order against the same document.
def termdef_cleanup(xmldoc)
  termdef_unnest_cleanup(xmldoc)
  Metanorma::Standoc::TermLookupCleanup.new(xmldoc, @log).call

  # designations and their metadata
  term_nonverbal_designations(xmldoc)
  term_dl_to_metadata(xmldoc)
  term_termsource_to_designation(xmldoc)
  term_designation_reorder(xmldoc)

  # definitions and domains
  termdef_from_termbase(xmldoc)
  termdomain_cleanup(xmldoc)
  termdef_stem_cleanup(xmldoc)
  termdefinition_cleanup(xmldoc)
  termdomain1_cleanup(xmldoc)

  # notes, examples, child ordering, document sources
  termnote_example_cleanup(xmldoc)
  term_children_cleanup(xmldoc)
  termdocsource_cleanup(xmldoc)
end
117
+
118
# When @index_terms is set, add index entries for the name of every
# preferred designation (qualified by its field-of-application /
# usage-info texts) and for every dt of a definitions list.
def index_cleanup(xmldoc)
  return unless @index_terms

  xmldoc.xpath("//preferred").each do |pref|
    name = pref.at("./expression/name | ./letter-symbol/name")
    qualifiers = pref.xpath("./field-of-application | ./usage-info")
      &.map(&:text)&.join(", ")
    index_cleanup1(name, qualifiers)
  end
  xmldoc.xpath("//definitions/dl/dt").each { |dt| index_cleanup1(dt, "") }
end
130
+
131
# Append an index/primary entry to +term+, built from a copy of its
# children; a non-empty +fieldofappl+ is added in angle brackets.
# Does nothing when +term+ is nil.
def index_cleanup1(term, fieldofappl)
  return unless term

  idx = term.children.dup
  unless fieldofappl.empty?
    idx << ", &#x3c;#{fieldofappl}&#x3e;"
  end
  term << "<index><primary>#{idx.to_xml}</primary></index>"
end
138
+ end
139
+ end
140
+ end
@@ -0,0 +1,199 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
# After converting initial formulas to admitted terms, rename to
# letter-symbol every expression whose name consists of nothing but a
# stem (no text left once the stem is taken out).
def termdef_stem_cleanup(xmldoc)
  termdef_stem2admitted(xmldoc)
  xmldoc.xpath("//term//expression/name[stem]").each do |name|
    residue = name.dup
    residue.at("./stem").remove
    name.parent.name = "letter-symbol" if residue.text.strip.empty?
  end
end
14
+
15
# Replace an initial formula of a term (a paragraph holding only a
# stem, or a formula element) with an admitted designation.
def termdef_stem2admitted(xmldoc)
  xmldoc.xpath("//term/p/stem").each do |stem|
    para = stem.parent
    next unless initial_formula(para)

    para.replace("<admitted>#{term_expr(stem.to_xml)}</admitted>")
  end
  xmldoc.xpath("//term/formula").each do |formula|
    next unless initial_formula(formula)

    formula
      .replace("<admitted>#{term_expr(formula.children.to_xml)}</admitted>")
  end
end
27
+
28
# True when +elem+ has exactly one child element (para contains just
# stem expression) and no p, dl, ol or ul sibling precedes it.
def initial_formula(elem)
  return false unless elem.elements.size == 1

  preceding = elem.at("./preceding-sibling::p | ./preceding-sibling::dl | "\
                      "./preceding-sibling::ol | ./preceding-sibling::ul")
  !preceding
end
33
+
34
# release termdef tags from surrounding paras: while any designation is
# still found under a paragraph, replace the first hit's parent with
# that parent's children, then search again
def termdef_unnest_cleanup(xmldoc)
  desgn = "//p/admitted | //p/deprecates | //p/preferred | //p//related"
  until (hits = xmldoc.xpath(desgn)).empty?
    wrapper = hits[0].parent
    wrapper.replace(wrapper.children)
  end
end
43
+
44
# Apply each metadata definition list of a term to the designation it
# follows, then remove the list. Lists that do not follow a designation
# are left in place.
def term_dl_to_metadata(xmldoc)
  xmldoc.xpath("//term[dl[@metadata = 'true']]").each do |term|
    term.xpath("./dl[@metadata = 'true']").each do |dl|
      designation = related2pref(dl_to_designation(dl))
      next unless designation

      term_dl_to_designation_metadata(designation, dl)
      term_dl_to_term_metadata(designation, dl)
      term_dl_to_expression_metadata(designation, dl)
      dl.remove
    end
  end
end
55
+
56
# Term-level metadata (domain, subject) is taken only from the list
# attached to the first preferred designation; the elements are
# inserted via dl_to_elems, each one at the point returned by the
# previous call.
def term_dl_to_term_metadata(prev, dlist)
  return unless prev.name == "preferred"
  return if prev.at("./preceding-sibling::preferred")

  ins = term_element_insert_point(prev)
  %w(domain subject).each do |field|
    ins = dl_to_elems(ins, prev.parent, dlist, field)
  end
end
65
+
66
# Designation-level metadata: absent and geographic-area go through
# dl_to_attrs; usage-info and field-of-application go through
# dl_to_elems, anchored on the designation's expression.
def term_dl_to_designation_metadata(prev, dlist)
  %w(absent geographic-area).each { |attr| dl_to_attrs(prev, dlist, attr) }
  # processed in reverse of their intended order (usage-info first)
  %w(usage-info field-of-application).each do |field|
    dl_to_elems(prev.at("./expression"), prev, dlist, field)
  end
end
74
+
75
# Starting at +prev+, walk forward over following sibling elements for
# as long as they are designations, domain or dl, and return the last
# one reached (or +prev+ itself if none follow).
def term_element_insert_point(prev)
  skippable = %w(preferred admitted deprecates related domain dl)
  cursor = prev
  loop do
    following = cursor&.next_element
    break unless skippable.include?(following&.name)

    cursor = following
  end
  cursor
end
83
+
84
# Expression-level metadata, in order: attributes on the expression
# root, elements attached to its name, then the letter-symbol switch.
def term_dl_to_expression_metadata(prev, dlist)
  term_dl_to_expression_root_metadata(prev, dlist)
  term_dl_to_expression_name_metadata(prev, dlist)
  term_to_letter_symbol(prev, dlist)
end
89
+
90
# isInternational is passed to dl_to_attrs for whichever of expression,
# letter-symbol or graphical-symbol the designation has; language,
# script and type only for an expression.
def term_dl_to_expression_root_metadata(prev, dlist)
  root = prev.at("./expression | ./letter-symbol | ./graphical-symbol")
  dl_to_attrs(root, dlist, "isInternational")
  %w(language script type).each do |attr|
    expression = prev.at("./expression")
    next unless expression

    dl_to_attrs(expression, dlist, attr)
  end
end
100
+
101
# pronunciation and abbreviation-type are passed to dl_to_elems,
# anchored on the expression name; a nested dl under a "grammar" entry
# is handed to term_dl_to_expression_grammar.
def term_dl_to_expression_name_metadata(prev, dlist)
  # processed in reverse of their intended order (pronunciation first)
  %w(pronunciation abbreviation-type).each do |field|
    dl_to_elems(prev.at("./expression/name"), prev, dlist, field)
  end
  grammar = dlist.at("./dt[text()='grammar']/following::dd//dl")
  term_dl_to_expression_grammar(prev, grammar) if grammar
end
108
+
109
# Build the grammar element of the designation's expression from the
# nested grammar dl. A temporary sentinel child serves as the anchor
# passed to dl_to_elems and is removed at the end.
def term_dl_to_expression_grammar(prev, dlist)
  expression = prev.at(".//expression") or return
  expression << "<grammar><sentinel/></grammar>"
  fields = %w(gender number isPreposition isParticiple isAdjective isAdverb
              isNoun grammar-value)
  fields.reverse.each do |field|
    anchor = prev.at(".//expression/grammar/*")
    dl_to_elems(anchor, prev.elements.last, dlist, field)
  end
  # comma-delimited gender / number values become one element each
  term_dl_to_designation_category(prev, "gender")
  term_dl_to_designation_category(prev, "number")
  prev.at(".//expression/grammar/sentinel").remove
end
121
+
122
# If the grammar +category+ element holds comma-separated values,
# replace it with one +category+ element per value. Returns false when
# there is nothing to split.
def term_dl_to_designation_category(prev, category)
  cat = prev.at(".//expression/grammar/#{category}")
  return false unless cat&.text&.include?(",")

  pieces = cat.text.split(/,\s*/)
  cat.replace(pieces.map { |x| "<#{category}>#{x}</#{category}>" }.join)
end
128
+
129
# Rename the designation's expression to letter-symbol when the
# metadata list has a "letter-symbol" entry whose value is "true".
def term_to_letter_symbol(prev, dlist)
  flag = dlist.at("./dt[text()='letter-symbol']/following::dd/p")
  prev.at(".//expression").name = "letter-symbol" if flag&.text == "true"
end
135
+
136
# Return the designation element that directly precedes +dlist+; if the
# preceding element is not a designation, log the problem and return
# nil.
def dl_to_designation(dlist)
  candidate = dlist.previous_element
  designations = %w(preferred admitted deprecates related)
  return candidate if designations.include?(candidate&.name)

  @log.add("AsciiDoc Input", dlist, "Metadata definition list does "\
                                    "not follow a term designation")
  nil
end
145
+
146
# Handle designations with no text: if a formula or figure follows,
# absorb it into the designation; otherwise, unless the designation
# already has an expression name, give it an empty term expression.
def term_nonverbal_designations(xmldoc)
  query = "//term/preferred | //term/admitted | //term/deprecates"
  xmldoc.xpath(query).each do |desgn|
    next unless desgn.text.strip.empty?

    follower = desgn.next_element
    if %w(formula figure).include?(follower&.name)
      term_nonverbal_designations1(desgn, follower)
    elsif !desgn.at("./expression/name")
      desgn.children = term_expr("")
    end
  end
end
158
+
159
# Absorb +elem+ (the figure or formula following a designation) into
# the designation: a figure becomes a graphical-symbol, with its name
# stripped; a formula contributes its stem as a term expression.
def term_nonverbal_designations1(desgn, elem)
  desgn = related2pref(desgn)
  if elem.name == "figure"
    # a figure need not carry a name; only strip one that is present
    elem.at("./name")&.remove
    desgn.children =
      "<graphical-symbol>#{elem.remove.to_xml}</graphical-symbol>"
  else
    desgn.children = term_expr(elem.at("./stem").to_xml)
    elem.remove
  end
end
170
+
171
# Move a termsource into the designation it follows (looking back past
# any domain / subject elements); sources not following a designation
# stay where they are.
def term_termsource_to_designation(xmldoc)
  designations = %w(preferred admitted deprecates related)
  xmldoc.xpath("//term/termsource").each do |source|
    target = source.previous_element
    while %w(domain subject).include?(target&.name)
      target = target.previous_element
    end
    next unless designations.include?(target&.name)

    related2pref(target) << source.remove
  end
end
182
+
183
# Pull all designations of each term to the front of the term, grouped
# as preferred, admitted, deprecates, related.
def term_designation_reorder(xmldoc)
  xmldoc.xpath("//term").each do |term|
    detached = %w(preferred admitted deprecates related).flat_map do |tag|
      term.xpath("./#{tag}").map(&:remove)
    end
    # re-insert back to front, each before the current first child,
    # so that the final order matches +detached+
    detached.reverse.each do |designation|
      term.children.first.previous = designation
    end
  end
end
193
+
194
# For a related element, return its preferred child; any other element
# (or nil) is returned unchanged.
def related2pref(elem)
  if elem&.name == "related"
    elem.at("./preferred")
  else
    elem
  end
end
197
+ end
198
+ end
199
+ end
@@ -0,0 +1,74 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
# Join the (possibly nested) lines of +result+ after stripping trailing
# whitespace, convert AsciiMath unless @keepasciimath is set, remove
# whitespace before footnote tags, and replace metanorma passthrough
# elements with their entity-decoded content.
def textcleanup(result)
  lines = result.flatten.map { |line| line.sub(/\s*$/, "") }
  text = lines.join("\n")
  text = asciimath2mathml(text) unless @keepasciimath
  passthrough = %r{<passthrough\s+formats="metanorma">([^<]*)</passthrough>}m
  text.gsub(/\s+<fn /, "<fn ")
    .gsub(passthrough) { HTMLEntities.new.decode(Regexp.last_match(1)) }
end
11
+
12
# XPath union of the elements excluded from smart-quote conversion.
IGNORE_DUMBQUOTES =
  ["//pre", "//pre//*", "//tt", "//tt//*",
   "//sourcecode", "//sourcecode//*", "//bibdata//*", "//stem",
   "//stem//*", "//figure[@class = 'pseudocode']",
   "//figure[@class = 'pseudocode']//*"].join(" | ").freeze
17
+
18
# Pass every date element to Metanorma::Utils::endash_date, then apply
# smart quotes when @smartquotes is set, or the dumb-quote cleanup
# otherwise.
def smartquotes_cleanup(xmldoc)
  xmldoc.xpath("//date").each { |date| Metanorma::Utils::endash_date(date) }
  return smartquotes_cleanup1(xmldoc) if @smartquotes

  dumbquote_cleanup(xmldoc)
end
24
+
25
# Smart-quote pipeline: first move quotes that are cut off from their
# text by an XML element, then convert dumb quotes to smart quotes.
def smartquotes_cleanup1(xmldoc)
  uninterrupt_quotes_around_xml(xmldoc)
  dumb2smart_quotes(xmldoc)
end
29
+
30
# "abc<tag/>", def => "abc",<tag/> def
# Visits every text node that has a line starting with a quote mark and
# is directly preceded by an element, skipping elements inside pre, tt,
# sourcecode, stem or figure.
def uninterrupt_quotes_around_xml(xmldoc)
  xmldoc.traverse do |node|
    next unless node.text? && node&.previous&.element?
    next unless /^['"]/.match?(node.text)

    before = node.previous
    verbatim = before.ancestors("pre, tt, sourcecode, stem, figure")
    next unless verbatim.empty?

    uninterrupt_quotes_around_xml1(before)
  end
end
41
+
42
# Move a leading quote mark (plus trailing punctuation) from the text
# following +elem+ to the end of the text preceding it, provided the
# preceding text ends in a non-space and the quote run is followed by
# whitespace or end of text.
def uninterrupt_quotes_around_xml1(elem)
  before = elem.at(".//preceding::text()[1]")
  return unless before
  return unless /\S$/.match?(before.text)

  after = elem.at(".//following::text()[1]")
  decoded = HTMLEntities.new.decode(after&.text)
  found = /^(["'][[:punct:]]*)(\s|$)/.match(decoded)
  return unless found

  after.content = after.text.sub(/^(["'][[:punct:]]*)/, "")
  before.content = "#{before.text}#{found[1]}"
end
51
+
52
# Run Metanorma::Utils::smartformat over each text child of every
# element that is not excluded by IGNORE_DUMBQUOTES, when the text
# contains a character sequence that formatting could affect.
def dumb2smart_quotes(xmldoc)
  candidates = xmldoc.xpath("//*[child::text()]") -
    xmldoc.xpath(IGNORE_DUMBQUOTES)
  candidates.each do |elem|
    elem.children.each do |node|
      next unless node.text?
      next unless /[-'"(<>]|\.\.|\dx/.match(node)

      node.replace(Metanorma::Utils::smartformat(node.text))
    end
  end
end
64
+
65
# In every text node, turn a right single quotation mark (U+2019)
# sitting between an alphanumeric and a letter back into a plain
# apostrophe.
def dumbquote_cleanup(xmldoc)
  xmldoc.traverse do |node|
    next unless node.text?

    plain = node.text.gsub(/(?<=\p{Alnum})\u2019(?=\p{Alpha})/, "'")
    node.replace(plain)
  end
end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,98 @@
1
+ module Metanorma
2
+ module Standoc
3
+ module Cleanup
4
# TOC passes, in order: lift toc elements out of paragraphs, populate
# each toc element, process toc-type clauses, then record the TOC
# level settings.
def toc_cleanup(xmldoc)
  toc_cleanup_para(xmldoc)
  xmldoc.xpath("//toc").each do |toc|
    toc_cleanup1(toc, xmldoc)
  end
  toc_cleanup_clause(xmldoc)
  toc_metadata(xmldoc)
end
10
+
11
# Move toc elements out of their enclosing paragraph to directly after
# it, preserving their order; paragraphs left with only whitespace are
# removed.
def toc_cleanup_para(xmldoc)
  xmldoc.xpath("//p[toc]").each do |para|
    # inserting back to front right after the paragraph keeps the order
    para.xpath("./toc").reverse.each { |toc| para.next = toc }
    para.text.strip.empty? and para.remove
  end
end
19
+
20
# Collect one TOC entry per node matched by each toc-xpath of +toc+,
# sorted by the source line of the matched node.
def toc_index(toc, xmldoc)
  depths = toc_index_depths(toc)
  entries = depths.keys.flat_map do |query|
    xmldoc.xpath(query).map { |node| toc_index1(query, node, depths) }
  end
  entries.sort_by { |entry| entry[:line] }
end
28
+
29
# Describe one TOC entry as a hash of :text, :depth, :target and :line.
# A following-sibling variant-title of type "toc" takes the place of
# +entry+ when present; the target is the id of the nearest
# ancestor-or-self element that has one.
def toc_index1(key, entry, depths)
  variant = entry.at("./following-sibling::variant-title[@type = 'toc']")
  source = variant || entry
  anchor = source.xpath("(./ancestor-or-self::*/@id)[last()]")[0]
  { text: source.children.to_xml, depth: depths[key].to_i,
    target: anchor.text,
    line: source.line }
end
36
+
37
# Map the text of each toc-xpath child of +toc+ to its depth attribute.
def toc_index_depths(toc)
  toc.xpath("./toc-xpath").map { |path| [path.text, path["depth"]] }.to_h
end
42
+
43
# Replace the content of +toc+ with a ul of cross-references built
# from its indexed entries, tracking the depth of the previous entry
# (starting at 1) to open and close nested lists.
def toc_cleanup1(toc, xmldoc)
  current_depth = 1
  markup = ""
  toc_index(toc, xmldoc).each do |entry|
    markup = toc_cleanup1_entry(entry, current_depth, markup)
    current_depth = entry[:depth]
  end
  toc.children = "<ul>#{markup}</ul>"
end
52
+
53
# Return +ret+ extended with the list item for +entry+, preceded by the
# tags that close or open nested lists to get from +depth+ (the depth
# of the previous entry) to the entry's own depth.
def toc_cleanup1_entry(entry, depth, ret)
  delta = depth - entry[:depth]
  nesting =
    if delta.positive? then "</ul></li>" * delta
    elsif delta.negative? then "<li><ul>" * -delta
    else ""
    end
  "#{ret}#{nesting}" \
    "<li><xref target='#{entry[:target]}'>#{entry[:text]}</xref></li>"
end
61
+
62
# In clauses and annexes of type "toc", fill in each outermost ul's
# empty cross-references and wrap the ul in a toc element.
def toc_cleanup_clause(xmldoc)
  containers = xmldoc
    .xpath("//clause[@type = 'toc'] | //annex[@type = 'toc']")
  containers.each do |container|
    container.xpath(".//ul[not(ancestor::ul)]").each do |list|
      toc_cleanup_clause_entry(xmldoc, list)
      list.replace("<toc>#{list.to_xml}</toc>")
    end
  end
end
71
+
72
# Give every text-less xref in +list+ the content of its target's
# title, preferring a variant-title of type "toc". An xref whose
# target cannot be found, or whose target has no title, is left empty
# rather than raising on nil.
def toc_cleanup_clause_entry(xmldoc, list)
  list.xpath(".//xref[not(text())]").each do |xref|
    target = xmldoc.at("//*[@id = '#{xref['target']}']") or next
    title = target.at("./variant-title[@type = 'toc']") ||
      target.at("./title")
    title or next
    xref << title.dup.children
  end
end
79
+
80
# When any TOC level setting is present, record the settings under
# misc-container, creating that element right after bibdata if the
# document has none.
def toc_metadata(xmldoc)
  return unless @htmltoclevels || @doctoclevels || @toclevels

  container = xmldoc.at("//misc-container")
  container ||=
    xmldoc.at("//bibdata").after("<misc-container/>").next_element
  toc_metadata1(container)
end
87
+
88
# Append one presentation-metadata entry to +ins+ for each TOC level
# setting that is present: the generic, the HTML and the DOC one. Each
# instance variable is paired with its own name, so the HTML and DOC
# settings are emitted and the generic one is written only once.
def toc_metadata1(ins)
  [[@toclevels, "TOC Heading Levels"],
   [@htmltoclevels, "HTML TOC Heading Levels"],
   [@doctoclevels, "DOC TOC Heading Levels"]].each do |value, name|
    value and ins << "<presentation-metadata><name>#{name}</name>"\
                     "<value>#{value}</value></presentation-metadata>"
  end
end
96
+ end
97
+ end
98
+ end