stepmod-utils 0.1.7 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +42 -0
  3. data/.github/workflows/release.yml +23 -20
  4. data/Gemfile +2 -0
  5. data/Makefile +5 -0
  6. data/README.adoc +136 -2
  7. data/exe/stepmod-annotate-all +39 -0
  8. data/exe/stepmod-build-resource-docs-cache +19 -0
  9. data/exe/stepmod-convert-express-description +33 -0
  10. data/exe/stepmod-convert-express-resource +33 -0
  11. data/exe/stepmod-extract-terms +56 -13
  12. data/exe/stepmod-find-express-files +23 -0
  13. data/lib/stepmod/utils/cleaner.rb +11 -0
  14. data/lib/stepmod/utils/concept.rb +16 -3
  15. data/lib/stepmod/utils/converters/a.rb +47 -0
  16. data/lib/stepmod/utils/converters/blockquote.rb +22 -0
  17. data/lib/stepmod/utils/converters/br.rb +15 -0
  18. data/lib/stepmod/utils/converters/bypass.rb +81 -0
  19. data/lib/stepmod/utils/converters/code.rb +19 -0
  20. data/lib/stepmod/utils/converters/comment.rb +16 -0
  21. data/lib/stepmod/utils/converters/dd.rb +15 -0
  22. data/lib/stepmod/utils/converters/def.rb +11 -4
  23. data/lib/stepmod/utils/converters/dl.rb +31 -0
  24. data/lib/stepmod/utils/converters/drop.rb +22 -0
  25. data/lib/stepmod/utils/converters/dt.rb +17 -0
  26. data/lib/stepmod/utils/converters/em_express_description.rb +22 -0
  27. data/lib/stepmod/utils/converters/eqn.rb +97 -0
  28. data/lib/stepmod/utils/converters/example.rb +1 -6
  29. data/lib/stepmod/utils/converters/express_g.rb +49 -0
  30. data/lib/stepmod/utils/converters/express_ref_express_description.rb +13 -0
  31. data/lib/stepmod/utils/converters/ext_description.rb +17 -0
  32. data/lib/stepmod/utils/converters/ext_descriptions.rb +14 -0
  33. data/lib/stepmod/utils/converters/fund_cons.rb +21 -0
  34. data/lib/stepmod/utils/converters/head.rb +22 -0
  35. data/lib/stepmod/utils/converters/hr.rb +15 -0
  36. data/lib/stepmod/utils/converters/ignore.rb +16 -0
  37. data/lib/stepmod/utils/converters/introduction.rb +15 -0
  38. data/lib/stepmod/utils/converters/note.rb +1 -6
  39. data/lib/stepmod/utils/converters/ol.rb +3 -2
  40. data/lib/stepmod/utils/converters/p.rb +21 -0
  41. data/lib/stepmod/utils/converters/pass_through.rb +13 -0
  42. data/lib/stepmod/utils/converters/q.rb +16 -0
  43. data/lib/stepmod/utils/converters/resource.rb +14 -0
  44. data/lib/stepmod/utils/converters/schema.rb +19 -0
  45. data/lib/stepmod/utils/converters/schema_diag.rb +14 -0
  46. data/lib/stepmod/utils/converters/strong.rb +41 -0
  47. data/lib/stepmod/utils/converters/sub.rb +24 -0
  48. data/lib/stepmod/utils/converters/sup.rb +22 -0
  49. data/lib/stepmod/utils/converters/table.rb +62 -0
  50. data/lib/stepmod/utils/converters/text.rb +68 -0
  51. data/lib/stepmod/utils/html_to_asciimath.rb +157 -0
  52. data/lib/stepmod/utils/smrl_description_converter.rb +49 -0
  53. data/lib/stepmod/utils/smrl_resource_converter.rb +67 -0
  54. data/lib/stepmod/utils/stepmod_file_annotator.rb +78 -0
  55. data/lib/stepmod/utils/version.rb +1 -1
  56. data/migrating_from_cvs.adoc +190 -0
  57. data/stepmod-utils.gemspec +2 -0
  58. metadata +84 -9
  59. data/.github/workflows/macos.yml +0 -39
  60. data/.github/workflows/ubuntu.yml +0 -53
  61. data/.github/workflows/windows.yml +0 -41
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Stepmod
4
+ module Utils
5
+ module Converters
6
+ class Table < ReverseAdoc::Converters::Base
7
# Render a table node as an AsciiDoc table block.
#
# The output starts with two newlines, followed by an optional `[[id]]`
# anchor (from the node's `id` attribute), the attribute list produced by
# `style`, an optional `.title` line (from the node's `caption` attribute)
# and finally the converted children wrapped in `|===` delimiters.
#
# @param node the table node; read through `node['attr']`
# @param state [Hash] conversion state handed on to `treat_children`
# @return [String] the AsciiDoc fragment for the table
def convert(node, state = {})
  pieces = ["\n\n"]
  pieces << "[[#{node['id']}]]\n" if node['id']
  pieces << style(node).to_s

  caption = node['caption'].to_s
  pieces << ".#{caption}\n" unless caption.empty?

  pieces << "|===\n#{treat_children(node, state)}\n|===\n"
  pieces.join
end
15
+
16
# Translate the node's `frame` attribute into an AsciiDoc `frame=` setting.
#
# @param node the table node; read through `node["frame"]`
# @return [String, nil] the matching `frame=...` attribute, or nil when the
#   attribute is absent or has a value that is not listed here
def frame(node)
  {
    "void" => "frame=none",
    "hsides" => "frame=topbot",
    "vsides" => "frame=sides",
    "box" => "frame=all",
    "border" => "frame=all",
  }[node["frame"]]
end
30
+
31
# Translate the node's `rules` attribute into an AsciiDoc `rules=` setting.
#
# Only the values "all", "rows", "cols" and "none" are passed through.
#
# @param node the table node; read through `node["rules"]`
# @return [String, nil] `rules=<value>` for a recognised value, otherwise nil
def rules(node)
  {
    "all" => "rules=all",
    "rows" => "rules=rows",
    "cols" => "rules=cols",
    "none" => "rules=none",
  }[node["rules"]]
end
45
+
46
# Build the AsciiDoc attribute list line for a table.
#
# Collects `width=<value>` (when the node has a `width` attribute) together
# with whatever `frame(node)` and `rules(node)` return, skipping nil entries.
#
# The attributes are gathered into an Array and joined; the previous
# implementation used `attrs += "string"`, which raises TypeError because a
# String cannot be concatenated onto an Array.
#
# @param node the table node; read through `node['width']`
# @return [String] `"[a,b,c]\n"`, or an empty string when nothing applies
def style(node)
  width = "width=#{node['width']}" if node['width']
  attrs = [width, frame(node), rules(node)].compact
  return "" if attrs.empty?

  "[#{attrs.join(',')}]\n"
end
57
+ end
58
+
59
+ ReverseAdoc::Converters.register :table, Table.new
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Stepmod
4
+ module Utils
5
+ module Converters
6
+ class Text < ReverseAdoc::Converters::Base
7
# Convert a text node.
#
# Nodes whose text is blank after stripping are delegated to `treat_empty`;
# every other node goes through `treat_text`.
#
# @param node the text node; must respond to `text`
# @param state [Hash] conversion state, only consulted for blank nodes
# @return [String]
def convert(node, state = {})
  return treat_text(node) unless node.text.strip.empty?

  treat_empty(node, state)
end
14
+
15
+ private
16
+
17
# Decide what a whitespace-only text node contributes to the output.
#
# A single regular space is kept as " "; everything else collapses to "".
# Nothing is emitted directly inside `ol`/`ul` (otherwise the list
# indentation is broken) or while `state[:tdsinglepara]` is set.
#
# @param node the text node; must respond to `text` and `parent.name`
# @param state [Hash] conversion state
# @return [String] either " " or ""
def treat_empty(node, state)
  return '' if %i[ol ul].include?(node.parent.name.to_sym)
  return '' if state[:tdsinglepara]

  node.text == ' ' ? ' ' : ''
end
29
+
30
# Run a non-blank text node through the text clean-up pipeline.
#
# Steps, in order: keep non-breaking spaces, flatten inner newlines/tabs,
# undo escaping inside backtick spans, escape angle brackets.
# Border newlines are intentionally NOT stripped here
# (`remove_border_newlines` is not part of the pipeline).
#
# @param node the text node; must respond to `text`
# @return [String] the processed text
def treat_text(node)
  with_nbsp = preserve_nbsp(node.text)
  single_line = remove_inner_newlines(with_nbsp)
  code_restored = preserve_keychars_within_backticks(single_line)
  preserve_tags(code_restored)
end
41
+
42
# Replace every non-breaking space (U+00A0) with the `&nbsp;` entity.
#
# @param text [String]
# @return [String] a new string with the replacements applied
def preserve_nbsp(text)
  text.gsub("\u00A0", "&nbsp;")
end
45
+
46
# Prefix every `<` and `>` in the text with a backslash.
#
# @param text [String]
# @return [String] a new string with the angle brackets escaped
def preserve_tags(text)
  text.gsub(/[<>]/) { |bracket| "\\#{bracket}" }
end
49
+
50
# Strip all newlines from the very start and the very end of the text.
# Newlines in the middle of the text are left alone.
#
# @param text [String]
# @return [String] a new string without leading/trailing newlines
def remove_border_newlines(text)
  without_leading = text.sub(/\A\n+/, '')
  without_leading.sub(/\n+\z/, '')
end
53
+
54
# Turn newlines and tabs into spaces, then collapse each run of spaces
# into a single space.
#
# @param text [String]
# @return [String] a new, single-line string
def remove_inner_newlines(text)
  spaced = text.gsub(/[\n\t]/, ' ')
  spaced.gsub(/ {2,}/, ' ')
end
57
+
58
# Undo backslash-escaping of `_` and `*` inside backtick-delimited spans.
# Text outside of backticks is returned unchanged.
#
# @param text [String]
# @return [String] a new string with `\_` / `\*` unescaped inside backticks
def preserve_keychars_within_backticks(text)
  text.gsub(/`.*?`/) do |code_span|
    code_span.gsub(/\\[_*]/) { |escaped| escaped[1] }
  end
end
63
+ end
64
+
65
+ ReverseAdoc::Converters.register :text, Text.new
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,157 @@
1
+ module Stepmod
2
+ module Utils
3
+ class HtmlToAsciimath
4
# Rewrite inline HTML math markup in `input` as AsciiDoc/AsciiMath text.
#
# nil and empty input are returned untouched. Otherwise the input is parsed
# as an HTML fragment and rewritten in this order:
#
# 1. `<i>`: removed when its converted text is empty, replaced by
#    `stem:[...]` when the converted text is 1..12 characters long, and by
#    `_..._` when it is longer.
# 2. `<sub>` / `<sup>`: removed when empty, otherwise replaced by
#    `~...~` / `^...^`.
# 3. `<li>` inside `<ol>` / `<ul>`: replaced by ". text" / "* text".
# 4. `<font>` whose style mentions sans-serif: replaced by backticked text.
#
# Finally adjacent `stem:[..]stem:[..]` runs are merged, the remaining
# `<ol>`/`<ul>` tags are dropped, and named Greek entities are turned into
# stem macros by `html_entities_to_stem`.
#
# @param input [String, nil] HTML fragment
# @return [String, nil] the converted text (or `input` itself when blank)
def call(input)
  return input if input.nil? || input.empty?

  fragment = Nokogiri::HTML.fragment(input, "UTF-8")

  fragment.css('i').each do |italic|
    decoded = text_to_asciimath(italic.text)
    if decoded.empty?
      italic.remove
    elsif decoded.length <= 12
      italic.replace "stem:[#{decoded}]"
    else
      italic.replace "_#{decoded}_"
    end
  end

  # Subscripts first, then superscripts.
  { 'sub' => '~', 'sup' => '^' }.each do |tag, mark|
    fragment.css(tag).each do |script|
      if script.text.length.zero?
        script.remove
      else
        script.replace "#{mark}#{text_to_asciimath(script.text)}#{mark}"
      end
    end
  end

  # Ordered lists first, then unordered lists.
  { 'ol' => '.', 'ul' => '*' }.each do |list_tag, bullet|
    fragment.css(list_tag).each do |list|
      list.css('li').each do |item|
        item.replace "#{bullet} #{item.text}"
      end
    end
  end

  # Replace sans-serif font with monospace
  fragment.css('font[style*="sans-serif"]').each do |font|
    font.replace "`#{font.text}`"
  end

  markup = fragment.children.to_s.gsub(/\]stem:\[/, '').gsub(/<\/?[uo]l>/, '')
  html_entities_to_stem(markup)
end
63
+
64
# Decode the HTML in `text` (via `decode_html`) and map the remaining
# entities to AsciiMath with `html_entities_to_asciimath`.
#
# @param text an object responding to `decode_html`
# @return whatever `html_entities_to_asciimath` returns for the decoded text
def text_to_asciimath(text)
  decoded = text.decode_html
  html_entities_to_asciimath(decoded)
end
67
+
68
# Replace HTML entities in `x` with their AsciiMath equivalents.
#
# Named Greek entities (`&alpha;`, `&Gamma;`, ...) become the bare AsciiMath
# name (`alpha`, `Gamma`, ...). A fixed set of numeric character references
# (`&#967;`, `&#215;`, ...) becomes the corresponding Unicode character.
#
# Fixes over the previous implementation:
# * the `gsub` chain was broken after `&omega;` (missing trailing `.`), so
#   the next line called `gsub` without a receiver and raised NoMethodError;
# * numeric references were matched without their terminating `;`, leaving
#   a stray semicolon behind, and could match the prefix of a longer code
#   point (e.g. `&#9670;`). The `;` is now consumed when present and a
#   following digit prevents the match.
#
# @param x [String] text possibly containing HTML entities
# @return [String] a new string with the known entities replaced
def html_entities_to_asciimath(x)
  greek_names = %w[
    alpha beta gamma Gamma delta Delta epsilon varepsilon zeta eta theta
    Theta vartheta iota kappa lambda Lambda mu nu xi Xi pi Pi rho sigma
    Sigma tau upsilon phi Phi varphi chi psi Psi omega
  ]
  numeric_entities = {
    "967" => "χ",
    "215" => "×",
    "931" => "Σ",
    "961" => "ρ",
    "963" => "σ",
    "955" => "λ",
    "964" => "τ",
    "8706" => "∂",
    "8804" => "≤",
    "8805" => "≥",
  }

  named_pattern = /&(#{greek_names.join('|')});/
  numeric_pattern = /&#(#{numeric_entities.keys.join('|')})(?!\d);?/

  x.gsub(named_pattern) { Regexp.last_match(1) }.
    gsub(numeric_pattern) { numeric_entities[Regexp.last_match(1)] }
end
116
+
117
# Wrap named Greek HTML entities in AsciiDoc stem macros,
# e.g. `&alpha;` becomes `stem:[alpha]`.
#
# Only the entity names listed below are touched; any other entity is left
# as it is.
#
# @param x [String] text possibly containing named Greek entities
# @return [String] a new string with the entities replaced
def html_entities_to_stem(x)
  greek_names = %w[
    alpha beta gamma Gamma delta Delta epsilon varepsilon zeta eta theta
    Theta vartheta iota kappa lambda Lambda mu nu xi Xi pi Pi rho sigma
    Sigma tau upsilon phi Phi varphi chi psi Psi omega
  ]

  x.gsub(/&(#{greek_names.join('|')});/) { "stem:[#{Regexp.last_match(1)}]" }
end
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'reverse_adoc'
4
+ require 'stepmod/utils/converters/a'
5
+ require 'stepmod/utils/converters/blockquote'
6
+ require 'stepmod/utils/converters/br'
7
+ require 'stepmod/utils/converters/bypass'
8
+ require 'stepmod/utils/converters/code'
9
+ require 'stepmod/utils/converters/drop'
10
+ require 'stepmod/utils/converters/em_express_description'
11
+ require 'stepmod/utils/converters/example'
12
+ require 'stepmod/utils/converters/express_ref_express_description'
13
+ require 'stepmod/utils/converters/ext_description'
14
+ require 'stepmod/utils/converters/ext_descriptions'
15
+ require 'stepmod/utils/converters/head'
16
+ require 'stepmod/utils/converters/hr'
17
+ require 'stepmod/utils/converters/ignore'
18
+ require 'stepmod/utils/converters/note'
19
+ require 'stepmod/utils/converters/p'
20
+ require 'stepmod/utils/converters/pass_through'
21
+ require 'stepmod/utils/converters/q'
22
+ require 'stepmod/utils/converters/strong'
23
+ require 'stepmod/utils/converters/sub'
24
+ require 'stepmod/utils/converters/sup'
25
+ require 'stepmod/utils/converters/text'
26
+ require 'stepmod/utils/cleaner'
27
+
28
+ module Stepmod
29
+ module Utils
30
+ class SmrlDescriptionConverter
31
# Convert a description document (or node) to AsciiDoc.
#
# Accepts an XML string, a Nokogiri document, or a Nokogiri node and picks
# the root node accordingly. When no root can be determined (unsupported
# input type, or a string without a root element) an empty string is
# returned. Otherwise the converter registered for the root's element name
# is run inside `ReverseAdoc.config.with(options)` and its result is tidied
# by `Stepmod::Utils::Cleaner`.
#
# @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
# @param options [Hash] passed to `ReverseAdoc.config.with`
# @return the tidied conversion result, or '' when there is no root
def self.convert(input, options = {})
  root =
    case input
    when String then Nokogiri::XML(input).root
    when Nokogiri::XML::Document then input.root
    when Nokogiri::XML::Node then input
    end
  return '' unless root

  ReverseAdoc.config.with(options) do
    converter = ReverseAdoc::Converters.lookup(root.name)
    Stepmod::Utils::Cleaner.new.tidy(converter.convert(root))
  end
end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'reverse_adoc'
4
+ require 'reverse_adoc/converters/bypass'
5
+ require 'reverse_adoc/converters/pass_through'
6
+ require 'stepmod/utils/converters/a'
7
+ require 'stepmod/utils/converters/blockquote'
8
+ require 'stepmod/utils/converters/br'
9
+ require 'stepmod/utils/converters/bypass'
10
+ require 'stepmod/utils/converters/code'
11
+ require 'stepmod/utils/converters/comment'
12
+ require 'stepmod/utils/converters/dd'
13
+ require 'stepmod/utils/converters/dl'
14
+ require 'stepmod/utils/converters/dt'
15
+ require 'stepmod/utils/converters/drop'
16
+ require 'stepmod/utils/converters/example'
17
+ require 'stepmod/utils/converters/express_g'
18
+ require 'stepmod/utils/converters/fund_cons'
19
+ require 'stepmod/utils/converters/eqn'
20
+ require 'stepmod/utils/converters/head'
21
+ require 'stepmod/utils/converters/hr'
22
+ require 'stepmod/utils/converters/ignore'
23
+ require 'stepmod/utils/converters/introduction'
24
+ require 'stepmod/utils/converters/note'
25
+ require 'stepmod/utils/converters/ol'
26
+ require 'stepmod/utils/converters/p'
27
+ require 'stepmod/utils/converters/pass_through'
28
+ require 'stepmod/utils/converters/q'
29
+ require 'stepmod/utils/converters/resource'
30
+ require 'stepmod/utils/converters/schema_diag'
31
+ require 'stepmod/utils/converters/schema'
32
+ require 'stepmod/utils/converters/strong'
33
+ require 'stepmod/utils/converters/sub'
34
+ require 'stepmod/utils/converters/sup'
35
+ require 'stepmod/utils/converters/table'
36
+ require 'stepmod/utils/converters/text'
37
+ require 'stepmod/utils/cleaner'
38
+
39
+ require 'reverse_adoc/converters/figure'
40
+ require 'reverse_adoc/converters/img'
41
+ require 'reverse_adoc/converters/li'
42
+ require 'reverse_adoc/converters/tr'
43
+ require 'reverse_adoc/converters/td'
44
+ require 'reverse_adoc/converters/th'
45
+
46
+ module Stepmod
47
+ module Utils
48
+ class SmrlResourceConverter
49
# Convert a resource document (or node) to AsciiDoc.
#
# Accepts an XML string, a Nokogiri document, or a Nokogiri node and picks
# the root node accordingly. When no root can be determined (unsupported
# input type, or a string without a root element) an empty string is
# returned. Otherwise the converter registered for the root's element name
# is run inside `ReverseAdoc.config.with(options)` and its result is tidied
# by `Stepmod::Utils::Cleaner`.
#
# @param input [String, Nokogiri::XML::Document, Nokogiri::XML::Node]
# @param options [Hash] passed to `ReverseAdoc.config.with`
# @return the tidied conversion result, or '' when there is no root
def self.convert(input, options = {})
  root =
    case input
    when String then Nokogiri::XML(input).root
    when Nokogiri::XML::Document then input.root
    when Nokogiri::XML::Node then input
    end
  return '' unless root

  ReverseAdoc.config.with(options) do
    converter = ReverseAdoc::Converters.lookup(root.name)
    Stepmod::Utils::Cleaner.new.tidy(converter.convert(root))
  end
end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,78 @@
1
+ require 'json'
2
+ require 'stepmod/utils/smrl_description_converter'
3
+ require 'stepmod/utils/smrl_resource_converter'
4
+
5
+ module Stepmod
6
+ module Utils
7
+ class StepmodFileAnnotator
8
+ attr_reader :express_file, :resource_docs_cache_file, :stepmod_dir
9
+
10
# Store the inputs used by `call`.
#
# @param express_file [String] path to the exp file needed to annotate
# @param resource_docs_cache_file [String] output of ./stepmod-build-resource-docs-cache
# @param stepmod_dir [String, nil] stepmod directory; falls back to the
#   current working directory (`Dir.pwd`) when not given
def initialize(express_file:, resource_docs_cache_file:, stepmod_dir: nil)
  @stepmod_dir = stepmod_dir || Dir.pwd
  @resource_docs_cache_file = resource_docs_cache_file
  @express_file = express_file
end
17
+
18
# Return the contents of the EXPRESS file with converted documentation
# appended.
#
# The descriptions file is looked up next to the EXPRESS file:
# `<arm|mim|bom>_descriptions.xml` for `arm.exp` / `mim.exp` / `bom.exp`,
# `descriptions.xml` for any other file name. When it does not exist the
# EXPRESS text is returned unchanged.
#
# For every `ext_description` element in the descriptions file:
# * the resource documentation of the base schema (first dot-separated
#   segment of `linkend`) is appended once, if the cache lists it;
# * the element's own text is converted and appended when it is non-blank
#   and the cache has no entry for the exact `linkend`;
# * the resource documentation for the exact `linkend` is appended once,
#   if the cache lists it.
#
# Uses `File.exist?`; the previously used `File.exists?` alias was removed
# in Ruby 3.2 and raises NoMethodError there.
#
# @return [String] annotated EXPRESS text
def call
  match = File.basename(express_file).match(/^(arm|mim|bom)\.exp$/)
  descriptions_base = match ? "#{match.captures[0]}_descriptions.xml" : 'descriptions.xml'
  descriptions_file = File.join(File.dirname(express_file), descriptions_base)
  output_express = File.read(express_file)
  resource_docs_cache = JSON.parse(File.read(resource_docs_cache_file))

  return output_express unless File.exist?(descriptions_file)

  descriptions = Nokogiri::XML(File.read(descriptions_file)).root
  added_resource_descriptions = {}
  descriptions.xpath('ext_description').each do |description|
    linkend = description['linkend']

    # Add base resource from linked path if exists, eg "language_schema.language.wr:WR1" -> "language_schema"
    base_linked = linkend.to_s.split('.').first
    if added_resource_descriptions[base_linked].nil?
      base_resource_doc_dir = resource_docs_cache[base_linked]
      if base_resource_doc_dir
        output_express << convert_from_resource_file(base_resource_doc_dir, stepmod_dir, base_linked, descriptions_file)
      end
      added_resource_descriptions[base_linked] = true
    end

    resource_docs_dir = resource_docs_cache[linkend]
    # Convert content description
    # when a schema description is available from resource.xml and also descriptions.xml, the description from resource.xml is only used.
    # https://github.com/metanorma/annotated-express/issues/32#issuecomment-792609078
    if description.text.strip.length.positive? && resource_docs_dir.nil?
      output_express << convert_from_description_text(descriptions_file, description)
    end

    # Add converted description from exact linked path
    if resource_docs_dir && added_resource_descriptions[linkend].nil?
      output_express << convert_from_resource_file(resource_docs_dir, stepmod_dir, linkend, descriptions_file)
      added_resource_descriptions[linkend] = true
    end
  end

  output_express
end
55
+
56
+ private
57
+
58
# Convert a single description element with SmrlDescriptionConverter.
#
# The element's string form is wrapped in an `<ext_descriptions>` element
# and converted while the working directory is switched to the directory
# of the descriptions file.
#
# @param descriptions_file [String] path of the descriptions file
# @param description the description element; its `to_s` is embedded
# @return [String] the converted text, prefixed with a newline
def convert_from_description_text(descriptions_file, description)
  descriptions_dir = File.dirname(descriptions_file)
  Dir.chdir(descriptions_dir) do
    xml = "<ext_descriptions>#{description}</ext_descriptions>"
    "\n" + Stepmod::Utils::SmrlDescriptionConverter.convert(xml)
  end
end
64
+
65
# Convert the schema named `linked` from a resource.xml file with
# SmrlResourceConverter.
#
# The file is read from
# `<stepmod_dir>/data/resource_docs/<resource_docs_dir>/resource.xml`; its
# path is printed to stdout. The matching `schema` elements are wrapped in
# a `<resource>` element and converted while the working directory is
# switched to the directory of the descriptions file.
#
# @param resource_docs_dir [String] directory name below data/resource_docs
# @param stepmod_dir [String] stepmod directory
# @param linked [String] value matched against the schema `name` attribute
# @param descriptions_file [String] path of the descriptions file
# @return [String] the converted text, prefixed with a newline
def convert_from_resource_file(resource_docs_dir, stepmod_dir, linked, descriptions_file)
  resource_docs_file = File.join(stepmod_dir, 'data/resource_docs', resource_docs_dir, 'resource.xml')
  puts(resource_docs_file)
  resource_root = Nokogiri::XML(File.read(resource_docs_file)).root
  schema = resource_root.xpath("schema[@name='#{linked}']")

  Dir.chdir(File.dirname(descriptions_file)) do
    "\n" + Stepmod::Utils::SmrlResourceConverter.convert("<resource>#{schema}</resource>")
  end
end
76
+ end
77
+ end
78
+ end