asciimath2unitsml 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d2ef44eb717d6b445489de85ee1e8eb3f5b81a6d83602c48ea23a8cbe4f100c7
4
+ data.tar.gz: 84b867f5a97b3ad154c7be8e5975ca4c97446049c4368ece6ae5e23da3c94437
5
+ SHA512:
6
+ metadata.gz: b749aa65924f4b815a7d38d2df9c9d6b48e4cd8f9db62678c100d732a60c8159b537fa2c3bd192296f533371f731ec85365f0d7f231f560f173d167971c38871
7
+ data.tar.gz: 64a47773ef26b6b870fc6a4406b697ec10b943e637a82f549946d8126830db9a521f3bdcb4e314718d084e40b7cfacbc820417476cd98663760ec8de29ba566e
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in asciimath2unitsml.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2021, Plurimath from Ribose
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.adoc ADDED
@@ -0,0 +1,80 @@
1
+ = asciimath2unitsml
2
+ Convert Asciimath via MathML to UnitsML
3
+
4
+ Encode UnitsML expressions in AsciiMath as `"unitsml(...)"`. The gem converts
5
+ AsciiMath incorporating UnitsML expressions (based on the Ascii representation provided by NIST)
6
+ into MathML complying with https://www.w3.org/TR/mathml-units/[], with
7
+ UnitsML markup embedded in it, with identifiers for each unit and dimension.
8
+ The consuming document is meant to deduplicate the instances of UnitsML markup
9
+ with the same identifier, and potentially move them elsewhere in the document
10
+ or another document.
11
+
12
+ The AsciiMath conventions used are:
13
+
14
+ * `^` for exponents, e.g. `m^-2`
15
+ * `*` to combine two units by multiplication; e.g. `m*s^-2`. Division is not supported; use negative exponents instead
16
+ * `u` for μ (micro-)
17
+
18
+ So
19
+
20
+ [source]
21
+ ----
22
+ 9 "unitsml(C^3*A)"
23
+ ----
24
+
25
+ is converted into:
26
+
27
+ [source,xml]
28
+ ----
29
+ <math xmlns='http://www.w3.org/1998/Math/MathML'>
30
+ <mrow>
31
+ <mn>9</mn>
32
+ <mo rspace='thickmathspace'>&#x2062;</mo>
33
+ <mrow xref='U_C3.A'>
34
+ <msup>
35
+ <mrow>
36
+ <mi mathvariant='normal'>C</mi>
37
+ </mrow>
38
+ <mrow>
39
+ <mn>3</mn>
40
+ </mrow>
41
+ </msup>
42
+ <mo>&#xB7;</mo>
43
+ <mi mathvariant='normal'>A</mi>
44
+ </mrow>
45
+
46
+ <Unit xmlns='http://unitsml.nist.gov/2005' xml:id='U_C3.A' dimensionURL='#D_T3I4'>
47
+ <UnitSystem name='SI' type='SI_derived' xml:lang='en-US'/>
48
+ <UnitName xml:lang='en'>C^3*A</UnitName>
49
+ <UnitSymbol type='HTML'>C<sup>3</sup> &#xB7; A</UnitSymbol>
50
+ <UnitSymbol type='MathML'>
51
+ <math xmlns='http://www.w3.org/1998/Math/MathML'>
52
+ <mrow>
53
+ <msup>
54
+ <mrow>
55
+ <mi mathvariant='normal'>C</mi>
56
+ </mrow>
57
+ <mrow>
58
+ <mn>3</mn>
59
+ </mrow>
60
+ </msup>
61
+ <mo>&#xB7;</mo>
62
+ <mi mathvariant='normal'>A</mi>
63
+ </mrow>
64
+ </math>
65
+ </UnitSymbol>
66
+ <RootUnits>
67
+ <EnumeratedRootUnit unit='coulomb' powerNumerator='3'/>
68
+ <EnumeratedRootUnit unit='ampere'/>
69
+ </RootUnits>
70
+ </Unit>
71
+ <Dimension xmlns='http://unitsml.nist.gov/2005' xml:id='D_T3I4'>
72
+ <Time symbol='T' powerNumerator='3'/>
73
+ <ElectricCurrent symbol='I' powerNumerator='4'/>
74
+ </Dimension>
75
+
76
+ </mrow>
77
+ </math>
78
+ ----
79
+
80
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,43 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path("../lib", __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "asciimath2unitsml/version"
6
+
7
# Gem specification for asciimath2unitsml.
Gem::Specification.new do |spec|
  spec.name = "asciimath2unitsml"
  spec.version = Asciimath2UnitsML::VERSION
  spec.authors = ["Ribose Inc."]
  spec.email = ["open.source@ribose.com"]

  spec.summary = "Convert Asciimath via MathML to UnitsML"
  spec.description = <<~DESCRIPTION
    Convert Asciimath via MathML to UnitsML
  DESCRIPTION

  spec.homepage = "https://github.com/plurimath/asciimath2unitsml"
  spec.license = "BSD-2-Clause"

  spec.bindir = "bin"
  spec.require_paths = ["lib"]
  spec.files = `git ls-files`.split("\n")
  # A single-item brace group ("{spec}") is not expanded by the shell, so the
  # former pathspec "{spec}/*" matched nothing; name the directory directly.
  spec.test_files = `git ls-files -- spec/*`.split("\n")
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  spec.add_dependency "asciimath"
  spec.add_dependency "htmlentities"
  spec.add_dependency "nokogiri", "~> 1.10.4"
  spec.add_dependency "rsec", "~> 1.0.0"

  spec.add_development_dependency "bundler"
  spec.add_development_dependency "byebug", "~> 9.1"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "guard", "~> 2.14"
  spec.add_development_dependency "guard-rspec", "~> 4.7"
  spec.add_development_dependency "rake", "~> 12.0"
  spec.add_development_dependency "rspec", "~> 3.6"
  spec.add_development_dependency "rubocop", "= 0.54.0"
  spec.add_development_dependency "simplecov", "~> 0.15"
  spec.add_development_dependency "timecop", "~> 0.9"
end
43
+
data/bin/rspec ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require "pathname"
10
+ ENV["BUNDLE_GEMFILE"] ||= File.expand_path(
11
+ "../../Gemfile", Pathname.new(__FILE__).realpath
12
+ )
13
+
14
+ require "rubygems"
15
+ require "bundler/setup"
16
+
17
+ load Gem.bin_path("rspec-core", "rspec")
18
+
@@ -0,0 +1,3 @@
1
+ require_relative "asciimath2unitsml/version"
2
+ require_relative "asciimath2unitsml/conv"
3
+
@@ -0,0 +1,240 @@
1
+ require "asciimath"
2
+ require "nokogiri"
3
+ require "htmlentities"
4
+ require "yaml"
5
+ require "rsec"
6
+ require_relative "string"
7
+ require_relative "parse"
8
+
9
+ module Asciimath2UnitsML
10
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
11
+ UNITSML_NS = "http://unitsml.nist.gov/2005".freeze
12
+
13
+ class Conv
14
# Loads the prefix, quantity and unit tables and builds the expression parser.
def initialize
  # Raw tables as read from the YAML files.
  @prefixes_id = read_yaml("../unitsdb/prefixes.yaml")
  @quantities = read_yaml("../unitsdb/quantities.yaml")
  @units_id = read_yaml("../unitsdb/units.yaml")
  # The same entries re-keyed by symbol.
  @prefixes = flip_name_and_id(@prefixes_id)
  @units = flip_name_and_id(@units_id)
  # parser reads @prefixes and @units, so it is built last.
  @parser = parser
end
22
+
23
+ # https://www.w3.org/TR/mathml-units/ section 2: delimit number Invisible-Times unit
24
# Converts an AsciiMath expression into MathML with UnitsML markup embedded,
# returned as a serialised XML string.
def Asciimath2UnitsML(expression)
  mathml = asciimath2mathml(expression)
  doc = Nokogiri::XML(mathml)
  MathML2UnitsML(doc).to_xml
end
28
+
29
# Replaces every MathML <mtext> whose text is "unitsml(...)" with the MathML
# rendering of the unit followed by its UnitsML markup. Mutates and returns xml.
def MathML2UnitsML(xml)
  xml.xpath(".//m:mtext", "m" => MATHML_NS).each do |node|
    next unless %r{^unitsml\(.+\)$}.match(node.text)
    expr = node.text.sub(%r{^unitsml\((.+)\)$}m, "\\1")
    parsed = parse(expr)
    # A unit directly after a number is joined to it by an invisible-times operator.
    delim = if node&.previous_element&.name == "mn"
              "<mo rspace='thickmathspace'>&#x2062;</mo>"
            else
              ""
            end
    reference = "<mrow xref='#{unit_id(expr)}'>#{mathmlsymbol(parsed)}</mrow>"
    node.replace("#{delim}#{reference}\n#{unitsml(parsed, expr)}")
  end
  xml
end
39
+
40
# Decodes HTML entities in the expression, parses it as AsciiMath and renders
# MathML, adding the MathML namespace to each bare <math> tag.
def asciimath2mathml(expression)
  builder = AsciiMath::MathMLBuilder.new(:msword => true)
  decoded = HTMLEntities.new.decode(expression)
  ast = AsciiMath.parse(decoded).ast
  rendered = builder.append_expression(ast).to_s
  rendered.gsub(/<math>/, "<math xmlns='#{MATHML_NS}'>")
end
45
+
46
# Builds the XML identifier for a unit expression: "U_" plus the database id
# when the expression is a known unit, otherwise the expression itself with
# "*" turned into "." and "^" removed.
def unit_id(text)
  known = @units[text.to_sym]
  suffix = if known
             known[:id]
           else
             text.tr("*", ".").delete("\\^")
           end
  "U_" + suffix
end
50
+
51
# Renders the UnitsML <Unit> element for the parsed units. The dimensionURL
# attribute is emitted only when dim_id yields an identifier for dims.
def unit(units, text, dims)
  dimid = dim_id(dims)
  dim_attr = dimid ? " dimensionURL='##{dimid}'" : ""
  <<~END
    <Unit xmlns='#{UNITSML_NS}' xml:id='#{unit_id(text)}'#{dim_attr}>
    #{unitsystem(units)}
    #{unitname(units, text)}
    #{unitsymbol(units)}
    #{rootunits(units)}
    </Unit>
  END
end
62
+
63
# Renders the <UnitSystem> element(s) for the parsed units.
#
# Emits a not_SI entry if any component unit is not flagged `si: true`, and an
# SI entry if any component is. The SI entry is typed SI_base only for a single
# unit whose :type list includes "si-base"; otherwise SI_derived.
#
# units - Array of hashes with a :unit key naming an entry in @units.
# Returns the elements joined by newlines ("" if units is empty).
def unitsystem(units)
  entries = units.map { |x| @units[x[:unit].to_sym] }
  ret = []
  if entries.any? { |e| e[:si] != true }
    ret << "<UnitSystem name='not_SI' type='not_SI' xml:lang='en-US'/>"
  end
  if entries.any? { |e| e[:si] == true }
    # :type may be absent on an entry; treat that as "not a base unit"
    # instead of raising, matching the safe navigation used in normalise_unit.
    base = entries.size == 1 && entries.first[:type]&.include?("si-base")
    ret << "<UnitSystem name='SI' type='#{base ? "SI_base" : "SI_derived"}' xml:lang='en-US'/>"
  end
  ret.join("\n")
end
73
+
74
# Renders the <UnitName> element: the database name when the whole expression
# is a known unit, otherwise whatever compose_name returns.
def unitname(units, text)
  entry = @units[text.to_sym]
  name = if entry
           entry[:name]
         else
           compose_name(units, text)
         end
  "<UnitName xml:lang='en'>#{name}</UnitName>"
end
78
+
79
+ # TODO: compose name from the component units
80
# Name for a unit expression that has no database entry of its own.
# Currently returns the expression text unchanged; units is not consulted.
def compose_name(units, text)
  text
end
83
+
84
# Renders the two <UnitSymbol> elements (HTML and MathML) for the parsed units.
def unitsymbol(units)
  html = htmlsymbol(units)
  mathml = mathmlsymbolwrap(units)
  <<~END
    <UnitSymbol type="HTML">#{html}</UnitSymbol>
    <UnitSymbol type="MathML">#{mathml}</UnitSymbol>
  END
end
90
+
91
# HTML rendering of the units: prefix and unit symbol, the exponent (if any)
# in <sup> with the first "-" replaced by a minus-sign entity, components
# joined by a middle-dot entity.
def htmlsymbol(units)
  rendered = units.map do |u|
    sup = u[:exponent] ? "<sup>#{u[:exponent].sub(/-/, "&#x2212;")}</sup>" : nil
    "#{u[:prefix]}#{u[:unit]}#{sup}"
  end
  rendered.join(" &#183; ")
end
97
+
98
# MathML rendering of the units: each component is an upright <mi>, wrapped in
# <msup> when it has an exponent (a leading "-" becomes a minus-sign <mo>),
# and components are joined by a middle-dot <mo>.
def mathmlsymbol(units)
  factors = units.map do |u|
    base = "<mi mathvariant='normal'>#{u[:prefix]}#{u[:unit]}</mi>"
    next base unless u[:exponent]
    power = "<mn>#{u[:exponent]}</mn>".sub(/<mn>-/, "<mo>&#x2212;</mo><mn>")
    "<msup><mrow>#{base}</mrow><mrow>#{power}</mrow></msup>"
  end
  factors.join("<mo>&#xB7;</mo>")
end
109
+
110
# Wraps the MathML unit rendering in a namespaced <math><mrow> envelope.
def mathmlsymbolwrap(units)
  inner = mathmlsymbol(units)
  <<~END
    <math xmlns='#{MATHML_NS}'>
    <mrow>#{inner}</mrow>
    </math>
  END
end
117
+
118
# Renders <RootUnits> with one <EnumeratedRootUnit> per component unit, named
# from @units, with prefix / powerNumerator attributes when present.
# Returns nil when the expression consists of exactly one unit.
def rootunits(units)
  return if units.size == 1
  roots = units.map do |u|
    attrs = ""
    attrs += " prefix='#{u[:prefix]}'" if u[:prefix]
    attrs += " powerNumerator='#{u[:exponent]}'" if u[:exponent]
    "<EnumeratedRootUnit unit='#{@units[u[:unit].to_sym][:name]}'#{attrs}/>"
  end
  <<~END
    <RootUnits>#{roots.join("\n")}</RootUnits>
  END
end
129
+
130
# Renders one <Prefix> element per distinct prefix used in the units, looked
# up in @prefixes by symbol; elements are joined by newlines.
def prefix(units)
  symbols = units.map { |u| u[:prefix] }.compact.uniq
  blocks = symbols.map do |sym|
    entry = @prefixes[sym.to_sym]
    <<~END
      <Prefix xmlns='#{UNITSML_NS}' prefixBase='#{entry[:base]}'
      prefixPower='#{entry[:power]}' xml:id='#{entry[:id]}'>
      <PrefixName xml:lang="en">#{entry[:name]}</PrefixName>
      <PrefixSymbol type="ASCII">#{entry[:symbol]}</PrefixSymbol>
      </Prefix>
    END
  end
  blocks.join("\n")
end
142
+
143
# Renders the UnitsML <Dimension> element for the given dimension list, one
# child element per entry. Returns nil when dims is nil or empty.
def dimension(dims)
  return if dims.nil? || dims.empty?
  id = dim_id(dims)
  rows = dims.map { |u| dimension1(u) }.join("\n")
  <<~END
    <Dimension xmlns='#{UNITSML_NS}' xml:id="#{id}">
    #{rows}
    </Dimension>
  END
end
151
+
152
# Maps a base unit symbol to its UnitsML dimension element name, its sort
# position (:order, used by units2dimensions) and its dimension symbol.
# "g" and "kg" both map to Mass. Frozen so the table cannot be reassigned
# into by accident, like the other constants in this file.
U2D = {
  "m" => { dimension: "Length", order: 1, symbol: "L" },
  "g" => { dimension: "Mass", order: 2, symbol: "M" },
  "kg" => { dimension: "Mass", order: 2, symbol: "M" },
  "s" => { dimension: "Time", order: 3, symbol: "T" },
  "A" => { dimension: "ElectricCurrent", order: 4, symbol: "I" },
  "K" => { dimension: "ThermodynamicTemperature", order: 5, symbol: "Theta" },
  "mol" => { dimension: "AmountOfSubstance", order: 6, symbol: "N" },
  "cd" => { dimension: "LuminousIntensity", order: 7, symbol: "J" },
}.freeze
162
+
163
# Reduces the parsed units to base units and maps each to its dimension
# record, ordered by the :order field of U2D. Returns nil when any normalised
# unit or prefix is "unknown".
def units2dimensions(units)
  norm = normalise_units(units)
  unknown = norm.any? { |u| u[:unit] == "unknown" || u[:prefix] == "unknown" }
  return if unknown
  dims = norm.map do |u|
    info = U2D[u[:unit]]
    { dimension: info[:dimension],
      unit: u[:unit],
      exponent: u[:exponent] || 1, # a missing exponent means power 1
      symbol: info[:symbol] }
  end
  dims.sort { |a, b| U2D[a[:unit]][:order] <=> U2D[b[:unit]][:order] }
end
173
+
174
# Renders one dimension record as a self-closing element named after the
# dimension, carrying its symbol and exponent as attributes.
def dimension1(u)
  "<#{u[:dimension]} symbol=\"#{u[:symbol]}\" powerNumerator=\"#{u[:exponent]}\"/>"
end
177
+
178
# Builds the dimension identifier: "D_" followed by, for each entry, its U2D
# symbol and its exponent (the exponent is omitted when it equals 1).
# Returns nil when dims is nil or empty.
def dim_id(dims)
  return nil if dims.nil? || dims.empty?
  parts = dims.map do |d|
    power = d[:exponent] == 1 ? "" : d[:exponent].to_s
    U2D[d[:unit]][:symbol] + power
  end
  "D_" + parts.join("")
end
182
+
183
# Normalises each unit individually, flattens the results into one list and
# merges entries that name the same unit.
def normalise_units(units)
  expanded = units.map { |u| normalise_unit(u) }.flatten
  gather_units(expanded)
end
186
+
187
# Sorts the units by symbol and merges adjacent entries for the same unit into
# one entry whose exponent is the sum of the two (a missing exponent counts
# as 1) and whose prefix is the combination of both prefixes.
# NOTE(review): @prefixes is keyed by Symbol in flip_name_and_id; confirm the
# :prefix values used as keys here are Symbols too, otherwise both lookups
# yield nil and the merged prefix is always nil.
def gather_units(units)
  merged = []
  units.sort { |a, b| a[:unit] <=> b[:unit] }.each do |k|
    last = merged[-1]
    if last.nil? || last[:unit] != k[:unit]
      merged << k
    else
      merged[-1] = {
        prefix: combine_prefixes(@prefixes[last[:prefix]], @prefixes[k[:prefix]]),
        unit: last[:unit],
        exponent: (k[:exponent]&.to_i || 1) + (last[:exponent]&.to_i || 1),
      }
    end
  end
  merged
end
197
+
198
# Reduces one parsed unit to base units.
# - A unit whose :type includes "si-base" is returned unchanged.
# - A unit with no :bases list becomes an entry with unit "unknown".
# - Otherwise returns one entry per base, with the base's power multiplied by
#   the unit's exponent (missing values count as 1).
# NOTE(review): @prefixes_id / @prefixes are Symbol-keyed after loading; the
# lookups below pass k["prefix"] and u[:prefix] as-is. Confirm the key types.
def normalise_unit(u)
  entry = @units[u[:unit].to_sym]
  return u if entry[:type]&.include?("si-base")
  return { prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] } unless entry[:bases]
  entry[:bases].map do |k|
    prefix = if k["prefix"]
               combine_prefixes(@prefixes_id[k["prefix"]], @prefixes[u[:prefix]])
             else
               u[:prefix]
             end
    { prefix: prefix,
      unit: @units_id[k["id"].to_sym][:symbol],
      exponent: (k["power"]&.to_i || 1) * (u[:exponent]&.to_i || 1) }
  end
end
210
+
211
# Combines two prefix records into the symbol of a single equivalent prefix.
#
# p1, p2 - prefix hashes with :symbol, :base and :power, or nil.
#
# Returns nil when both are nil, the other prefix's symbol when one is nil,
# and "unknown" when the bases differ or no prefix in @prefixes has the same
# base and the summed power.
def combine_prefixes(p1, p2)
  return nil if p1.nil? && p2.nil?
  return p1[:symbol] if p2.nil?
  return p2[:symbol] if p1.nil?
  return "unknown" if p1[:base] != p2[:base]
  power = p1[:power] + p2[:power]
  # @prefixes is a Hash, so iterate its values; iterating the Hash itself
  # yields [key, value] pairs, which cannot be indexed by Symbol.
  match = @prefixes.each_value.find { |p| p[:base] == p1[:base] && p[:power] == power }
  match ? match[:symbol] : "unknown"
end
221
+
222
# Parses a UnitsML expression with @parser and returns the parsed units.
# Raises Rsec::SyntaxError when the parser yields nothing or an invalid result.
def parse(x)
  units = @parser.parse(x)
  failed = !units || Rsec::INVALID[units]
  raise Rsec::SyntaxError.new("error parsing UnitsML expression", x, 1, 0) if failed
  Rsec::Fail.reset
  units
end
230
+
231
# Assembles the UnitsML markup for a unit expression: the <Unit> element, the
# <Prefix> elements and the <Dimension> element, one per line.
def unitsml(units, text)
  dims = units2dimensions(units)
  unit_xml = unit(units, text, dims)
  prefix_xml = prefix(units)
  dimension_xml = dimension(dims)
  <<~END
    #{unit_xml}
    #{prefix_xml}
    #{dimension_xml}
  END
end
239
+ end
240
+ end
@@ -0,0 +1,58 @@
1
+ module Asciimath2UnitsML
2
+ class Conv
3
+ include Rsec::Helpers
4
+
5
# Loads the YAML file at path (relative to this source file's directory) and
# returns it with String hash keys converted to Symbols.
def read_yaml(path)
  full_path = File.join(File.dirname(__FILE__), path)
  symbolize_keys(YAML.load_file(full_path))
end
8
+
9
# Re-keys a table by symbol. Entries with a nil or empty :name are skipped.
# The symbol is :symbol, falling back to :short. Each kept entry is updated in
# place (so the input table sees it too) with :symbol and with :id set to its
# original key as a String.
def flip_name_and_id(yaml)
  flipped = {}
  yaml.each do |id, entry|
    next if entry[:name].nil? || entry[:name].empty?
    symbol = entry[:symbol] || entry[:short]
    key = symbol.to_sym
    flipped[key] = entry
    entry[:symbol] = symbol
    entry[:id] = id.to_s
  end
  flipped
end
18
+
19
# Returns a copy of hash with String keys converted to Symbols, recursing into
# Hash values. Non-String keys and non-Hash values (including Arrays and
# anything inside them) are carried over unchanged.
def symbolize_keys(hash)
  hash.each_with_object({}) do |(key, value), result|
    sym_key = key.is_a?(String) ? key.to_sym : key
    result[sym_key] = value.is_a?(Hash) ? symbolize_keys(value) : value
  end
end
33
+
34
# Builds the Rsec parser for UnitsML expressions.
#
# Grammar: units joined by "*", each unit being an optional prefix, a unit
# symbol and an optional "^" exponent; the whole input must be consumed.
# Each unit parses to { prefix:, unit:, exponent: }, with the exponent kept
# as a String without the "^" (nil when absent).
#
# Unit symbols whose :type includes "buildable", or that contain "*" or "^",
# are excluded from the alternation. Longer unit symbols are tried first.
def parser
  # Escape prefix symbols as well as unit symbols so none is read as regexp syntax.
  prefix = /#{@prefixes.keys.map { |k| Regexp.escape(k) }.join("|")}/.r
  unit_keys = @units.keys.reject do |k|
    @units[k][:type]&.include?("buildable") || /\*|\^/.match(k)
  end.map { |k| Regexp.escape(k) }
  unit1 = /#{unit_keys.sort_by(&:length).reverse.join("|")}/.r
  exponent = /\^-?\d+/.r.map { |m| m.sub(/\^/, "") }
  multiplier = /\*/.r
  # x[0] is the whole matched prefix; indexing it again (x[0][0]) would keep
  # only its first character and truncate multi-character prefixes.
  unit = seq(unit1, exponent._?) { |x| { prefix: nil, unit: x[0], exponent: x[1][0] } } |
    seq(prefix, unit1, exponent._?) { |x| { prefix: x[0], unit: x[1], exponent: x[2][0] } }
  units_tail = seq(multiplier, unit) { |u| u[1] }
  units = seq(unit, units_tail.star) { |x| [x[0], x[1]].flatten }
  units.eof
end
48
+
49
# Runs @parser over a UnitsML expression and returns the parsed units, raising
# Rsec::SyntaxError when the result is missing or flagged invalid.
def parse(x)
  units = @parser.parse(x)
  unless units && !Rsec::INVALID[units]
    raise Rsec::SyntaxError.new("error parsing UnitsML expression", x, 1, 0)
  end
  Rsec::Fail.reset
  units
end
57
+ end
58
+ end