asciimath2unitsml 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d2ef44eb717d6b445489de85ee1e8eb3f5b81a6d83602c48ea23a8cbe4f100c7
4
+ data.tar.gz: 84b867f5a97b3ad154c7be8e5975ca4c97446049c4368ece6ae5e23da3c94437
5
+ SHA512:
6
+ metadata.gz: b749aa65924f4b815a7d38d2df9c9d6b48e4cd8f9db62678c100d732a60c8159b537fa2c3bd192296f533371f731ec85365f0d7f231f560f173d167971c38871
7
+ data.tar.gz: 64a47773ef26b6b870fc6a4406b697ec10b943e637a82f549946d8126830db9a521f3bdcb4e314718d084e40b7cfacbc820417476cd98663760ec8de29ba566e
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in asciimath2unitsml.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2021, Plurimath from Ribose
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.adoc ADDED
@@ -0,0 +1,80 @@
1
+ = asciimath2unitsml
2
+ Convert AsciiMath via MathML to UnitsML
3
+
4
+ Encode UnitsML expressions in AsciiMath as `"unitsml(...)"`. The gem converts
5
+ AsciiMath incorporating UnitsML expressions (based on the ASCII representation provided by NIST)
6
+ into MathML complying with https://www.w3.org/TR/mathml-units/[], with
7
+ UnitsML markup embedded in it, with identifiers for each unit and dimension.
8
+ The consuming document is meant to deduplicate the instances of UnitsML markup
9
+ with the same identifier, and potentially move them elsewhere in the document
10
+ or another document.
11
+
12
+ The AsciiMath conventions used are:
13
+
14
+ * `^` for exponents, e.g. `m^-2`
15
+ * `*` to combine two units by multiplication; e.g. `m*s^-2`. Division is not supported; use negative exponents instead
16
+ * `u` for μ (micro-)
17
+
18
+ So
19
+
20
+ [source]
21
+ ----
22
+ 9 "unitsml(C^3*A)"
23
+ ----
24
+
25
+ is converted into:
26
+
27
+ [source,xml]
28
+ ----
29
+ <math xmlns='http://www.w3.org/1998/Math/MathML'>
30
+ <mrow>
31
+ <mn>9</mn>
32
+ <mo rspace='thickmathspace'>&#x2062;</mo>
33
+ <mrow xref='U_C3.A'>
34
+ <msup>
35
+ <mrow>
36
+ <mi mathvariant='normal'>C</mi>
37
+ </mrow>
38
+ <mrow>
39
+ <mn>3</mn>
40
+ </mrow>
41
+ </msup>
42
+ <mo>&#xB7;</mo>
43
+ <mi mathvariant='normal'>A</mi>
44
+ </mrow>
45
+
46
+ <Unit xmlns='http://unitsml.nist.gov/2005' xml:id='U_C3.A' dimensionURL='#D_T3I4'>
47
+ <UnitSystem name='SI' type='SI_derived' xml:lang='en-US'/>
48
+ <UnitName xml:lang='en'>C^3*A</UnitName>
49
+ <UnitSymbol type='HTML'>C<sup>3</sup> &#xB7; A</UnitSymbol>
50
+ <UnitSymbol type='MathML'>
51
+ <math xmlns='http://www.w3.org/1998/Math/MathML'>
52
+ <mrow>
53
+ <msup>
54
+ <mrow>
55
+ <mi mathvariant='normal'>C</mi>
56
+ </mrow>
57
+ <mrow>
58
+ <mn>3</mn>
59
+ </mrow>
60
+ </msup>
61
+ <mo>&#xB7;</mo>
62
+ <mi mathvariant='normal'>A</mi>
63
+ </mrow>
64
+ </math>
65
+ </UnitSymbol>
66
+ <RootUnits>
67
+ <EnumeratedRootUnit unit='coulomb' powerNumerator='3'/>
68
+ <EnumeratedRootUnit unit='ampere'/>
69
+ </RootUnits>
70
+ </Unit>
71
+ <Dimension xmlns='http://unitsml.nist.gov/2005' xml:id='D_T3I4'>
72
+ <Time symbol='T' powerNumerator='3'/>
73
+ <ElectricCurrent symbol='I' powerNumerator='4'/>
74
+ </Dimension>
75
+
76
+ </mrow>
77
+ </math>
78
+ ----
79
+
80
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Define the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Make `spec` the default task.
task default: :spec
@@ -0,0 +1,43 @@
1
# coding: utf-8

# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "asciimath2unitsml/version"

Gem::Specification.new do |spec|
  spec.name          = "asciimath2unitsml"
  spec.version       = Asciimath2UnitsML::VERSION
  spec.authors       = ["Ribose Inc."]
  spec.email         = ["open.source@ribose.com"]

  spec.summary       = "Convert Asciimath via MathML to UnitsML"
  spec.description   = <<~DESCRIPTION
    Convert Asciimath via MathML to UnitsML
  DESCRIPTION

  spec.homepage      = "https://github.com/plurimath/asciimath2unitsml"
  spec.license       = "BSD-2-Clause"

  spec.bindir        = "bin"
  spec.require_paths = ["lib"]
  spec.files         = `git ls-files`.split("\n")
  # The earlier pathspec `{spec}/*` contained a single-element brace group,
  # which shells do not expand, so it was handed to git literally and matched
  # nothing. Derive the test files from the tracked file list instead.
  spec.test_files    = spec.files.grep(%r{\Aspec/})
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")

  # Runtime dependencies.
  spec.add_dependency "asciimath"
  spec.add_dependency "htmlentities"
  spec.add_dependency "nokogiri", "~> 1.10.4"
  spec.add_dependency "rsec", "~> 1.0.0"

  # Development-only dependencies.
  spec.add_development_dependency "bundler"
  spec.add_development_dependency "byebug", "~> 9.1"
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
  spec.add_development_dependency "guard", "~> 2.14"
  spec.add_development_dependency "guard-rspec", "~> 4.7"
  spec.add_development_dependency "rake", "~> 12.0"
  spec.add_development_dependency "rspec", "~> 3.6"
  spec.add_development_dependency "rubocop", "= 0.54.0"
  spec.add_development_dependency "simplecov", "~> 0.15"
  spec.add_development_dependency "timecop", "~> 0.9"
end
43
+
data/bin/rspec ADDED
@@ -0,0 +1,18 @@
1
#!/usr/bin/env ruby

# This file was generated by Bundler.
#
# The application 'rspec' is installed as part of a gem, and
# this file is here to facilitate running it.
#

require "pathname"
require "rubygems"

# Default the Gemfile to the one two levels above this script's real path,
# unless the caller already set BUNDLE_GEMFILE.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile", Pathname.new(__FILE__).realpath)

require "bundler/setup"

load Gem.bin_path("rspec-core", "rspec")
18
+
@@ -0,0 +1,3 @@
1
+ require_relative "asciimath2unitsml/version"
2
+ require_relative "asciimath2unitsml/conv"
3
+
@@ -0,0 +1,240 @@
1
+ require "asciimath"
2
+ require "nokogiri"
3
+ require "htmlentities"
4
+ require "yaml"
5
+ require "rsec"
6
+ require_relative "string"
7
+ require_relative "parse"
8
+
9
+ module Asciimath2UnitsML
10
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
11
+ UNITSML_NS = "http://unitsml.nist.gov/2005".freeze
12
+
13
+ class Conv
14
+ def initialize
15
+ @prefixes_id = read_yaml("../unitsdb/prefixes.yaml")
16
+ @prefixes = flip_name_and_id(@prefixes_id)
17
+ @quantities = read_yaml("../unitsdb/quantities.yaml")
18
+ @units_id = read_yaml("../unitsdb/units.yaml")
19
+ @units = flip_name_and_id(@units_id)
20
+ @parser = parser
21
+ end
22
+
23
# Converts an AsciiMath expression to MathML, rewrites the embedded
# unitsml(...) text via MathML2UnitsML, and returns the serialised XML.
# See https://www.w3.org/TR/mathml-units/ section 2: delimit number Invisible-Times unit
def Asciimath2UnitsML(expression)
  mathml = asciimath2mathml(expression)
  MathML2UnitsML(Nokogiri::XML(mathml)).to_xml
end
28
+
29
# Replaces every MathML <mtext> whose whole content is `unitsml(...)` with the
# rendered unit symbol (wrapped in an <mrow xref=...>) followed by the UnitsML
# markup for that unit. Other <mtext> nodes are left untouched.
#
# xml - a document responding to #xpath (a Nokogiri document in practice).
#
# Returns the same xml object, modified in place.
# Raises whatever #parse raises when the unit expression is not parseable.
def MathML2UnitsML(xml)
  xml.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
    # One regex both detects and extracts, anchored to the whole string, so the
    # check and the extraction can never disagree.
    matched = %r{\Aunitsml\((.+)\)\z}m.match(x.text)
    next unless matched

    text = matched[1]
    units = parse(text)
    # A unit directly after a number is joined to it with an invisible-times operator.
    delim = x.previous_element&.name == "mn" ? "<mo rspace='thickmathspace'>&#x2062;</mo>" : ""
    x.replace("#{delim}<mrow xref='#{unit_id(text)}'>#{mathmlsymbol(units)}</mrow>\n#{unitsml(units, text)}")
  end
  xml
end
39
+
40
# Decodes HTML entities in the expression, parses it as AsciiMath, renders
# the AST to MathML, and adds the MathML namespace to the <math> element.
def asciimath2mathml(expression)
  decoded = HTMLEntities.new.decode(expression)
  ast = AsciiMath.parse(decoded).ast
  mathml = AsciiMath::MathMLBuilder.new(:msword => true).append_expression(ast).to_s
  mathml.gsub(/<math>/, "<math xmlns='#{MATHML_NS}'>")
end
45
+
46
# Builds the identifier for a unit expression: "U_" plus the stored id when
# the text is a known unit symbol, otherwise the text with "*" turned into
# "." and "^" removed.
def unit_id(text)
  known = @units[text.to_sym]
  suffix = known ? known[:id] : text.gsub(/\*/, ".").gsub(/\^/, "")
  "U_" + suffix
end
50
+
51
# Renders the <Unit> element for a parsed unit expression. The dimensionURL
# attribute is emitted only when dim_id yields an identifier.
def unit(units, text, dims)
  dimension_ref = dim_id(dims)
  dimension_attr = dimension_ref ? " dimensionURL='##{dimension_ref}'" : ""
  [
    "<Unit xmlns='#{UNITSML_NS}' xml:id='#{unit_id(text)}'#{dimension_attr}>",
    unitsystem(units),
    unitname(units, text),
    unitsymbol(units),
    rootunits(units),
    "</Unit>",
  ].join("\n") + "\n"
end
62
+
63
# Renders the <UnitSystem> elements for a parsed unit expression.
#
# A not_SI element is emitted when any component unit is not flagged `si: true`,
# and an SI element when any component is. The SI element is typed SI_base only
# when the expression is a single unit whose type list includes "si-base".
#
# units - Array of Hashes with a :unit symbol string, as produced by #parse.
#
# Returns the elements joined by newlines ("" for an empty list).
def unitsystem(units)
  entries = units.map { |u| @units[u[:unit].to_sym] }
  systems = []
  if entries.any? { |e| e[:si] != true }
    systems << "<UnitSystem name='not_SI' type='not_SI' xml:lang='en-US'/>"
  end
  if entries.any? { |e| e[:si] == true }
    # Entries may carry no :type at all; treat that as "not a base unit"
    # rather than raising, matching the nil-safe check in normalise_unit.
    base = entries.size == 1 && entries.first[:type]&.include?("si-base")
    systems << "<UnitSystem name='SI' type='#{base ? "SI_base" : "SI_derived"}' xml:lang='en-US'/>"
  end
  systems.join("\n")
end
73
+
74
# Renders <UnitName>: the stored name when the whole text is a known unit
# symbol, otherwise whatever compose_name returns.
def unitname(units, text)
  known = @units[text.to_sym]
  name = known ? known[:name] : compose_name(units, text)
  "<UnitName xml:lang='en'>#{name}</UnitName>"
end
78
+
79
# Name for a unit expression that is not itself a known unit.
# Currently just echoes the source text; `units` is not used yet.
# TODO: compose name from the component units
def compose_name(units, text)
  text
end
83
+
84
# Renders the two <UnitSymbol> elements (HTML and MathML), one per line.
def unitsymbol(units)
  html = htmlsymbol(units)
  mathml = mathmlsymbolwrap(units)
  %(<UnitSymbol type="HTML">#{html}</UnitSymbol>\n<UnitSymbol type="MathML">#{mathml}</UnitSymbol>\n)
end
90
+
91
# HTML rendering of a unit expression: prefix + unit for each component,
# an optional <sup> exponent (first "-" shown as U+2212), joined by a middle dot.
def htmlsymbol(units)
  rendered = units.map do |u|
    power = u[:exponent]
    sup = power ? "<sup>#{power.sub(/-/, "&#x2212;")}</sup>" : nil
    "#{u[:prefix]}#{u[:unit]}#{sup}"
  end
  rendered.join(" &#183; ")
end
97
+
98
# MathML rendering of a unit expression: an upright <mi> per component,
# wrapped in <msup> when it has an exponent (a leading "-" becomes a
# separate U+2212 operator), joined by a middle-dot operator.
def mathmlsymbol(units)
  terms = units.map do |u|
    identifier = "<mi mathvariant='normal'>#{u[:prefix]}#{u[:unit]}</mi>"
    next identifier unless u[:exponent]

    power = "<mn>#{u[:exponent]}</mn>".sub(/<mn>-/, "<mo>&#x2212;</mo><mn>")
    "<msup><mrow>#{identifier}</mrow><mrow>#{power}</mrow></msup>"
  end
  terms.join("<mo>&#xB7;</mo>")
end
109
+
110
# Wraps the MathML unit symbol in a namespaced <math><mrow>…</mrow></math> shell.
def mathmlsymbolwrap(units)
  "<math xmlns='#{MATHML_NS}'>\n<mrow>#{mathmlsymbol(units)}</mrow>\n</math>\n"
end
117
+
118
# Renders <RootUnits> with one <EnumeratedRootUnit> per component, carrying
# optional prefix and powerNumerator attributes. Returns nil when the
# expression consists of exactly one unit.
def rootunits(units)
  return if units.size == 1

  roots = units.map do |u|
    prefix_attr = u[:prefix] ? " prefix='#{u[:prefix]}'" : nil
    power_attr = u[:exponent] ? " powerNumerator='#{u[:exponent]}'" : nil
    "<EnumeratedRootUnit unit='#{@units[u[:unit].to_sym][:name]}'#{prefix_attr}#{power_attr}/>"
  end
  "<RootUnits>#{roots.join("\n")}</RootUnits>\n"
end
129
+
130
# Renders one <Prefix> element for each distinct non-nil prefix used in the
# expression, looked up in @prefixes by symbol; elements are joined by newlines.
def prefix(units)
  symbols = units.map { |u| u[:prefix] }.compact.uniq
  elements = symbols.map do |sym|
    key = sym.to_sym
    "<Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[key][:base]}'\n" \
      "prefixPower='#{@prefixes[key][:power]}' xml:id='#{@prefixes[key][:id]}'>\n" \
      "<PrefixName xml:lang=\"en\">#{@prefixes[key][:name]}</PrefixName>\n" \
      "<PrefixSymbol type=\"ASCII\">#{@prefixes[key][:symbol]}</PrefixSymbol>\n" \
      "</Prefix>\n"
  end
  elements.join("\n")
end
142
+
143
# Renders the <Dimension> element for a list of dimension components, one
# child per component. Returns nil when dims is nil or empty.
def dimension(dims)
  return if dims.nil? || dims.empty?

  id = dim_id(dims)
  children = dims.map { |d| dimension1(d) }.join("\n")
  %(<Dimension xmlns='#{UNITSML_NS}' xml:id="#{id}">\n#{children}\n</Dimension>\n)
end
151
+
152
# Maps a base-unit symbol to its UnitsML dimension element name, the position
# used to order dimensions, and the dimension symbol used in identifiers.
# Frozen like the other constants in this module so it cannot be altered at runtime.
U2D = {
  "m" => { dimension: "Length", order: 1, symbol: "L" },
  "g" => { dimension: "Mass", order: 2, symbol: "M" },
  "kg" => { dimension: "Mass", order: 2, symbol: "M" },
  "s" => { dimension: "Time", order: 3, symbol: "T" },
  "A" => { dimension: "ElectricCurrent", order: 4, symbol: "I" },
  "K" => { dimension: "ThermodynamicTemperature", order: 5, symbol: "Theta" },
  "mol" => { dimension: "AmountOfSubstance", order: 6, symbol: "N" },
  "cd" => { dimension: "LuminousIntensity", order: 7, symbol: "J" },
}.freeze
162
+
163
# Reduces a parsed unit expression to its dimension components.
#
# units - Array of unit Hashes as produced by #parse.
#
# Returns an Array of Hashes (:dimension, :unit, :exponent, :symbol) ordered by
# the U2D :order field, or nil when the dimension cannot be determined: a
# normalised unit or prefix is "unknown", or a unit has no entry in U2D.
def units2dimensions(units)
  norm = normalise_units(units)
  undetermined = norm.any? do |u|
    # A unit absent from U2D would otherwise raise NoMethodError below.
    u[:unit] == "unknown" || u[:prefix] == "unknown" || !U2D.key?(u[:unit])
  end
  return if undetermined

  norm.map do |u|
    entry = U2D[u[:unit]]
    { dimension: entry[:dimension],
      unit: u[:unit],
      exponent: u[:exponent] || 1,
      symbol: entry[:symbol] }
  end.sort { |a, b| U2D[a[:unit]][:order] <=> U2D[b[:unit]][:order] }
end
173
+
174
# Renders one dimension component as an empty element named after the
# dimension, with symbol and powerNumerator attributes.
def dimension1(u)
  "<#{u[:dimension]} symbol=\"#{u[:symbol]}\" powerNumerator=\"#{u[:exponent]}\"/>"
end
177
+
178
# Builds the dimension identifier: "D_" followed by each component's U2D
# symbol and its exponent (omitted when the exponent equals 1).
# Returns nil when dims is nil or empty.
def dim_id(dims)
  return nil if dims.nil? || dims.empty?

  parts = dims.map do |d|
    power = d[:exponent] == 1 ? "" : d[:exponent].to_s
    U2D[d[:unit]][:symbol] + power
  end
  "D_" + parts.join("")
end
182
+
183
# Normalises every unit in the expression, flattens the results into one
# list, and hands that list to gather_units.
def normalise_units(units)
  expanded = units.map { |u| normalise_unit(u) }
  gather_units(expanded.flatten)
end
186
+
187
# Sorts the units by symbol and merges neighbours that share a symbol:
# exponents are added (a missing exponent counts as 1) and the prefix is
# whatever combine_prefixes returns for the two prefix entries.
#
# NOTE(review): @prefixes is built by flip_name_and_id with Symbol keys, while
# the :prefix values looked up here appear to be Strings or nil — so both
# lookups presumably return nil. Confirm before relying on merged prefixes.
def gather_units(units)
  sorted = units.sort { |a, b| a[:unit] <=> b[:unit] }
  sorted.each_with_object([]) do |current, merged|
    if merged.empty? || merged[-1][:unit] != current[:unit]
      merged << current
    else
      previous = merged[-1]
      merged[-1] = {
        prefix: combine_prefixes(@prefixes[previous[:prefix]], @prefixes[current[:prefix]]),
        unit: previous[:unit],
        exponent: (current[:exponent]&.to_i || 1) + (previous[:exponent]&.to_i || 1),
      }
    end
  end
end
197
+
198
# Rewrites one parsed unit in terms of base units:
# - a unit whose type list includes "si-base" is returned unchanged;
# - a unit with no :bases entry becomes a placeholder with unit "unknown";
# - otherwise one Hash per base is returned, with the base's power multiplied
#   by the unit's exponent (missing values count as 1).
#
# NOTE(review): @prefixes_id and @prefixes are Symbol-keyed, but they are
# indexed here with base["prefix"] and u[:prefix], which look like Strings —
# presumably both lookups return nil. Confirm the intended key type.
def normalise_unit(u)
  entry = @units[u[:unit].to_sym]
  return u if entry[:type]&.include?("si-base")

  bases = entry[:bases]
  return { prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] } unless bases

  bases.each_with_object([]) do |base, expanded|
    prefix = if base["prefix"]
               combine_prefixes(@prefixes_id[base["prefix"]], @prefixes[u[:prefix]])
             else
               u[:prefix]
             end
    expanded << { prefix: prefix,
                  unit: @units_id[base["id"].to_sym][:symbol],
                  exponent: (base["power"]&.to_i || 1) * (u[:exponent]&.to_i || 1) }
  end
end
210
+
211
# Combines two prefix entries into the symbol of a single prefix.
#
# p1, p2 - prefix Hashes with :symbol, :base and :power, or nil.
#
# Returns nil when both are nil, the other entry's symbol when one is nil,
# the symbol of the known prefix with the same base and the summed power when
# both are given, and "unknown" when the bases differ or no such prefix exists.
def combine_prefixes(p1, p2)
  return nil if p1.nil? && p2.nil?
  return p1[:symbol] if p2.nil?
  return p2[:symbol] if p1.nil?
  return "unknown" if p1[:base] != p2[:base]

  power = p1[:power] + p2[:power]
  # @prefixes is a Hash, so iterate its values; iterating the Hash itself
  # yields [key, value] pairs, on which p[:symbol] raises TypeError.
  match = @prefixes.each_value.find { |p| p[:base] == p1[:base] && p[:power] == power }
  match ? match[:symbol] : "unknown"
end
221
+
222
# Parses a UnitsML expression with @parser and returns the parsed units.
# Raises Rsec::SyntaxError when the parser returns a falsy or INVALID result;
# Rsec::Fail is reset only after a successful parse.
# NOTE(review): an identical `parse` is also defined in the other `class Conv`
# section of this package — confirm which definition is meant to be kept.
def parse(x)
  result = @parser.parse(x)
  failed = !result || Rsec::INVALID[result]
  raise Rsec::SyntaxError.new("error parsing UnitsML expression", x, 1, 0) if failed

  Rsec::Fail.reset
  result
end
230
+
231
# Assembles the UnitsML markup for one unit expression: the <Unit> element,
# the <Prefix> elements and the <Dimension> element, each followed by a newline.
def unitsml(units, text)
  dims = units2dimensions(units)
  sections = [unit(units, text, dims), prefix(units), dimension(dims)]
  sections.map { |section| "#{section}\n" }.join
end
239
+ end
240
+ end
@@ -0,0 +1,58 @@
1
+ module Asciimath2UnitsML
2
+ class Conv
3
+ include Rsec::Helpers
4
+
5
# Loads a YAML file located relative to this source file's directory and
# passes the result through symbolize_keys.
def read_yaml(path)
  base_dir = File.dirname(__FILE__)
  symbolize_keys(YAML.load_file(File.join(base_dir, path)))
end
8
+
9
# Re-keys an id-keyed table by symbol. Entries with a nil or empty :name are
# skipped. The symbol is :symbol, falling back to :short. Each kept entry is
# the same Hash object as in the input, updated in place with :symbol and :id.
def flip_name_and_id(yaml)
  yaml.each_with_object({}) do |(id, entry), by_symbol|
    name = entry[:name]
    next if name.nil? || name.empty?

    symbol = entry[:symbol] || entry[:short]
    key = symbol.to_sym
    by_symbol[key] = entry
    entry[:symbol] = symbol
    entry[:id] = id.to_s
  end
end
18
+
19
# Returns a copy of the hash with String keys converted to Symbols,
# recursing into Hash values only. Other values, including Arrays and any
# Hashes inside them, are passed through untouched.
def symbolize_keys(hash)
  hash.each_with_object({}) do |(key, value), result|
    new_key = key.is_a?(String) ? key.to_sym : key
    result[new_key] = value.is_a?(Hash) ? symbolize_keys(value) : value
  end
end
33
+
34
# Builds the parser for unit expressions from the loaded tables.
#
# Units whose type includes "buildable", or whose symbol contains "*" or "^",
# are left out of the unit alternation; the remaining symbols are
# regexp-escaped and tried longest first. An expression is one unit followed
# by any number of "*"-separated units, and must consume the whole input.
#
# NOTE(review): in the prefixed branch, `x[0][0]` presumably indexes the
# matched prefix string and so keeps only its first character — confirm
# behaviour for multi-character prefixes.
def parser
  prefix = /#{@prefixes.keys.join("|")}/.r
  atomic_keys = @units.keys.reject do |k|
    @units[k][:type]&.include?("buildable") || /\*|\^/.match(k)
  end
  escaped_keys = atomic_keys.map { |k| Regexp.escape(k) }
  unit1 = /#{escaped_keys.sort_by(&:length).reverse.join("|")}/.r
  exponent = /\^-?\d+/.r.map { |m| m.sub(/\^/, "") }
  multiplier = /\*/.r

  bare_unit = seq(unit1, exponent._?) { |x| { prefix: nil, unit: x[0], exponent: x[1][0] } }
  prefixed_unit = seq(prefix, unit1, exponent._?) { |x| { prefix: x[0][0], unit: x[1], exponent: x[2][0] } }
  unit = bare_unit | prefixed_unit

  units_tail = seq(multiplier, unit) { |u| u[1] }
  units = seq(unit, units_tail.star) { |x| [x[0], x[1]].flatten }
  units.eof
end
48
+
49
# Runs @parser over the expression and returns the parsed units.
# A falsy or INVALID parser result raises Rsec::SyntaxError; on success
# Rsec::Fail is reset before the units are returned.
def parse(x)
  parsed = @parser.parse(x)
  unparseable = !parsed || Rsec::INVALID[parsed]
  raise Rsec::SyntaxError.new("error parsing UnitsML expression", x, 1, 0) if unparseable

  Rsec::Fail.reset
  parsed
end
57
+ end
58
+ end