asciimath2unitsml 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +2 -0
- data/.gitmodules +4 -0
- data/README.adoc +147 -6
- data/asciimath2unitsml.gemspec +24 -2
- data/lib/asciimath2unitsml.rb +1 -0
- data/lib/asciimath2unitsml/conv.rb +94 -146
- data/lib/asciimath2unitsml/parse.rb +126 -21
- data/lib/asciimath2unitsml/render.rb +53 -0
- data/lib/asciimath2unitsml/unit.rb +78 -0
- data/lib/asciimath2unitsml/version.rb +1 -1
- data/lib/unitsdb/dimensions.yaml +802 -0
- data/lib/unitsdb/prefixes.yaml +154 -27
- data/lib/unitsdb/quantities.yaml +2246 -440
- data/lib/unitsdb/unit_systems.yaml +16 -0
- data/lib/unitsdb/units.yaml +10196 -1714
- data/lib/unitsdb_ruby/unitsdb.rb +163 -0
- data/spec/conv_spec.rb +330 -111
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca1751cf852b77599154aa142ea719f0510210ddebb667ace7225cb94f7d598f
|
4
|
+
data.tar.gz: 332e23b81a688d1386a4f084dc3db1f401e1e7634beb70c17466e18a23d1f51a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f300b5a9c8af814f4c61ff9be4407f45350fedfc76a7c9b3e4da90a793fcf5cf7004b24e5719a562cc2cb355bb8be355ee4d1e23909f6bbbd4060a97affcd7de
|
7
|
+
data.tar.gz: abeedd889b14c45eeeb90a2f56fc5a09de04ca54f7b15f39677ad516fda17426dbd47977fdc71199ed77424639dd4c86a40164672dd81aad92fd512f872b1723
|
data/.github/workflows/rake.yml
CHANGED
data/.gitmodules
ADDED
data/README.adoc
CHANGED
@@ -4,24 +4,57 @@ Convert Units expressions via MathML to UnitsML
|
|
4
4
|
This gem converts
|
5
5
|
MathML incorporating UnitsML expressions (based on the Ascii representation provided by NIST)
|
6
6
|
into MathML complying with https://www.w3.org/TR/mathml-units/[], with
|
7
|
-
UnitsML markup embedded in it, and with unique identifiers for each distinct unit and dimension.
|
7
|
+
UnitsML markup embedded in it, and with unique identifiers for each distinct unit, prefix, and dimension.
|
8
|
+
Dimensions are automatically inserted corresponding to each unit.
|
8
9
|
Units expressions are identified in MathML as `<mtext>unitsml(...)</mtext>`, which in turn
|
9
10
|
can be identified in AsciiMath as `"unitsml(...)"`.
|
11
|
+
|
10
12
|
The consuming document is meant to deduplicate the instances of UnitsML markup
|
11
13
|
with the same identifier, and potentially remove them to elsewhere in the document
|
12
14
|
or another document.
|
13
15
|
|
16
|
+
== Notation
|
17
|
+
|
18
|
+
The `unitsml()` expression consists of a unit string.
|
19
|
+
The units used in `unitsml()` are taken from the UnitsDB database as updated by Ribose:
|
20
|
+
https://github.com/unitsml/unitsdb[]. Units are given as an ASCII based code, consisting of
|
21
|
+
multiplication or division of single units, each of which is defined as a Prefix
|
22
|
+
(taken from https://github.com/unitsml/unitsdb/blob/master/prefixes.yaml[]),
|
23
|
+
unit (taken from https://github.com/unitsml/unitsdb/blob/master/units.yaml[]),
|
24
|
+
and exponent; e.g. `mm*s^-2`.
|
25
|
+
|
14
26
|
The conventions used for writing units are:
|
15
27
|
|
16
28
|
* `^` for exponents, e.g. `m^-2`
|
17
|
-
* `*` to combine two units by multiplication; e.g. `m*s^-2`.
|
29
|
+
* `*` to combine two units by multiplication; e.g. `m*s^-2`.
|
30
|
+
* `/` to combine two units by division; e.g. `cal_th/cm^2`.
|
18
31
|
* `u` for μ (micro-)
|
19
32
|
|
33
|
+
For more on units notation, see <<units_notation,Units Notation>>.
|
34
|
+
|
35
|
+
The `unitsml()` expression can take additional optional parameters, giving further information for the UnitsML
|
36
|
+
to be generated:
|
37
|
+
|
38
|
+
* `unitsml(unit-string, quantity: ID)` provides the UnitsDB identifier for the quantity being measured
|
39
|
+
(taken from https://github.com/unitsml/unitsdb/blob/master/quantities.yaml[]). For example,
|
40
|
+
`unitsml(s, quantity: NISTq109)` indicates that the second is used to measure period duration.
|
41
|
+
If a single quantity is associated with the unit in UnitsDB (as given in
|
42
|
+
https://github.com/unitsml/unitsdb/blob/master/units.yaml[]), that quantity is added automatically;
|
43
|
+
otherwise, no quantity is added unless explicitly nominated in this way.
|
44
|
+
* `unitsml(unit-string, name: NAME)` provides a name for the unit, if one is not already available
|
45
|
+
from UnitsDB. For example, `unitsml(cal_th/cm^2, name: langley)`.
|
46
|
+
|
47
|
+
== Rendering
|
48
|
+
|
49
|
+
The output of the gem is MathML, with MathML unit expressions (expressed as `<mi>`,
|
50
|
+
complying with https://www.w3.org/TR/mathml-units/[MathML Units]) cross-referenced to UnitsML
|
51
|
+
definitions embedded in the MathML.
|
52
|
+
|
20
53
|
The gem follows the MathML Units convention of inserting a spacing invisible times operator
|
21
54
|
(`<mo rspace='thickmathspace'>⁢</mo>`) between any numbers (`<mn>`) and unit expressions
|
22
55
|
in MathML, and representing units in MathML as non-italic variables (`<mi mathvariant='normal'>`).
|
23
56
|
|
24
|
-
|
57
|
+
== Example
|
25
58
|
|
26
59
|
[source]
|
27
60
|
----
|
@@ -83,14 +116,18 @@ is converted into:
|
|
83
116
|
</math>
|
84
117
|
----
|
85
118
|
|
119
|
+
== Usage
|
120
|
+
|
86
121
|
The converter is run as:
|
87
122
|
|
88
123
|
[source,ruby]
|
89
124
|
----
|
90
125
|
c = Asciimath2UnitsML::Conv.new()
|
91
|
-
c.Asciimath2UnitsML(
|
92
|
-
c.MathML2UnitsML(
|
93
|
-
|
126
|
+
c.Asciimath2UnitsML('1 "unitsml(mm*s^-2)"') # AsciiMath string containing UnitsML
|
127
|
+
c.MathML2UnitsML("<math xmlns='http://www.w3.org/1998/Math/MathML'><mn>7</mn>"\
|
128
|
+
"<mtext>unitsml(kg^-2)</mtext></math>") # AsciiMath string containing <mtext>unitsml()</mtext>
|
129
|
+
c.MathML2UnitsML(Nokogiri::XML("<math xmlns='http://www.w3.org/1998/Math/MathML'><mn>7</mn>"\
|
130
|
+
"<mtext>unitsml(kg^-2)</mtext></math>")) # Nokogiri parse of MathML document containing <mtext>unitsml()</mtext>
|
94
131
|
----
|
95
132
|
|
96
133
|
The converter class may be initialised with options:
|
@@ -101,3 +138,107 @@ supplied instead; it will be encoded as XML entities. The value `:space` is rend
|
|
101
138
|
as a spacing invisible times in MathML (`<mo rspace='thickmathspace'>⁢</mo>`),
|
102
139
|
and as a non-breaking space in HTML. The value `:nospace` is rendered as a non-spacing
|
103
140
|
invisible times in MathML (`<mo>⁢</mo>`), and is not rendered in HTML.
|
141
|
+
|
142
|
+
[[units_notation]]
|
143
|
+
== Units Notation
|
144
|
+
|
145
|
+
The units used in `unitsml()` are taken from the UnitsDB database as updated by Ribose:
|
146
|
+
https://github.com/unitsml/unitsdb[]. Units are given as an ASCII based code, consisting of
|
147
|
+
multiplication or division of single units, each of which is defined as a Prefix
|
148
|
+
(taken from https://github.com/unitsml/unitsdb/blob/master/prefixes.yaml[]),
|
149
|
+
unit (taken from https://github.com/unitsml/unitsdb/blob/master/units.yaml[]),
|
150
|
+
and exponent; e.g. `mm*s^-2`.
|
151
|
+
|
152
|
+
In case of ambiguity, the interpretation with no prefix is prioritised over the interpretation
|
153
|
+
as a prefixed unit; so `ct` is interpreted as _hundredweight_, rather than _centi-ton_. Exceptionally,
|
154
|
+
`kg` is decomposed into kilo-gram rather than treated as a basic unit, for consistency with
|
155
|
+
other prefixes of grams. (Prefixed units appear in UnitsDB, and are indicated as `prefixed: true`.)
|
156
|
+
|
157
|
+
A unit may have multiple symbols; these are registered separately in
|
158
|
+
https://github.com/unitsml/unitsdb/blob/master/units.yaml[units.yaml], as entries under `unit_symbols`.
|
159
|
+
These different symbols will be recognised as the same Unit in the UnitsML markup, but
|
160
|
+
the original symbol will be retained in the MathML expression. So an expression like `1 unitsml(mL)`
|
161
|
+
will be recognised as referring to millilitres; the expression will be given under its canonical
|
162
|
+
rendering `ml` in UnitsML markup, but the MathML rendering referencing that UnitsML expression
|
163
|
+
will keep the notation `mL`.
|
164
|
+
|
165
|
+
The symbols used for units can be highly ambiguous; in order to guarantee accurate parsing,
|
166
|
+
the symbols used for entering units are kept unambiguous in https://github.com/unitsml/unitsdb/blob/master/units.yaml[units.yaml].
|
167
|
+
They may be found as the entries for `unit_symbols/id` under each unit. For example, `B` is ambiguous between
|
168
|
+
_bel_ (as in decibel) and _byte_; they are kept unambiguous by using `bel_B` and `byte_B` to refer to them,
|
169
|
+
although they will still both be rendered as `B`.
|
170
|
+
|
171
|
+
The following table is the current list of ambiguous symbols, which are disambiguated in the symbol ids used.
|
172
|
+
This table can be generated (in Asciidoc format) through `Asciimath2UnitsML::Conv.new().ambig_units`:
|
173
|
+
|
174
|
+
[cols="7*"]
|
175
|
+
|===
|
176
|
+
|Symbol | Unit + ID | | | | |
|
177
|
+
|
178
|
+
|
179
|
+
| ′ | minute (minute of arc): `'` | foot: `'_ft` | minute: `'_min` | minute (minute of arc): `prime` | foot: `prime_ft` | minute: `prime_min`
|
180
|
+
| ″ | second (second of arc): `"` | second: `"_s` | inch: `"_in` | second (second of arc): `dprime` | second: `dprime_s` | inch: `dprime_in`
|
181
|
+
| ″Hg | conventional inch of mercury: `"Hg` | conventional inch of mercury: `dprime_Hg` | inch of mercury (32 degF): `"Hg_32degF` | inch of mercury (60 degF): `"Hg_60degF` | inch of mercury (32 degF): `dprime_Hg_32degF` | inch of mercury (60 degF): `dprime_Hg_60degF`
|
182
|
+
| hp | horsepower: `hp` | horsepower (UK): `hp_UK` | horsepower, water: `hp_water` | horsepower, metric: `hp_metric` | horsepower, boiler: `hp_boiler` | horsepower, electric: `hp_electric`
|
183
|
+
| Btu | British thermal unit_IT: `Btu` | British thermal unit (mean): `Btu_mean` | British thermal unit (39 degF): `Btu_39degF` | British thermal unit (59 degF): `Btu_59degF` | British thermal unit (60 degF): `Btu_60degF` |
|
184
|
+
| a | are: `a` | year (365 days): `a_year` | year, tropical: `a_tropical_year` | year, sidereal: `a_sidereal_year` | |
|
185
|
+
| d | day: `d` | darcy: `darcy` | day, sidereal: `d_sidereal` | | |
|
186
|
+
| inHg | conventional inch of mercury: `inHg` | inch of mercury (32 degF): `inHg_32degF` | inch of mercury (60 degF): `inHg_60degF` | | |
|
187
|
+
| inH~2~O | conventional inch of water: `inH_2O` | inch of water (39.2 degF): `inH_2O_39degF` | inch of water (60 degF): `inH_2O_60degF` | | |
|
188
|
+
| min | minute: `min` | minim: `minim` | minute, sidereal: `min_sidereal` | | |
|
189
|
+
| pc | parsec: `pc` | pica (printer's): `pica_printer` | pica (computer): `pica_computer` | | |
|
190
|
+
| t | metric ton: `t` | long ton: `ton_long` | short ton: `ton_short` | | |
|
191
|
+
| B | bel: `bel_B` | byte: `byte_B` | | | |
|
192
|
+
| cmHg | conventional centimeter of mercury: `cmHg` | centimeter of mercury (0 degC): `cmHg_0degC` | | | |
|
193
|
+
| cmH~2~O | conventional centimeter of water: `cmH_2O` | centimeter of water (4 degC): `cmH_2O_4degC` | | | |
|
194
|
+
| cup | cup (US): `cup` | cup (FDA): `cup_label` | | | |
|
195
|
+
| D | debye: `D` | darcy: `Darcy` | | | |
|
196
|
+
| ft | foot: `ft` | foot (based on US survey foot): `ft_US_survey` | | | |
|
197
|
+
| ftH~2~O | conventional foot of water: `ftH_2O` | foot of water (39.2 degF): `ftH_2O_39degF` | | | |
|
198
|
+
| gi | gill (US): `gi` | gill [Canadian and UK (Imperial)]: `gi_imperial` | | | |
|
199
|
+
| h | hour: `h` | hour, sidereal: `h_sidereal` | | | |
|
200
|
+
| ′Hg | conventional foot of mercury: `'Hg` | conventional foot of mercury: `prime_Hg` | | | |
|
201
|
+
| __ħ__ | natural unit of action: `h-bar` | atomic unit of action: `h-bar_atomic` | | | |
|
202
|
+
| __m__~e~ | natural unit of mass: `m_e` | atomic unit of mass: `m_e_atomic` | | | |
|
203
|
+
| in | inch: `in` | inch (based on US survey foot): `in_US_survey` | | | |
|
204
|
+
| K | kelvin: `K` | kayser: `kayser` | | | |
|
205
|
+
| L | liter: `L` | lambert: `Lambert` | | | |
|
206
|
+
| lb | pound (avoirdupois): `lb` | pound (troy or apothecary): `lb_troy` | | | |
|
207
|
+
| mi | mile: `mi` | mile (based on US survey foot): `mi_US_survey` | | | |
|
208
|
+
| mil | mil (length): `mil` | angular mil (NATO): `mil_nato` | | | |
|
209
|
+
| oz | ounce (avoirdupois): `oz` | ounce (troy or apothecary): `oz_troy` | | | |
|
210
|
+
| pt | point (printer's): `pt_printer` | point (computer): `pt_computer` | | | |
|
211
|
+
| rad | radian: `rad` | rad (absorbed dose): `rad_radiation` | | | |
|
212
|
+
| s | second: `s` | second, sidereal: `s_sidereal` | | | |
|
213
|
+
| tbsp | tablespoon: `tbsp` | tablespoon (FDA): `tbsp_label` | | | |
|
214
|
+
| ton | ton of TNT (energy equivalent): `ton_TNT` | ton of refrigeration (12 000 Btu_IT/h): `ton_refrigeration` | | | |
|
215
|
+
| tsp | teaspoon: `tsp` | teaspoon (FDA): `tsp_label` | | | |
|
216
|
+
| yd | yard: `yd` | yard (based on US survey foot): `yd_US_survey` | | | |
|
217
|
+
| º | degree (degree of arc): `deg` | | | | |
|
218
|
+
| γ | gamma: `gamma` | | | | |
|
219
|
+
| μ | micron: `micron` | | | | |
|
220
|
+
| Ω | ohm: `Ohm` | | | | |
|
221
|
+
| Å | angstrom: `Aring` | | | | |
|
222
|
+
| ħ | natural unit of action in eV s: `h-bar_eV_s` | | | | |
|
223
|
+
| abΩ | abohm: `abohm` | | | | |
|
224
|
+
| (abΩ)^-1^ | abmho: `abS` | | | | |
|
225
|
+
| aW | abwatt: `aW (Cardelli)` | | | | |
|
226
|
+
| b | barn: `barn` | | | | |
|
227
|
+
| Btu~th~ | British thermal unit_th: `Btu_th` | | | | |
|
228
|
+
| °C | degree Celsius: `degC` | | | | |
|
229
|
+
| cal~IT~ | I.T. calorie: `cal_IT` | | | | |
|
230
|
+
| cal~th~ | thermochemical calorie: `cal_th` | | | | |
|
231
|
+
| °F | degree Fahrenheit: `degF` | | | | |
|
232
|
+
| __a__~0~ | atomic unit of length: `a_0` | | | | |
|
233
|
+
| __c__ | natural unit of velocity: `c` | | | | |
|
234
|
+
| __c__~0~ | natural unit of velocity: `c_0` | | | | |
|
235
|
+
| __e__ | atomic unit of charge: `e` | | | | |
|
236
|
+
| __E__~h~ | atomic unit of energy: `e_h` | | | | |
|
237
|
+
| μin | microinch: `uin` | | | | |
|
238
|
+
| °K | kelvin: `degK` | | | | |
|
239
|
+
| kcal~IT~ | kilocalorie_IT: `kcal_IT` | | | | |
|
240
|
+
| kcal~th~ | kilocalorie_th: `kcal_th` | | | | |
|
241
|
+
| mmH~2~O | conventional millimeter of water: `mmH_2O` | | | | |
|
242
|
+
| °R | degree Rankine: `degR` | | | | |
|
243
|
+
| ƛ~C~ | natural unit of length: `lambda-bar_C` | | | | |
|
244
|
+
|===
|
data/asciimath2unitsml.gemspec
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
2
|
+
|
3
3
|
lib = File.expand_path("../lib", __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
require "asciimath2unitsml/version"
|
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
|
13
13
|
spec.summary = "Convert Asciimath via MathML to UnitsML"
|
14
14
|
spec.description = <<~DESCRIPTION
|
15
|
-
|
15
|
+
Convert Asciimath via MathML to UnitsML
|
16
16
|
DESCRIPTION
|
17
17
|
|
18
18
|
spec.homepage = "https://github.com/plurimath/asciimath2unitsml"
|
@@ -24,6 +24,27 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.test_files = `git ls-files -- {spec}/*`.split("\n")
|
25
25
|
spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
|
26
26
|
|
27
|
+
# get an array of submodule directories, relative to the root of the repo
|
28
|
+
`git config --file .gitmodules --get-regexp '\\.path$'`
|
29
|
+
.split("\n")
|
30
|
+
.map { |kv_str| kv_str.split(" ") }
|
31
|
+
.each do |(_, submodule_path)|
|
32
|
+
|
33
|
+
# for each submodule, change working directory to that submodule
|
34
|
+
Dir.chdir(submodule_path) do
|
35
|
+
|
36
|
+
# issue git ls-files in submodule's directory
|
37
|
+
submodule_files = `git ls-files | grep -i '.yaml$'`.split($\)
|
38
|
+
|
39
|
+
submodule_files_paths = submodule_files.map do |filename|
|
40
|
+
File.join submodule_path, filename
|
41
|
+
end
|
42
|
+
|
43
|
+
# add relative paths to gem.files
|
44
|
+
spec.files += submodule_files_paths
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
27
48
|
spec.add_dependency "asciimath"
|
28
49
|
spec.add_dependency "htmlentities"
|
29
50
|
spec.add_dependency "nokogiri", "~> 1.10.4"
|
@@ -39,5 +60,6 @@ Gem::Specification.new do |spec|
|
|
39
60
|
spec.add_development_dependency "rubocop", "= 0.54.0"
|
40
61
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
41
62
|
spec.add_development_dependency "timecop", "~> 0.9"
|
63
|
+
spec.add_development_dependency "rexml"
|
42
64
|
end
|
43
65
|
|
data/lib/asciimath2unitsml.rb
CHANGED
@@ -5,6 +5,8 @@ require "yaml"
|
|
5
5
|
require "rsec"
|
6
6
|
require_relative "string"
|
7
7
|
require_relative "parse"
|
8
|
+
require_relative "render"
|
9
|
+
require_relative "unit"
|
8
10
|
|
9
11
|
module Asciimath2UnitsML
|
10
12
|
MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
|
@@ -12,145 +14,43 @@ module Asciimath2UnitsML
|
|
12
14
|
|
13
15
|
class Conv
|
14
16
|
def initialize(options = {})
|
15
|
-
@
|
16
|
-
|
17
|
-
@quantities = read_yaml("../unitsdb/quantities.yaml")
|
18
|
-
@units_id = read_yaml("../unitsdb/units.yaml")
|
19
|
-
@units = flip_name_and_id(@units_id)
|
20
|
-
#temporary
|
21
|
-
@units[:degC][:render] = "°C"
|
22
|
-
@units[:degF][:render] = "°F"
|
23
|
-
@units[:Ohm][:render] = "Ω"
|
24
|
-
@parser = parser
|
25
|
-
@multiplier = multiplier(options[:multiplier] || "\u00b7")
|
26
|
-
end
|
27
|
-
|
28
|
-
def multiplier(x)
|
29
|
-
case x
|
30
|
-
when :space
|
31
|
-
{ html: " ", mathml: "<mo rspace='thickmathspace'>⁢</mo>" }
|
32
|
-
when :nospace
|
33
|
-
{ html: "", mathml: "<mo>⁢</mo>" }
|
34
|
-
else
|
35
|
-
{ html: HTMLEntities.new.encode(x), mathml: "<mo>#{HTMLEntities.new.encode(x)}</mo>" }
|
17
|
+
@dimensions_id = read_yaml("../unitsdb/dimensions.yaml").each_with_object({}) do |(k, v), m|
|
18
|
+
m[k.to_s] = UnitsDB::Dimension.new(k, v)
|
36
19
|
end
|
37
|
-
|
38
|
-
|
39
|
-
def units_only(units)
|
40
|
-
units.reject { |u| u[:multiplier] }
|
41
|
-
end
|
42
|
-
|
43
|
-
def unit_id(text)
|
44
|
-
text = text.gsub(/[()]/, "")
|
45
|
-
"U_" +
|
46
|
-
(@units[text.to_sym] ? @units[text.to_sym][:id] : text.gsub(/\*/, ".").gsub(/\^/, ""))
|
47
|
-
end
|
48
|
-
|
49
|
-
def unit(units, origtext, normtext, dims)
|
50
|
-
dimid = dim_id(dims)
|
51
|
-
<<~END
|
52
|
-
<Unit xmlns='#{UNITSML_NS}' xml:id='#{unit_id(origtext)}'#{dimid ? " dimensionURL='##{dimid}'" : ""}>
|
53
|
-
#{unitsystem(units)}
|
54
|
-
#{unitname(units, normtext)}
|
55
|
-
#{unitsymbol(units)}
|
56
|
-
#{rootunits(units)}
|
57
|
-
</Unit>
|
58
|
-
END
|
59
|
-
end
|
60
|
-
|
61
|
-
def unitsystem(units)
|
62
|
-
ret = []
|
63
|
-
units = units_only(units)
|
64
|
-
units.any? { |x| @units[x[:unit].to_sym][:si] != true } and
|
65
|
-
ret << "<UnitSystem name='not_SI' type='not_SI' xml:lang='en-US'/>"
|
66
|
-
if units.any? { |x| @units[x[:unit].to_sym][:si] == true }
|
67
|
-
base = units.size == 1 && @units[units[0][:unit].to_sym][:type].include?("si-base")
|
68
|
-
ret << "<UnitSystem name='SI' type='#{base ? "SI_base" : "SI_derived"}' xml:lang='en-US'/>"
|
20
|
+
@prefixes_id = read_yaml("../unitsdb/prefixes.yaml").each_with_object({}) do |(k, v), m|
|
21
|
+
m[k] = UnitsDB::Prefix.new(k, v)
|
69
22
|
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
def unitsymbol(units)
|
84
|
-
<<~END
|
85
|
-
<UnitSymbol type="HTML">#{htmlsymbol(units)}</UnitSymbol>
|
86
|
-
<UnitSymbol type="MathML">#{mathmlsymbolwrap(units)}</UnitSymbol>
|
87
|
-
END
|
88
|
-
end
|
89
|
-
|
90
|
-
def render(unit)
|
91
|
-
#require "byebug"; byebug if unit == "degC"
|
92
|
-
@units[unit.to_sym][:render] || unit
|
93
|
-
end
|
94
|
-
|
95
|
-
def htmlsymbol(units)
|
96
|
-
units.map do |u|
|
97
|
-
if u[:multiplier] then u[:multiplier] == "*" ? @multiplier[:html] : u[:multiplier]
|
98
|
-
else
|
99
|
-
u[:display_exponent] and exp = "<sup>#{u[:display_exponent].sub(/-/, "−")}</sup>"
|
100
|
-
"#{u[:prefix]}#{render(u[:unit])}#{exp}"
|
101
|
-
end
|
102
|
-
end.join("")
|
103
|
-
end
|
104
|
-
|
105
|
-
def mathmlsymbol(units)
|
106
|
-
exp = units.map do |u|
|
107
|
-
if u[:multiplier] then u[:multiplier] == "*" ? @multiplier[:mathml] : "<mo>#{u[:multiplier]}</mo>"
|
108
|
-
else
|
109
|
-
base = "<mi mathvariant='normal'>#{u[:prefix]}#{render(u[:unit])}</mi>"
|
110
|
-
if u[:display_exponent]
|
111
|
-
exp = "<mn>#{u[:display_exponent]}</mn>".sub(/<mn>-/, "<mo>−</mo><mn>")
|
112
|
-
"<msup><mrow>#{base}</mrow><mrow>#{exp}</mrow></msup>"
|
113
|
-
else
|
114
|
-
base
|
115
|
-
end
|
116
|
-
end
|
117
|
-
end.join("")
|
118
|
-
end
|
119
|
-
|
120
|
-
def mathmlsymbolwrap(units)
|
121
|
-
<<~END
|
122
|
-
<math xmlns='#{MATHML_NS}'>
|
123
|
-
<mrow>#{mathmlsymbol(units)}</mrow>
|
124
|
-
</math>
|
125
|
-
END
|
126
|
-
end
|
127
|
-
|
128
|
-
def rootunits(units)
|
129
|
-
return if units.size == 1
|
130
|
-
exp = units_only(units).map do |u|
|
131
|
-
prefix = " prefix='#{u[:prefix]}'" if u[:prefix]
|
132
|
-
exponent = " powerNumerator='#{u[:exponent]}'" if u[:exponent] && u[:exponent] != "1"
|
133
|
-
"<EnumeratedRootUnit unit='#{@units[u[:unit].to_sym][:name]}'#{prefix}#{exponent}/>"
|
134
|
-
end.join("\n")
|
135
|
-
<<~END
|
136
|
-
<RootUnits>#{exp}</RootUnits>
|
137
|
-
END
|
23
|
+
@prefixes = flip_name_and_symbol(@prefixes_id)
|
24
|
+
@quantities = read_yaml("../unitsdb/quantities.yaml").each_with_object({}) do |(k, v), m|
|
25
|
+
m[k.to_s] = UnitsDB::Quantity.new(k, v)
|
26
|
+
end
|
27
|
+
@units_id = read_yaml("../unitsdb/units.yaml").each_with_object({}) do |(k, v), m|
|
28
|
+
m[k.to_s] = UnitsDB::Unit.new(k.to_s, v)
|
29
|
+
end
|
30
|
+
@units = flip_name_and_symbols(@units_id)
|
31
|
+
@symbols = @units.each_with_object({}) do |(k, v), m|
|
32
|
+
v.symbolids.each { |x| m[x] = v.symbols_hash[x] }
|
33
|
+
end
|
34
|
+
@parser = parser
|
35
|
+
@multiplier = multiplier(options[:multiplier] || "\u00b7")
|
138
36
|
end
|
139
37
|
|
140
38
|
def prefix(units)
|
141
|
-
units.map { |u| u[:prefix] }.reject { |u| u.nil? }.uniq.map do |
|
142
|
-
p = p1.to_sym
|
39
|
+
units.map { |u| u[:prefix] }.reject { |u| u.nil? }.uniq.map do |p|
|
143
40
|
<<~END
|
144
|
-
<Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[p]
|
145
|
-
prefixPower='#{@prefixes[p]
|
146
|
-
<PrefixName xml:lang="en">#{@prefixes[p]
|
147
|
-
<PrefixSymbol type="ASCII">#{@prefixes[p]
|
41
|
+
<Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[p].base}'
|
42
|
+
prefixPower='#{@prefixes[p].power}' xml:id='#{@prefixes[p].id}'>
|
43
|
+
<PrefixName xml:lang="en">#{@prefixes[p].name}</PrefixName>
|
44
|
+
<PrefixSymbol type="ASCII">#{@prefixes[p].ascii}</PrefixSymbol>
|
45
|
+
<PrefixSymbol type="unicode">#{@prefixes[p].unicode}</PrefixSymbol>
|
46
|
+
<PrefixSymbol type="LaTeX">#{@prefixes[p].latex}</PrefixSymbol>
|
47
|
+
<PrefixSymbol type="HTML">#{HTMLEntities.new.encode(@prefixes[p].html, :basic)}</PrefixSymbol>
|
148
48
|
</Prefix>
|
149
49
|
END
|
150
50
|
end.join("\n")
|
151
51
|
end
|
152
52
|
|
153
|
-
def
|
53
|
+
def dimension_components(dims)
|
154
54
|
return if dims.nil? || dims.empty?
|
155
55
|
<<~END
|
156
56
|
<Dimension xmlns='#{UNITSML_NS}' xml:id="#{dim_id(dims)}">
|
@@ -160,7 +60,7 @@ module Asciimath2UnitsML
|
|
160
60
|
end
|
161
61
|
|
162
62
|
def units2dimensions(units)
|
163
|
-
norm =
|
63
|
+
norm = decompose_units(units)
|
164
64
|
return if norm.any? { |u| u[:unit] == "unknown" || u[:prefix] == "unknown" }
|
165
65
|
norm.map do |u|
|
166
66
|
{ dimension: U2D[u[:unit]][:dimension],
|
@@ -176,11 +76,16 @@ module Asciimath2UnitsML
|
|
176
76
|
|
177
77
|
def dim_id(dims)
|
178
78
|
return nil if dims.nil? || dims.empty?
|
79
|
+
dimhash = dims.each_with_object({}) { |h, m| m[h[:dimension]] = h }
|
80
|
+
dimsvector = %w(Length Mass Time ElectricCurrent ThermodynamicTemperature
|
81
|
+
AmountOfSubstance LuminousIntensity PlaneAngle)
|
82
|
+
.map { |h| dimhash.dig(h, :exponent) }.join(":")
|
83
|
+
id = @dimensions_id&.values&.select { |d| d.vector == dimsvector }&.first&.id and return id.to_s
|
179
84
|
"D_" + dims.map { |d| U2D[d[:unit]][:symbol] + (d[:exponent] == 1 ? "" : d[:exponent].to_s) }.join("")
|
180
85
|
end
|
181
86
|
|
182
|
-
def
|
183
|
-
gather_units(units_only(units).map { |u|
|
87
|
+
def decompose_units(units)
|
88
|
+
gather_units(units_only(units).map { |u| decompose_unit(u) }.flatten)
|
184
89
|
end
|
185
90
|
|
186
91
|
def gather_units(units)
|
@@ -194,36 +99,79 @@ module Asciimath2UnitsML
|
|
194
99
|
end
|
195
100
|
end
|
196
101
|
|
197
|
-
|
198
|
-
|
199
|
-
|
102
|
+
# treat g not kg as base unit: we have stripped the prefix k in parsing
|
103
|
+
# reduce units down to basic units
|
104
|
+
def decompose_unit(u)
|
105
|
+
if u[:unit] == "g" then u
|
106
|
+
elsif @units[u[:unit]].system_type == "SI_base" then u
|
107
|
+
elsif !@units[u[:unit]].si_derived_bases
|
108
|
+
{ prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] }
|
200
109
|
else
|
201
|
-
@units[u[:unit]
|
202
|
-
m << { prefix: k[
|
203
|
-
combine_prefixes(@prefixes_id[k[
|
204
|
-
unit: @units_id[k[
|
205
|
-
exponent: (k[
|
110
|
+
@units[u[:unit]].si_derived_bases.each_with_object([]) do |k, m|
|
111
|
+
m << { prefix: !k[:prefix].nil? && !k[:prefix].empty? ?
|
112
|
+
combine_prefixes(@prefixes_id[k[:prefix]], @prefixes[u[:prefix]]) : u[:prefix],
|
113
|
+
unit: @units_id[k[:id]].symbolid,
|
114
|
+
exponent: (k[:power]&.to_i || 1) * (u[:exponent]&.to_i || 1) }
|
206
115
|
end
|
207
116
|
end
|
208
117
|
end
|
209
118
|
|
210
119
|
def combine_prefixes(p1, p2)
|
211
120
|
return nil if p1.nil? && p2.nil?
|
212
|
-
return p1
|
213
|
-
return p2
|
214
|
-
return "unknown" if p1
|
121
|
+
return p1.symbolid if p2.nil?
|
122
|
+
return p2.symbolid if p1.nil?
|
123
|
+
return "unknown" if p1.base != p2.base
|
215
124
|
@prefixes.each do |p|
|
216
|
-
return p
|
125
|
+
return p.symbolid if p.base == p1.base && p.power == p1.power + p2.power
|
217
126
|
end
|
218
127
|
"unknown"
|
219
128
|
end
|
220
129
|
|
221
|
-
def
|
130
|
+
def quantityname(id)
|
131
|
+
ret = ""
|
132
|
+
@quantities[id].names.each do |q|
|
133
|
+
ret += %(<QuantityName xml:lang="en-US">#{q}</QuantityName>)
|
134
|
+
end
|
135
|
+
ret
|
136
|
+
end
|
137
|
+
|
138
|
+
def quantity(normtext, quantity)
|
139
|
+
return unless @units[normtext] && @units[normtext].quantities.size == 1 || @quantities[quantity]
|
140
|
+
id = quantity || @units[normtext].quantities.first
|
141
|
+
dim = %( dimensionURL="##{@units[normtext].dimension}") if @units[normtext]&.dimension
|
142
|
+
<<~END
|
143
|
+
<Quantity xmlns='#{UNITSML_NS}' xml:id="#{id}"#{dim} quantityType="base">
|
144
|
+
#{quantityname(id)}
|
145
|
+
</Quantity>
|
146
|
+
END
|
147
|
+
end
|
148
|
+
|
149
|
+
def dimid2dimensions(normtext)
|
150
|
+
@dimensions_id[normtext].keys.map do |k|
|
151
|
+
{ dimension: k,
|
152
|
+
symbol: U2D.values.select { |v| v[:dimension] == k }.first[:symbol],
|
153
|
+
exponent: @dimensions_id[normtext].exponent(k) }
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
def dimension(normtext)
|
158
|
+
return unless @units[normtext]&.dimension
|
159
|
+
dims = dimid2dimensions(@units[normtext]&.dimension)
|
160
|
+
<<~END
|
161
|
+
<Dimension xmlns='#{UNITSML_NS}' xml:id="#{@units[normtext]&.dimension}">
|
162
|
+
#{dims.map { |u| dimension1(u) }.join("\n") }
|
163
|
+
</Dimension>
|
164
|
+
END
|
165
|
+
end
|
166
|
+
|
167
|
+
def unitsml(units, origtext, normtext, quantity, name)
|
222
168
|
dims = units2dimensions(units)
|
223
169
|
<<~END
|
224
|
-
#{unit(units, origtext, normtext, dims)}
|
170
|
+
#{unit(units, origtext, normtext, dims, name)}
|
225
171
|
#{prefix(units)}
|
226
|
-
#{dimension(
|
172
|
+
#{dimension(normtext)}
|
173
|
+
#{dimension_components(dims)}
|
174
|
+
#{quantity(normtext, quantity)}
|
227
175
|
END
|
228
176
|
end
|
229
177
|
end
|