asciimath2unitsml 0.0.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d2ef44eb717d6b445489de85ee1e8eb3f5b81a6d83602c48ea23a8cbe4f100c7
4
- data.tar.gz: 84b867f5a97b3ad154c7be8e5975ca4c97446049c4368ece6ae5e23da3c94437
3
+ metadata.gz: ca1751cf852b77599154aa142ea719f0510210ddebb667ace7225cb94f7d598f
4
+ data.tar.gz: 332e23b81a688d1386a4f084dc3db1f401e1e7634beb70c17466e18a23d1f51a
5
5
  SHA512:
6
- metadata.gz: b749aa65924f4b815a7d38d2df9c9d6b48e4cd8f9db62678c100d732a60c8159b537fa2c3bd192296f533371f731ec85365f0d7f231f560f173d167971c38871
7
- data.tar.gz: 64a47773ef26b6b870fc6a4406b697ec10b943e637a82f549946d8126830db9a521f3bdcb4e314718d084e40b7cfacbc820417476cd98663760ec8de29ba566e
6
+ metadata.gz: f300b5a9c8af814f4c61ff9be4407f45350fedfc76a7c9b3e4da90a793fcf5cf7004b24e5719a562cc2cb355bb8be355ee4d1e23909f6bbbd4060a97affcd7de
7
+ data.tar.gz: abeedd889b14c45eeeb90a2f56fc5a09de04ca54f7b15f39677ad516fda17426dbd47977fdc71199ed77424639dd4c86a40164672dd81aad92fd512f872b1723
@@ -0,0 +1,53 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: rake
4
+
5
+ on:
6
+ push:
7
+ branches: [ master, main ]
8
+ tags: [ v* ]
9
+ pull_request:
10
+
11
+ jobs:
12
+ rake:
13
+ name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
+ runs-on: ${{ matrix.os }}
15
+ continue-on-error: ${{ matrix.experimental }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ ruby: [ '2.7', '2.6', '2.5', '2.4' ]
20
+ os: [ ubuntu-latest, windows-latest, macos-latest ]
21
+ experimental: [ false ]
22
+ include:
23
+ - ruby: '3.0'
24
+ os: 'ubuntu-latest'
25
+ experimental: true
26
+ - ruby: '3.0'
27
+ os: 'windows-latest'
28
+ experimental: true
29
+ - ruby: '3.0'
30
+ os: 'macos-latest'
31
+ experimental: true
32
+ steps:
33
+ - uses: actions/checkout@master
34
+ with:
35
+ submodules: true
36
+
37
+ - uses: ruby/setup-ruby@v1
38
+ with:
39
+ ruby-version: ${{ matrix.ruby }}
40
+ bundler-cache: true
41
+
42
+ - run: bundle exec rake
43
+
44
+ tests-passed:
45
+ needs: rake
46
+ runs-on: ubuntu-latest
47
+ steps:
48
+ - uses: peter-evans/repository-dispatch@v1
49
+ with:
50
+ token: ${{ secrets.METANORMA_CI_PAT_TOKEN || secrets.GITHUB_TOKEN }}
51
+ repository: ${{ github.repository }}
52
+ event-type: notify
53
+ client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}"}'
data/.gitmodules ADDED
@@ -0,0 +1,4 @@
1
+ [submodule "lib/unitsdb"]
2
+ branch = master
3
+ path = lib/unitsdb
4
+ url = https://github.com/unitsml/unitsdb.git
data/README.adoc CHANGED
@@ -1,21 +1,60 @@
1
1
  = asciimath2unitsml
2
- Convert Asciimath via MathML to UnitsML
2
+ Convert Units expressions via MathML to UnitsML
3
3
 
4
- Encode UnitsML expressions in AsciiMath as `"unitsml(...)"`. The gem converts
5
- AsciiMath incorporating UnitsML expressions (based on the Ascii representation provided by NIST)
4
+ This gem converts
5
+ MathML incorporating UnitsML expressions (based on the Ascii representation provided by NIST)
6
6
  into MathML complying with https://www.w3.org/TR/mathml-units/[], with
7
- UnitsML markup embedded in it, with identifiers for each unit and dimension.
7
+ UnitsML markup embedded in it, and with unique identifiers for each distinct unit, prefix, and dimension.
8
+ Dimensions are automatically inserted corresponding to each unit.
9
+ Units expressions are identified in MathML as `<mtext>unitsml(...)</mtext>`, which in turn
10
+ can be identified in AsciiMath as `"unitsml(...)"`.
11
+
8
12
  The consuming document is meant to deduplicate the instances of UnitsML markup
9
13
  with the same identifier, and potentially remove them to elsewhere in the document
10
14
  or another document.
11
15
 
12
- The AsciiMath conventions used are:
16
+ == Notation
17
+
18
+ The `unitsml()` expression consists of a unit string.
19
+ The units used in `unitsml()` are taken from the UnitsDB database as updated by Ribose:
20
+ https://github.com/unitsml/unitsdb[]. Units are given as an ASCII based code, consisting of
21
+ multiplication or division of single units, each of which is defined as a Prefix
22
+ (taken from https://github.com/unitsml/unitsdb/blob/master/prefixes.yaml[]),
23
+ unit (taken from https://github.com/unitsml/unitsdb/blob/master/units.yaml[]),
24
+ and exponent; e.g. `mm*s^-2`.
25
+
26
+ The conventions used for writing units are:
13
27
 
14
28
  * `^` for exponents, e.g. `m^-2`
15
- * `*` to combine two units by multiplication; e.g. `m*s^-2`. Division is not supported, use negative exponents instead
29
+ * `*` to combine two units by multiplication; e.g. `m*s^-2`.
30
+ * `/` to combine two units by division; e.g. `m/s^2`.
16
31
  * `u` for μ (micro-)
17
32
 
18
- So
33
+ For more on units notation, see <<units_notation,Units Notation>>.
34
+
35
+ The `unitsml()` expression can take additional optional parameters, giving further information for the UnitsML
36
+ to be generated:
37
+
38
+ * `unitsml(unit-string, quantity: ID)` provides the UnitsDB identifier for the quantity being measured
39
+ (taken from https://github.com/unitsml/unitsdb/blob/master/quantities.yaml[]). For example,
40
+ `unitsml(s, quantity: NISTq109)` indicates that the second is used to measure period duration.
41
+ If a single quantity is associated with the unit in UnitsDB (as given in
42
+ https://github.com/unitsml/unitsdb/blob/master/units.yaml[]), that quantity is added automatically;
43
+ otherwise, no quantity is added unless explicitly nominated in this way.
44
+ * `unitsml(unit-string, name: NAME)` provides a name for the unit, if one is not already available
45
+ from UnitsDB. For example, `unitsml(cal_th/cm^2, name: langley)`.
46
+
47
+ == Rendering
48
+
49
+ The output of the gem is MathML, with MathML unit expressions (expressed as `<mi>`,
50
+ complying with https://www.w3.org/TR/mathml-units/[MathML Units]) cross-referenced to UnitsML
51
+ definitions embedded in the MathML.
52
+
53
+ The gem follows the MathML Units convention of inserting a spacing invisible times operator
54
+ (`<mo rspace='thickmathspace'>&#x2062;</mo>`) between any numbers (`<mn>`) and unit expressions
55
+ in MathML, and representing units in MathML as non-italic variables (`<mi mathvariant='normal'>`).
56
+
57
+ == Example
19
58
 
20
59
  [source]
21
60
  ----
@@ -77,4 +116,129 @@ is converted into:
77
116
  </math>
78
117
  ----
79
118
 
119
+ == Usage
120
+
121
+ The converter is run as:
122
+
123
+ [source,ruby]
124
+ ----
125
+ c = Asciimath2UnitsML::Conv.new()
126
+ c.Asciimath2UnitsML('1 "unitsml(mm*s^-2)"') # AsciiMath string containing UnitsML
127
+ c.MathML2UnitsML("<math xmlns='http://www.w3.org/1998/Math/MathML'><mn>7</mn>"\
128
+ "<mtext>unitsml(kg^-2)</mtext></math>") # MathML string containing <mtext>unitsml()</mtext>
129
+ c.MathML2UnitsML(Nokogiri::XML("<math xmlns='http://www.w3.org/1998/Math/MathML'><mn>7</mn>"\
130
+ "<mtext>unitsml(kg^-2)</mtext></math>")) # Nokogiri parse of MathML document containing <mtext>unitsml()</mtext>
131
+ ----
132
+
133
+ The converter class may be initialised with options:
134
+
135
+ * `multiplier` is the symbol used to represent the multiplication of units. By default,
136
+ following MathML Units, the symbol is middle dot (`&#xB7;`). An arbitrary UTF-8 string can be
137
+ supplied instead; it will be encoded as XML entities. The value `:space` is rendered
138
+ as a spacing invisible times in MathML (`<mo rspace='thickmathspace'>&#x2062;</mo>`),
139
+ and as a non-breaking space in HTML. The value `:nospace` is rendered as a non-spacing
140
+ invisible times in MathML (`<mo>&#x2062;</mo>`), and is not rendered in HTML.
141
+
142
+ [[units_notation]]
143
+ == Units Notation
144
+
145
+ The units used in `unitsml()` are taken from the UnitsDB database as updated by Ribose:
146
+ https://github.com/unitsml/unitsdb[]. Units are given as an ASCII based code, consisting of
147
+ multiplication or division of single units, each of which is defined as a Prefix
148
+ (taken from https://github.com/unitsml/unitsdb/blob/master/prefixes.yaml[]),
149
+ unit (taken from https://github.com/unitsml/unitsdb/blob/master/units.yaml[]),
150
+ and exponent; e.g. `mm*s^-2`.
151
+
152
+ In case of ambiguity, the interpretation with no prefix is prioritised over the interpretation
153
+ as a prefixed unit; so `ct` is interpreted as _hundredweight_, rather than _centi-ton_. Exceptionally,
154
+ `kg` is decomposed into kilo-gram rather than treated as a basic unit, for consistency with
155
+ other prefixes of grams. (Prefixed units appear in UnitsDB, and are indicated as `prefixed: true`.)
156
+
157
+ A unit may have multiple symbols; these are registered separately in
158
+ https://github.com/unitsml/unitsdb/blob/master/units.yaml[units.yaml], as entries under `unit_symbols`.
159
+ These different symbols will be recognised as the same Unit in the UnitsML markup, but
160
+ the original symbol will be retained in the MathML expression. So an expression like `1 unitsml(mL)`
161
+ will be recognised as referring to millilitres; the expression will be given under its canonical
162
+ rendering `ml` in UnitsML markup, but the MathML rendering referencing that UnitsML expression
163
+ will keep the notation `mL`.
164
+
165
+ The symbols used for units can be highly ambiguous; in order to guarantee accurate parsing,
166
+ the symbols used to enter units are unambiguous in https://github.com/unitsml/unitsdb/blob/master/units.yaml[units.yaml].
167
+ They may be found as the entries for `unit_symbols/id` under each unit. For example, `B` is ambiguous between
168
+ _bel_ (as in decibel) and _byte_; they are kept unambiguous by using `bel_B` and `byte_B` to refer to them,
169
+ although they will still both be rendered as `B`.
170
+
171
+ The following table is the current list of ambiguous symbols, which are disambiguated in the symbol ids used.
172
+ This table can be generated (in Asciidoc format) through `Asciimath2UnitsML::Conv.new().ambig_units`:
173
+
174
+ [cols="7*"]
175
+ |===
176
+ |Symbol | Unit + ID | | | | |
177
+
80
178
 
179
+ | &#8242; | minute (minute of arc): `'` | foot: `'_ft` | minute: `'_min` | minute (minute of arc): `prime` | foot: `prime_ft` | minute: `prime_min`
180
+ | &#8243; | second (second of arc): `"` | second: `"_s` | inch: `"_in` | second (second of arc): `dprime` | second: `dprime_s` | inch: `dprime_in`
181
+ | &#8243;Hg | conventional inch of mercury: `"Hg` | conventional inch of mercury: `dprime_Hg` | inch of mercury (32 degF): `"Hg_32degF` | inch of mercury (60 degF): `"Hg_60degF` | inch of mercury (32 degF): `dprime_Hg_32degF` | inch of mercury (60 degF): `dprime_Hg_60degF`
182
+ | hp | horsepower: `hp` | horsepower (UK): `hp_UK` | horsepower, water: `hp_water` | horsepower, metric: `hp_metric` | horsepower, boiler: `hp_boiler` | horsepower, electric: `hp_electric`
183
+ | Btu | British thermal unit_IT: `Btu` | British thermal unit (mean): `Btu_mean` | British thermal unit (39 degF): `Btu_39degF` | British thermal unit (59 degF): `Btu_59degF` | British thermal unit (60 degF): `Btu_60degF` |
184
+ | a | are: `a` | year (365 days): `a_year` | year, tropical: `a_tropical_year` | year, sidereal: `a_sidereal_year` | |
185
+ | d | day: `d` | darcy: `darcy` | day, sidereal: `d_sidereal` | | |
186
+ | inHg | conventional inch of mercury: `inHg` | inch of mercury (32 degF): `inHg_32degF` | inch of mercury (60 degF): `inHg_60degF` | | |
187
+ | inH~2~O | conventional inch of water: `inH_2O` | inch of water (39.2 degF): `inH_2O_39degF` | inch of water (60 degF): `inH_2O_60degF` | | |
188
+ | min | minute: `min` | minim: `minim` | minute, sidereal: `min_sidereal` | | |
189
+ | pc | parsec: `pc` | pica (printer's): `pica_printer` | pica (computer): `pica_computer` | | |
190
+ | t | metric ton: `t` | long ton: `ton_long` | short ton: `ton_short` | | |
191
+ | B | bel: `bel_B` | byte: `byte_B` | | | |
192
+ | cmHg | conventional centimeter of mercury: `cmHg` | centimeter of mercury (0 degC): `cmHg_0degC` | | | |
193
+ | cmH~2~O | conventional centimeter of water: `cmH_2O` | centimeter of water (4 degC): `cmH_2O_4degC` | | | |
194
+ | cup | cup (US): `cup` | cup (FDA): `cup_label` | | | |
195
+ | D | debye: `D` | darcy: `Darcy` | | | |
196
+ | ft | foot: `ft` | foot (based on US survey foot): `ft_US_survey` | | | |
197
+ | ftH~2~O | conventional foot of water: `ftH_2O` | foot of water (39.2 degF): `ftH_2O_39degF` | | | |
198
+ | gi | gill (US): `gi` | gill [Canadian and UK (Imperial)]: `gi_imperial` | | | |
199
+ | h | hour: `h` | hour, sidereal: `h_sidereal` | | | |
200
+ | &#8242;Hg | conventional foot of mercury: `'Hg` | conventional foot of mercury: `prime_Hg` | | | |
201
+ | __&#295;__ | natural unit of action: `h-bar` | atomic unit of action: `h-bar_atomic` | | | |
202
+ | __m__~e~ | natural unit of mass: `m_e` | atomic unit of mass: `m_e_atomic` | | | |
203
+ | in | inch: `in` | inch (based on US survey foot): `in_US_survey` | | | |
204
+ | K | kelvin: `K` | kayser: `kayser` | | | |
205
+ | L | liter: `L` | lambert: `Lambert` | | | |
206
+ | lb | pound (avoirdupois): `lb` | pound (troy or apothecary): `lb_troy` | | | |
207
+ | mi | mile: `mi` | mile (based on US survey foot): `mi_US_survey` | | | |
208
+ | mil | mil (length): `mil` | angular mil (NATO): `mil_nato` | | | |
209
+ | oz | ounce (avoirdupois): `oz` | ounce (troy or apothecary): `oz_troy` | | | |
210
+ | pt | point (printer's): `pt_printer` | point (computer): `pt_computer` | | | |
211
+ | rad | radian: `rad` | rad (absorbed dose): `rad_radiation` | | | |
212
+ | s | second: `s` | second, sidereal: `s_sidereal` | | | |
213
+ | tbsp | tablespoon: `tbsp` | tablespoon (FDA): `tbsp_label` | | | |
214
+ | ton | ton of TNT (energy equivalent): `ton_TNT` | ton of refrigeration (12 000 Btu_IT/h): `ton_refrigeration` | | | |
215
+ | tsp | teaspoon: `tsp` | teaspoon (FDA): `tsp_label` | | | |
216
+ | yd | yard: `yd` | yard (based on US survey foot): `yd_US_survey` | | | |
217
+ | &#186; | degree (degree of arc): `deg` | | | | |
218
+ | &#947; | gamma: `gamma` | | | | |
219
+ | &#956; | micron: `micron` | | | | |
220
+ | &#8486; | ohm: `Ohm` | | | | |
221
+ | &#197; | angstrom: `Aring` | | | | |
222
+ | &#295; | natural unit of action in eV s: `h-bar_eV_s` | | | | |
223
+ | ab&#937; | abohm: `abohm` | | | | |
224
+ | (ab&#937;)^-1^ | abmho: `abS` | | | | |
225
+ | aW | abwatt: `aW (Cardelli)` | | | | |
226
+ | b | barn: `barn` | | | | |
227
+ | Btu~th~ | British thermal unit_th: `Btu_th` | | | | |
228
+ | &#176;C | degree Celsius: `degC` | | | | |
229
+ | cal~IT~ | I.T. calorie: `cal_IT` | | | | |
230
+ | cal~th~ | thermochemical calorie: `cal_th` | | | | |
231
+ | &#176;F | degree Fahrenheit: `degF` | | | | |
232
+ | __a__~0~ | atomic unit of length: `a_0` | | | | |
233
+ | __c__ | natural unit of velocity: `c` | | | | |
234
+ | __c__~0~ | natural unit of velocity: `c_0` | | | | |
235
+ | __e__ | atomic unit of charge: `e` | | | | |
236
+ | __E__~h~ | atomic unit of energy: `e_h` | | | | |
237
+ | &#956;in | microinch: `uin` | | | | |
238
+ | &#176;K | kelvin: `degK` | | | | |
239
+ | kcal~IT~ | kilocalorie_IT: `kcal_IT` | | | | |
240
+ | kcal~th~ | kilocalorie_th: `kcal_th` | | | | |
241
+ | mmH~2~O | conventional millimeter of water: `mmH_2O` | | | | |
242
+ | &#176;R | degree Rankine: `degR` | | | | |
243
+ | &#x19b;~C~ | natural unit of length: `lambda-bar_C` | | | | |
244
+ |===
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
-
2
+
3
3
  lib = File.expand_path("../lib", __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "asciimath2unitsml/version"
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
12
12
 
13
13
  spec.summary = "Convert Asciimath via MathML to UnitsML"
14
14
  spec.description = <<~DESCRIPTION
15
- Convert Asciimath via MathML to UnitsML
15
+ Convert Asciimath via MathML to UnitsML
16
16
  DESCRIPTION
17
17
 
18
18
  spec.homepage = "https://github.com/plurimath/asciimath2unitsml"
@@ -24,6 +24,27 @@ Gem::Specification.new do |spec|
24
24
  spec.test_files = `git ls-files -- {spec}/*`.split("\n")
25
25
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
26
26
 
27
+ # get an array of submodule dirs relative to the root repo
28
+ `git config --file .gitmodules --get-regexp '\\.path$'`
29
+ .split("\n")
30
+ .map { |kv_str| kv_str.split(" ") }
31
+ .each do |(_, submodule_path)|
32
+
33
+ # for each submodule, change working directory to that submodule
34
+ Dir.chdir(submodule_path) do
35
+
36
+ # issue git ls-files in submodule's directory
37
+ submodule_files = `git ls-files | grep -i '.yaml$'`.split($\)
38
+
39
+ submodule_files_paths = submodule_files.map do |filename|
40
+ File.join submodule_path, filename
41
+ end
42
+
43
+ # add relative paths to gem.files
44
+ spec.files += submodule_files_paths
45
+ end
46
+ end
47
+
27
48
  spec.add_dependency "asciimath"
28
49
  spec.add_dependency "htmlentities"
29
50
  spec.add_dependency "nokogiri", "~> 1.10.4"
@@ -39,5 +60,6 @@ Gem::Specification.new do |spec|
39
60
  spec.add_development_dependency "rubocop", "= 0.54.0"
40
61
  spec.add_development_dependency "simplecov", "~> 0.15"
41
62
  spec.add_development_dependency "timecop", "~> 0.9"
63
+ spec.add_development_dependency "rexml"
42
64
  end
43
65
 
@@ -1,3 +1,4 @@
1
1
  require_relative "asciimath2unitsml/version"
2
2
  require_relative "asciimath2unitsml/conv"
3
+ require_relative "unitsdb_ruby/unitsdb"
3
4
 
@@ -5,142 +5,52 @@ require "yaml"
5
5
  require "rsec"
6
6
  require_relative "string"
7
7
  require_relative "parse"
8
+ require_relative "render"
9
+ require_relative "unit"
8
10
 
9
11
  module Asciimath2UnitsML
10
12
  MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
11
13
  UNITSML_NS = "http://unitsml.nist.gov/2005".freeze
12
14
 
13
15
  class Conv
14
- def initialize
15
- @prefixes_id = read_yaml("../unitsdb/prefixes.yaml")
16
- @prefixes = flip_name_and_id(@prefixes_id)
17
- @quantities = read_yaml("../unitsdb/quantities.yaml")
18
- @units_id = read_yaml("../unitsdb/units.yaml")
19
- @units = flip_name_and_id(@units_id)
20
- @parser = parser
21
- end
22
-
23
- # https://www.w3.org/TR/mathml-units/ section 2: delimit number Invisible-Times unit
24
- def Asciimath2UnitsML(expression)
25
- xml = Nokogiri::XML(asciimath2mathml(expression))
26
- MathML2UnitsML(xml).to_xml
27
- end
28
-
29
- def MathML2UnitsML(xml)
30
- xml.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
31
- next unless %r{^unitsml\(.+\)$}.match(x.text)
32
- text = x.text.sub(%r{^unitsml\((.+)\)$}m, "\\1")
33
- units = parse(text)
34
- delim = x&.previous_element&.name == "mn" ? "<mo rspace='thickmathspace'>&#x2062;</mo>" : ""
35
- x.replace("#{delim}<mrow xref='#{unit_id(text)}'>#{mathmlsymbol(units)}</mrow>\n#{unitsml(units, text)}")
16
+ def initialize(options = {})
17
+ @dimensions_id = read_yaml("../unitsdb/dimensions.yaml").each_with_object({}) do |(k, v), m|
18
+ m[k.to_s] = UnitsDB::Dimension.new(k, v)
36
19
  end
37
- xml
38
- end
39
-
40
- def asciimath2mathml(expression)
41
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
42
- AsciiMath.parse(HTMLEntities.new.decode(expression)).ast).to_s.
43
- gsub(/<math>/, "<math xmlns='#{MATHML_NS}'>")
44
- end
45
-
46
- def unit_id(text)
47
- "U_" +
48
- (@units[text.to_sym] ? @units[text.to_sym][:id] : text.gsub(/\*/, ".").gsub(/\^/, ""))
49
- end
50
-
51
- def unit(units, text, dims)
52
- dimid = dim_id(dims)
53
- <<~END
54
- <Unit xmlns='#{UNITSML_NS}' xml:id='#{unit_id(text)}'#{dimid ? " dimensionURL='##{dimid}'" : ""}>
55
- #{unitsystem(units)}
56
- #{unitname(units, text)}
57
- #{unitsymbol(units)}
58
- #{rootunits(units)}
59
- </Unit>
60
- END
61
- end
62
-
63
- def unitsystem(units)
64
- ret = []
65
- units.any? { |x| @units[x[:unit].to_sym][:si] != true } and
66
- ret << "<UnitSystem name='not_SI' type='not_SI' xml:lang='en-US'/>"
67
- if units.any? { |x| @units[x[:unit].to_sym][:si] == true }
68
- base = units.size == 1 && @units[units[0][:unit].to_sym][:type].include?("si-base")
69
- ret << "<UnitSystem name='SI' type='#{base ? "SI_base" : "SI_derived"}' xml:lang='en-US'/>"
20
+ @prefixes_id = read_yaml("../unitsdb/prefixes.yaml").each_with_object({}) do |(k, v), m|
21
+ m[k] = UnitsDB::Prefix.new(k, v)
70
22
  end
71
- ret.join("\n")
72
- end
73
-
74
- def unitname(units, text)
75
- name = @units[text.to_sym] ? @units[text.to_sym][:name] : compose_name(units, text)
76
- "<UnitName xml:lang='en'>#{name}</UnitName>"
77
- end
78
-
79
- # TODO: compose name from the component units
80
- def compose_name(units, text)
81
- text
82
- end
83
-
84
- def unitsymbol(units)
85
- <<~END
86
- <UnitSymbol type="HTML">#{htmlsymbol(units)}</UnitSymbol>
87
- <UnitSymbol type="MathML">#{mathmlsymbolwrap(units)}</UnitSymbol>
88
- END
89
- end
90
-
91
- def htmlsymbol(units)
92
- units.map do |u|
93
- u[:exponent] and exp = "<sup>#{u[:exponent].sub(/-/, "&#x2212;")}</sup>"
94
- "#{u[:prefix]}#{u[:unit]}#{exp}"
95
- end.join(" &#183; ")
96
- end
97
-
98
- def mathmlsymbol(units)
99
- exp = units.map do |u|
100
- base = "<mi mathvariant='normal'>#{u[:prefix]}#{u[:unit]}</mi>"
101
- if u[:exponent]
102
- exp = "<mn>#{u[:exponent]}</mn>".sub(/<mn>-/, "<mo>&#x2212;</mo><mn>")
103
- "<msup><mrow>#{base}</mrow><mrow>#{exp}</mrow></msup>"
104
- else
105
- base
106
- end
107
- end.join("<mo>&#xB7;</mo>")
108
- end
109
-
110
- def mathmlsymbolwrap(units)
111
- <<~END
112
- <math xmlns='#{MATHML_NS}'>
113
- <mrow>#{mathmlsymbol(units)}</mrow>
114
- </math>
115
- END
116
- end
117
-
118
- def rootunits(units)
119
- return if units.size == 1
120
- exp = units.map do |u|
121
- prefix = " prefix='#{u[:prefix]}'" if u[:prefix]
122
- exponent = " powerNumerator='#{u[:exponent]}'" if u[:exponent]
123
- "<EnumeratedRootUnit unit='#{@units[u[:unit].to_sym][:name]}'#{prefix}#{exponent}/>"
124
- end.join("\n")
125
- <<~END
126
- <RootUnits>#{exp}</RootUnits>
127
- END
23
+ @prefixes = flip_name_and_symbol(@prefixes_id)
24
+ @quantities = read_yaml("../unitsdb/quantities.yaml").each_with_object({}) do |(k, v), m|
25
+ m[k.to_s] = UnitsDB::Quantity.new(k, v)
26
+ end
27
+ @units_id = read_yaml("../unitsdb/units.yaml").each_with_object({}) do |(k, v), m|
28
+ m[k.to_s] = UnitsDB::Unit.new(k.to_s, v)
29
+ end
30
+ @units = flip_name_and_symbols(@units_id)
31
+ @symbols = @units.each_with_object({}) do |(k, v), m|
32
+ v.symbolids.each { |x| m[x] = v.symbols_hash[x] }
33
+ end
34
+ @parser = parser
35
+ @multiplier = multiplier(options[:multiplier] || "\u00b7")
128
36
  end
129
37
 
130
38
  def prefix(units)
131
- units.map { |u| u[:prefix] }.reject { |u| u.nil? }.uniq.map do |p1|
132
- p = p1.to_sym
39
+ units.map { |u| u[:prefix] }.reject { |u| u.nil? }.uniq.map do |p|
133
40
  <<~END
134
- <Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[p][:base]}'
135
- prefixPower='#{@prefixes[p][:power]}' xml:id='#{@prefixes[p][:id]}'>
136
- <PrefixName xml:lang="en">#{@prefixes[p][:name]}</PrefixName>
137
- <PrefixSymbol type="ASCII">#{@prefixes[p][:symbol]}</PrefixSymbol>
41
+ <Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[p].base}'
42
+ prefixPower='#{@prefixes[p].power}' xml:id='#{@prefixes[p].id}'>
43
+ <PrefixName xml:lang="en">#{@prefixes[p].name}</PrefixName>
44
+ <PrefixSymbol type="ASCII">#{@prefixes[p].ascii}</PrefixSymbol>
45
+ <PrefixSymbol type="unicode">#{@prefixes[p].unicode}</PrefixSymbol>
46
+ <PrefixSymbol type="LaTeX">#{@prefixes[p].latex}</PrefixSymbol>
47
+ <PrefixSymbol type="HTML">#{HTMLEntities.new.encode(@prefixes[p].html, :basic)}</PrefixSymbol>
138
48
  </Prefix>
139
49
  END
140
50
  end.join("\n")
141
51
  end
142
52
 
143
- def dimension(dims)
53
+ def dimension_components(dims)
144
54
  return if dims.nil? || dims.empty?
145
55
  <<~END
146
56
  <Dimension xmlns='#{UNITSML_NS}' xml:id="#{dim_id(dims)}">
@@ -149,19 +59,8 @@ module Asciimath2UnitsML
149
59
  END
150
60
  end
151
61
 
152
- U2D = {
153
- "m" => { dimension: "Length", order: 1, symbol: "L" },
154
- "g" => { dimension: "Mass", order: 2, symbol: "M" },
155
- "kg" => { dimension: "Mass", order: 2, symbol: "M" },
156
- "s" => { dimension: "Time", order: 3, symbol: "T" },
157
- "A" => { dimension: "ElectricCurrent", order: 4, symbol: "I" },
158
- "K" => { dimension: "ThermodynamicTemperature", order: 5, symbol: "Theta" },
159
- "mol" => { dimension: "AmountOfSubstance", order: 6, symbol: "N" },
160
- "cd" => { dimension: "LuminousIntensity", order: 7, symbol: "J" },
161
- }
162
-
163
62
  def units2dimensions(units)
164
- norm = normalise_units(units)
63
+ norm = decompose_units(units)
165
64
  return if norm.any? { |u| u[:unit] == "unknown" || u[:prefix] == "unknown" }
166
65
  norm.map do |u|
167
66
  { dimension: U2D[u[:unit]][:dimension],
@@ -177,11 +76,16 @@ module Asciimath2UnitsML
177
76
 
178
77
  def dim_id(dims)
179
78
  return nil if dims.nil? || dims.empty?
79
+ dimhash = dims.each_with_object({}) { |h, m| m[h[:dimension]] = h }
80
+ dimsvector = %w(Length Mass Time ElectricCurrent ThermodynamicTemperature
81
+ AmountOfSubstance LuminousIntensity PlaneAngle)
82
+ .map { |h| dimhash.dig(h, :exponent) }.join(":")
83
+ id = @dimensions_id&.values&.select { |d| d.vector == dimsvector }&.first&.id and return id.to_s
180
84
  "D_" + dims.map { |d| U2D[d[:unit]][:symbol] + (d[:exponent] == 1 ? "" : d[:exponent].to_s) }.join("")
181
85
  end
182
86
 
183
- def normalise_units(units)
184
- gather_units(units.map { |u| normalise_unit(u) }.flatten)
87
+ def decompose_units(units)
88
+ gather_units(units_only(units).map { |u| decompose_unit(u) }.flatten)
185
89
  end
186
90
 
187
91
  def gather_units(units)
@@ -195,45 +99,79 @@ module Asciimath2UnitsML
195
99
  end
196
100
  end
197
101
 
198
- def normalise_unit(u)
199
- if @units[u[:unit].to_sym][:type]&.include?("si-base") then u
200
- elsif !@units[u[:unit].to_sym][:bases] then { prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] }
102
+ # treat g not kg as base unit: we have stripped the prefix k in parsing
103
+ # reduce units down to basic units
104
+ def decompose_unit(u)
105
+ if u[:unit] == "g" then u
106
+ elsif @units[u[:unit]].system_type == "SI_base" then u
107
+ elsif !@units[u[:unit]].si_derived_bases
108
+ { prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] }
201
109
  else
202
- @units[u[:unit].to_sym][:bases].each_with_object([]) do |k, m|
203
- m << { prefix: k["prefix"] ?
204
- combine_prefixes(@prefixes_id[k["prefix"]], @prefixes[u[:prefix]]) : u[:prefix],
205
- unit: @units_id[k["id"].to_sym][:symbol],
206
- exponent: (k["power"]&.to_i || 1) * (u[:exponent]&.to_i || 1) }
110
+ @units[u[:unit]].si_derived_bases.each_with_object([]) do |k, m|
111
+ m << { prefix: !k[:prefix].nil? && !k[:prefix].empty? ?
112
+ combine_prefixes(@prefixes_id[k[:prefix]], @prefixes[u[:prefix]]) : u[:prefix],
113
+ unit: @units_id[k[:id]].symbolid,
114
+ exponent: (k[:power]&.to_i || 1) * (u[:exponent]&.to_i || 1) }
207
115
  end
208
116
  end
209
117
  end
210
118
 
211
119
  def combine_prefixes(p1, p2)
212
120
  return nil if p1.nil? && p2.nil?
213
- return p1[:symbol] if p2.nil?
214
- return p2[:symbol] if p1.nil?
215
- return "unknown" if p1[:base] != p2[:base]
121
+ return p1.symbolid if p2.nil?
122
+ return p2.symbolid if p1.nil?
123
+ return "unknown" if p1.base != p2.base
216
124
  @prefixes.each do |p|
217
- return p[:symbol] if p[:base] == p1[:base] && p[:power] == p1[:power] + p2[:power]
125
+ return p.symbolid if p.base == p1.base && p.power == p1.power + p2.power
218
126
  end
219
127
  "unknown"
220
128
  end
221
129
 
222
- def parse(x)
223
- units = @parser.parse(x)
224
- if !units || Rsec::INVALID[units]
225
- raise Rsec::SyntaxError.new "error parsing UnitsML expression", x, 1, 0
130
+ def quantityname(id)
131
+ ret = ""
132
+ @quantities[id].names.each do |q|
133
+ ret += %(<QuantityName xml:lang="en-US">#{q}</QuantityName>)
134
+ end
135
+ ret
136
+ end
137
+
138
+ def quantity(normtext, quantity)
139
+ return unless @units[normtext] && @units[normtext].quantities.size == 1 || @quantities[quantity]
140
+ id = quantity || @units[normtext].quantities.first
141
+ dim = %( dimensionURL="##{@units[normtext].dimension}") if @units[normtext]&.dimension
142
+ <<~END
143
+ <Quantity xmlns='#{UNITSML_NS}' xml:id="#{id}"#{dim} quantityType="base">
144
+ #{quantityname(id)}
145
+ </Quantity>
146
+ END
147
+ end
148
+
149
+ def dimid2dimensions(normtext)
150
+ @dimensions_id[normtext].keys.map do |k|
151
+ { dimension: k,
152
+ symbol: U2D.values.select { |v| v[:dimension] == k }.first[:symbol],
153
+ exponent: @dimensions_id[normtext].exponent(k) }
226
154
  end
227
- Rsec::Fail.reset
228
- units
229
155
  end
230
156
 
231
- def unitsml(units, text)
157
+ def dimension(normtext)
158
+ return unless @units[normtext]&.dimension
159
+ dims = dimid2dimensions(@units[normtext]&.dimension)
160
+ <<~END
161
+ <Dimension xmlns='#{UNITSML_NS}' xml:id="#{@units[normtext]&.dimension}">
162
+ #{dims.map { |u| dimension1(u) }.join("\n") }
163
+ </Dimension>
164
+ END
165
+ end
166
+
167
+ def unitsml(units, origtext, normtext, quantity, name)
232
168
  dims = units2dimensions(units)
233
169
  <<~END
234
- #{unit(units, text, dims)}
170
+ #{unit(units, origtext, normtext, dims, name)}
235
171
  #{prefix(units)}
236
- #{dimension(dims)}
172
+ #{dimension(normtext)}
173
+ #{dimension_components(dims)}
174
+ #{quantity(normtext, quantity)}
237
175
  END
238
176
  end
239
177
  end