asciimath2unitsml 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d06ecf91cb804e1a5827a46ef0e458a0fe1b444a476bc6e9d99ff2dd6c06d37
4
- data.tar.gz: 615d0c46c6beb12bea4fb427379ba49835679fb3f1efd56692909d049e9e9be7
3
+ metadata.gz: 161503157f4b3622c8bcae6ece418ed5f675a95f409d60017287017c923f7749
4
+ data.tar.gz: 48571d4a30ea4ade2f864178dcb473c9a636d71d5088b64a2356e3cbfa5f2886
5
5
  SHA512:
6
- metadata.gz: 270b64a21c8a566907c1d0a3e81e20f126bd48d277631678b65d662eb8d80da92799e127c0d75f0555226762157eefd1cf27135f3daa1b79693199c00758fa22
7
- data.tar.gz: b78b7cc95c55fde702b677e8e0a70126066341afeed726d4e3d00958b4cd4ad4d11850ef694329eca640019b5354bb34fb7efd246d023c065e7c37388c9de87e
6
+ metadata.gz: a50f75b036f2a19dcd1925965f192d1652ddabc242ce22c33efcfaf0d03c0ee5118c46021f22ee02237f93e5e32492c922466fd7f3206a20713c7a2bf749e6ef
7
+ data.tar.gz: 279f88fb13d2ed6961b42fec192312b52b82d993daf4efa2750de21b2e240b2b63702d82dac831e4dc3a0b86bd6b797cbbca057809d4c971eb27bf71548f9010
@@ -0,0 +1,53 @@
1
+ # Auto-generated by Cimas: Do not edit it manually!
2
+ # See https://github.com/metanorma/cimas
3
+ name: rake
4
+
5
+ on:
6
+ push:
7
+ branches: [ master, main ]
8
+ tags: [ v* ]
9
+ pull_request:
10
+
11
+ jobs:
12
+ rake:
13
+ name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
+ runs-on: ${{ matrix.os }}
15
+ continue-on-error: ${{ matrix.experimental }}
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ ruby: [ '2.7', '2.6', '2.5', '2.4' ]
20
+ os: [ ubuntu-latest, windows-latest, macos-latest ]
21
+ experimental: [ false ]
22
+ include:
23
+ - ruby: '3.0'
24
+ os: 'ubuntu-latest'
25
+ experimental: true
26
+ - ruby: '3.0'
27
+ os: 'windows-latest'
28
+ experimental: true
29
+ - ruby: '3.0'
30
+ os: 'macos-latest'
31
+ experimental: true
32
+ steps:
33
+ - uses: actions/checkout@master
34
+ with:
35
+ submodules: true
36
+
37
+ - uses: ruby/setup-ruby@v1
38
+ with:
39
+ ruby-version: ${{ matrix.ruby }}
40
+ bundler-cache: true
41
+
42
+ - run: bundle exec rake
43
+
44
+ tests-passed:
45
+ needs: rake
46
+ runs-on: ubuntu-latest
47
+ steps:
48
+ - uses: peter-evans/repository-dispatch@v1
49
+ with:
50
+ token: ${{ secrets.METANORMA_CI_PAT_TOKEN || secrets.GITHUB_TOKEN }}
51
+ repository: ${{ github.repository }}
52
+ event-type: notify
53
+ client-payload: '{"ref": "${{ github.ref }}", "sha": "${{ github.sha }}"}'
data/.gitmodules ADDED
@@ -0,0 +1,4 @@
1
+ [submodule "lib/unitsdb"]
2
+ branch = master
3
+ path = lib/unitsdb
4
+ url = https://github.com/unitsml/unitsdb.git
data/README.adoc CHANGED
@@ -1,27 +1,76 @@
1
+ image:https://img.shields.io/gem/v/asciimath2unitsml.svg["Gem Version", link="https://rubygems.org/gems/asciimath2unitsml"]
2
+ image:https://github.com/plurimath/asciimath2unitsml/workflows/rake/badge.svg["Build Status", link="https://github.com/plurimath/asciimath2unitsml/actions?workflow=rake"]
3
+ // image:https://codeclimate.com/github/plurimath/asciimath2unitsml/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/plurimath/asciimath2unitsml"]
4
+ image:https://img.shields.io/github/issues-pr-raw/plurimath/asciimath2unitsml.svg["Pull Requests", link="https://github.com/plurimath/asciimath2unitsml/pulls"]
5
+ image:https://img.shields.io/github/commits-since/plurimath/asciimath2unitsml/latest.svg["Commits since latest",link="https://github.com/plurimath/asciimath2unitsml/releases"]
6
+
1
7
  = asciimath2unitsml
2
8
  Convert Units expressions via MathML to UnitsML
3
9
 
4
10
  This gem converts
5
11
  MathML incorporating UnitsML expressions (based on the Ascii representation provided by NIST)
6
12
  into MathML complying with https://www.w3.org/TR/mathml-units/[], with
7
- UnitsML markup embedded in it, and with unique identifiers for each distinct unit and dimension.
13
+ UnitsML markup embedded in it, and with unique identifiers for each distinct unit, prefix, and dimension.
14
+ Dimensions are automatically inserted corresponding to each unit.
8
15
  Units expressions are identified in MathML as `<mtext>unitsml(...)</mtext>`, which in turn
9
16
  can be identified in AsciiMath as `"unitsml(...)"`.
17
+
10
18
  The consuming document is meant to deduplicate the instances of UnitsML markup
11
19
  with the same identifier, and potentially remove them to elsewhere in the document
12
20
  or another document.
13
21
 
22
+ == Notation
23
+
24
+ The `unitsml()` expression consists of a unit string.
25
+ The units used in `unitsml()` are taken from the UnitsDB database as updated by Ribose:
26
+ https://github.com/unitsml/unitsdb[]. Units are given as an ASCII based code, consisting of
27
+ multiplication or division of single units, each of which is defined as a Prefix
28
+ (taken from https://github.com/unitsml/unitsdb/blob/master/prefixes.yaml[]),
29
+ unit (taken from https://github.com/unitsml/unitsdb/blob/master/units.yaml[]),
30
+ and exponent; e.g. `mm*s^-2`.
31
+
14
32
  The conventions used for writing units are:
15
33
 
16
34
  * `^` for exponents, e.g. `m^-2`
17
- * `*` to combine two units by multiplication; e.g. `m*s^-2`. Division is not supported, use negative exponents instead
35
+ * `*` to combine two units by multiplication; e.g. `m*s^-2`.
36
+ * `/` to combine two units by division; e.g. `cal_th/cm^2`.
18
37
  * `u` for μ (micro-)
19
38
 
39
+ For more on units notation, see <<units_notation,Units Notation>>.
40
+
41
+ The `unitsml()` expression can take additional optional parameters, giving further information for the UnitsML
42
+ to be generated:
43
+
44
+ * `unitsml(unit-string, quantity: ID)` provides the UnitsDB identifier for the quantity being measured
45
+ (taken from https://github.com/unitsml/unitsdb/blob/master/quantities.yaml[]). For example,
46
+ `unitsml(s, quantity: NISTq109)` indicates that the second is used to measure period duration.
47
+ If a single quantity is associated with the unit in UnitsDB (as given in
48
+ https://github.com/unitsml/unitsdb/blob/master/units.yaml[]), that quantity is added automatically;
49
+ otherwise, no quantity is added unless explicitly nominated in this way.
50
+ * `unitsml(unit-string, name: NAME)` provides a name for the unit, if one is not already available
51
+ from UnitsDB. For example, `unitsml(cal_th/cm^2, name: langley)`.
52
+ * `unitsml(unit-string, symbol: SYMBOL)` provides an alternate symbol for the unit, in AsciiMath.
53
+ The unit-string gives the canonical representation of the unit, but SYMBOL is what will be rendered.
54
+ For example, `unitsml(cal_th/cm^2, name: langley, symbol: La)`, or `unitsml(mm*s^-2, symbol: mm cdot s^-2)`.
55
+ (All variables in SYMBOL are rendered upright, as is the default for units.)
56
+
57
+ Standalone prefixes can be recognised by replacing the unit with a hyphen; so `unitsml(p-)` corresponds
58
+ to the standalone prefix "pico" (and is rendered as "p").
59
+
60
+ == Rendering
61
+
62
+ The output of the gem is MathML, with MathML unit expressions (expressed as `<mi>`,
63
+ complying with https://www.w3.org/TR/mathml-units/[MathML Units]) cross-referenced to UnitsML
64
+ definitions embedded in the MathML.
65
+
20
66
  The gem follows the MathML Units convention of inserting a spacing invisible times operator
21
67
  (`<mo rspace='thickmathspace'>&#x2062;</mo>`) between any numbers (`<mn>`) and unit expressions
22
68
  in MathML, and representing units in MathML as non-italic variables (`<mi mathvariant='normal'>`).
23
69
 
24
- So:
70
+ Space is not inserted between a number and a unit expression, when that unit expression wholly consists
71
+ of punctuation: _1 m_, _1 °C_, but _9° 7′ 22″_.
72
+
73
+ == Example
25
74
 
26
75
  [source]
27
76
  ----
@@ -83,14 +132,18 @@ is converted into:
83
132
  </math>
84
133
  ----
85
134
 
135
+ == Usage
136
+
86
137
  The converter is run as:
87
138
 
88
139
  [source,ruby]
89
140
  ----
90
141
  c = Asciimath2UnitsML::Conv.new()
91
- c.Asciimath2UnitsML({Asciimath string containing "unitsml()"})
92
- c.MathML2UnitsML({MathML document containing <mtext>unitsml()</mtext>})
93
- c.MathML2UnitsML({Nokogiri parse of MathML document containing <mtext>unitsml()</mtext>})
142
+ c.Asciimath2UnitsML('1 "unitsml(mm*s^-2)"') # AsciiMath string containing UnitsML
143
+ c.MathML2UnitsML("<math xmlns='http://www.w3.org/1998/Math/MathML'><mn>7</mn>"\
144
+ "<mtext>unitsml(kg^-2)</mtext></math>") # MathML string containing <mtext>unitsml()</mtext>
145
+ c.MathML2UnitsML(Nokogiri::XML("<math xmlns='http://www.w3.org/1998/Math/MathML'><mn>7</mn>"\
146
+ "<mtext>unitsml(kg^-2)</mtext></math>")) # Nokogiri parse of MathML document containing <mtext>unitsml()</mtext>
94
147
  ----
95
148
 
96
149
  The converter class may be initialised with options:
@@ -101,3 +154,107 @@ supplied instead; it will be encoded as XML entities. The value `:space` is rend
101
154
  as a spacing invisible times in MathML (`<mo rspace='thickmathspace'>&#x2062;</mo>`),
102
155
  and as a non-breaking space in HTML. The value `:nospace` is rendered as a non-spacing
103
156
  invisible times in MathML (`<mo>&#x2062;</mo>`), and is not rendered in HTML.
157
+
158
+ [[units_notation]]
159
+ == Units Notation
160
+
161
+ The units used in `unitsml()` are taken from the UnitsDB database as updated by Ribose:
162
+ https://github.com/unitsml/unitsdb[]. Units are given as an ASCII based code, consisting of
163
+ multiplication or division of single units, each of which is defined as a Prefix
164
+ (taken from https://github.com/unitsml/unitsdb/blob/master/prefixes.yaml[]),
165
+ unit (taken from https://github.com/unitsml/unitsdb/blob/master/units.yaml[]),
166
+ and exponent; e.g. `mm*s^-2`.
167
+
168
+ In case of ambiguity, the interpretation with no prefix is prioritised over the interpretation
169
+ as a prefixed unit; so `ct` is interpreted as _hundredweight_, rather than _centi-ton_. Exceptionally,
170
+ `kg` is decomposed into kilo-gram rather than treated as a basic unit, for consistency with
171
+ other prefixes of grams. (Prefixed units appear in UnitsDB, and are indicated as `prefixed: true`.)
172
+
173
+ A unit may have multiple symbols; these are registered separately in
174
+ https://github.com/unitsml/unitsdb/blob/master/units.yaml[units.yaml], as entries under `unit_symbols`.
175
+ These different symbols will be recognised as the same Unit in the UnitsML markup, but
176
+ the original symbol will be retained in the MathML expression. So an expression like `1 unitsml(mL)`
177
+ will be recognised as referring to millilitres; the expression will be given under its canonical
178
+ rendering `ml` in UnitsML markup, but the MathML rendering referencing that UnitsML expression
179
+ will keep the notation `mL`.
180
+
181
+ The symbols used for units can be highly ambiguous; in order to guarantee accurate parsing,
182
+ the symbols used to enter units are kept unambiguous in https://github.com/unitsml/unitsdb/blob/master/units.yaml[units.yaml].
183
+ They may be found as the entries for `unit_symbols/id` under each unit. For example, `B` is ambiguous between
184
+ _bel_ (as in decibel) and _byte_; they are kept unambiguous by using `bel_B` and `byte_B` to refer to them,
185
+ although they will still both be rendered as `B`.
186
+
187
+ The following table is the current list of ambiguous symbols, which are disambiguated in the symbol ids used.
188
+ This table can be generated (in Asciidoc format) through `Asciimath2UnitsML::Conv.new().ambig_units`:
189
+
190
+ [cols="7*"]
191
+ |===
192
+ |Symbol | Unit + ID | | | | |
193
+
194
+
195
+ | &#8242; | minute (minute of arc): `'` | foot: `'_ft` | minute: `'_min` | minute (minute of arc): `prime` | foot: `prime_ft` | minute: `prime_min`
196
+ | &#8243; | second (second of arc): `"` | second: `"_s` | inch: `"_in` | second (second of arc): `dprime` | second: `dprime_s` | inch: `dprime_in`
197
+ | &#8243;Hg | conventional inch of mercury: `"Hg` | conventional inch of mercury: `dprime_Hg` | inch of mercury (32 degF): `"Hg_32degF` | inch of mercury (60 degF): `"Hg_60degF` | inch of mercury (32 degF): `dprime_Hg_32degF` | inch of mercury (60 degF): `dprime_Hg_60degF`
198
+ | hp | horsepower: `hp` | horsepower (UK): `hp_UK` | horsepower, water: `hp_water` | horsepower, metric: `hp_metric` | horsepower, boiler: `hp_boiler` | horsepower, electric: `hp_electric`
199
+ | Btu | British thermal unit_IT: `Btu` | British thermal unit (mean): `Btu_mean` | British thermal unit (39 degF): `Btu_39degF` | British thermal unit (59 degF): `Btu_59degF` | British thermal unit (60 degF): `Btu_60degF` |
200
+ | a | are: `a` | year (365 days): `a_year` | year, tropical: `a_tropical_year` | year, sidereal: `a_sidereal_year` | |
201
+ | d | day: `d` | darcy: `darcy` | day, sidereal: `d_sidereal` | | |
202
+ | inHg | conventional inch of mercury: `inHg` | inch of mercury (32 degF): `inHg_32degF` | inch of mercury (60 degF): `inHg_60degF` | | |
203
+ | inH~2~O | conventional inch of water: `inH_2O` | inch of water (39.2 degF): `inH_2O_39degF` | inch of water (60 degF): `inH_2O_60degF` | | |
204
+ | min | minute: `min` | minim: `minim` | minute, sidereal: `min_sidereal` | | |
205
+ | pc | parsec: `pc` | pica (printer's): `pica_printer` | pica (computer): `pica_computer` | | |
206
+ | t | metric ton: `t` | long ton: `ton_long` | short ton: `ton_short` | | |
207
+ | B | bel: `bel_B` | byte: `byte_B` | | | |
208
+ | cmHg | conventional centimeter of mercury: `cmHg` | centimeter of mercury (0 degC): `cmHg_0degC` | | | |
209
+ | cmH~2~O | conventional centimeter of water: `cmH_2O` | centimeter of water (4 degC): `cmH_2O_4degC` | | | |
210
+ | cup | cup (US): `cup` | cup (FDA): `cup_label` | | | |
211
+ | D | debye: `D` | darcy: `Darcy` | | | |
212
+ | ft | foot: `ft` | foot (based on US survey foot): `ft_US_survey` | | | |
213
+ | ftH~2~O | conventional foot of water: `ftH_2O` | foot of water (39.2 degF): `ftH_2O_39degF` | | | |
214
+ | gi | gill (US): `gi` | gill [Canadian and UK (Imperial)]: `gi_imperial` | | | |
215
+ | h | hour: `h` | hour, sidereal: `h_sidereal` | | | |
216
+ | &#8242;Hg | conventional foot of mercury: `'Hg` | conventional foot of mercury: `prime_Hg` | | | |
217
+ | __&#295;__ | natural unit of action: `h-bar` | atomic unit of action: `h-bar_atomic` | | | |
218
+ | __m__~e~ | natural unit of mass: `m_e` | atomic unit of mass: `m_e_atomic` | | | |
219
+ | in | inch: `in` | inch (based on US survey foot): `in_US_survey` | | | |
220
+ | K | kelvin: `K` | kayser: `kayser` | | | |
221
+ | L | liter: `L` | lambert: `Lambert` | | | |
222
+ | lb | pound (avoirdupois): `lb` | pound (troy or apothecary): `lb_troy` | | | |
223
+ | mi | mile: `mi` | mile (based on US survey foot): `mi_US_survey` | | | |
224
+ | mil | mil (length): `mil` | angular mil (NATO): `mil_nato` | | | |
225
+ | oz | ounce (avoirdupois): `oz` | ounce (troy or apothecary): `oz_troy` | | | |
226
+ | pt | point (printer's): `pt_printer` | point (computer): `pt_computer` | | | |
227
+ | rad | radian: `rad` | rad (absorbed dose): `rad_radiation` | | | |
228
+ | s | second: `s` | second, sidereal: `s_sidereal` | | | |
229
+ | tbsp | tablespoon: `tbsp` | tablespoon (FDA): `tbsp_label` | | | |
230
+ | ton | ton of TNT (energy equivalent): `ton_TNT` | ton of refrigeration (12 000 Btu_IT/h): `ton_refrigeration` | | | |
231
+ | tsp | teaspoon: `tsp` | teaspoon (FDA): `tsp_label` | | | |
232
+ | yd | yard: `yd` | yard (based on US survey foot): `yd_US_survey` | | | |
233
+ | &#186; | degree (degree of arc): `deg` | | | | |
234
+ | &#947; | gamma: `gamma` | | | | |
235
+ | &#956; | micron: `micron` | | | | |
236
+ | &#8486; | ohm: `Ohm` | | | | |
237
+ | &#197; | angstrom: `Aring` | | | | |
238
+ | &#295; | natural unit of action in eV s: `h-bar_eV_s` | | | | |
239
+ | ab&#937; | abohm: `abohm` | | | | |
240
+ | (ab&#937;)^-1^ | abmho: `abS` | | | | |
241
+ | aW | abwatt: `aW (Cardelli)` | | | | |
242
+ | b | barn: `barn` | | | | |
243
+ | Btu~th~ | British thermal unit_th: `Btu_th` | | | | |
244
+ | &#176;C | degree Celsius: `degC` | | | | |
245
+ | cal~IT~ | I.T. calorie: `cal_IT` | | | | |
246
+ | cal~th~ | thermochemical calorie: `cal_th` | | | | |
247
+ | &#176;F | degree Fahrenheit: `degF` | | | | |
248
+ | __a__~0~ | atomic unit of length: `a_0` | | | | |
249
+ | __c__ | natural unit of velocity: `c` | | | | |
250
+ | __c__~0~ | natural unit of velocity: `c_0` | | | | |
251
+ | __e__ | atomic unit of charge: `e` | | | | |
252
+ | __E__~h~ | atomic unit of energy: `e_h` | | | | |
253
+ | &#956;in | microinch: `uin` | | | | |
254
+ | &#176;K | kelvin: `degK` | | | | |
255
+ | kcal~IT~ | kilocalorie_IT: `kcal_IT` | | | | |
256
+ | kcal~th~ | kilocalorie_th: `kcal_th` | | | | |
257
+ | mmH~2~O | conventional millimeter of water: `mmH_2O` | | | | |
258
+ | &#176;R | degree Rankine: `degR` | | | | |
259
+ | &#x19b;~C~ | natural unit of length: `lambda-bar_C` | | | | |
260
+ |===
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
-
2
+
3
3
  lib = File.expand_path("../lib", __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "asciimath2unitsml/version"
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
12
12
 
13
13
  spec.summary = "Convert Asciimath via MathML to UnitsML"
14
14
  spec.description = <<~DESCRIPTION
15
- Convert Asciimath via MathML to UnitsML
15
+ Convert Asciimath via MathML to UnitsML
16
16
  DESCRIPTION
17
17
 
18
18
  spec.homepage = "https://github.com/plurimath/asciimath2unitsml"
@@ -24,6 +24,27 @@ Gem::Specification.new do |spec|
24
24
  spec.test_files = `git ls-files -- {spec}/*`.split("\n")
25
25
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
26
26
 
27
+ # get an array of submodule directories, relative to the root of the repository
28
+ `git config --file .gitmodules --get-regexp '\\.path$'`
29
+ .split("\n")
30
+ .map { |kv_str| kv_str.split(" ") }
31
+ .each do |(_, submodule_path)|
32
+
33
+ # for each submodule, change working directory to that submodule
34
+ Dir.chdir(submodule_path) do
35
+
36
+ # issue git ls-files in submodule's directory
37
+ submodule_files = `git ls-files | grep -i '.yaml$'`.split($\)
38
+
39
+ submodule_files_paths = submodule_files.map do |filename|
40
+ File.join submodule_path, filename
41
+ end
42
+
43
+ # add relative paths to gem.files
44
+ spec.files += submodule_files_paths
45
+ end
46
+ end
47
+
27
48
  spec.add_dependency "asciimath"
28
49
  spec.add_dependency "htmlentities"
29
50
  spec.add_dependency "nokogiri", "~> 1.10.4"
@@ -39,5 +60,6 @@ Gem::Specification.new do |spec|
39
60
  spec.add_development_dependency "rubocop", "= 0.54.0"
40
61
  spec.add_development_dependency "simplecov", "~> 0.15"
41
62
  spec.add_development_dependency "timecop", "~> 0.9"
63
+ spec.add_development_dependency "rexml"
42
64
  end
43
65
 
@@ -1,3 +1,4 @@
1
1
  require_relative "asciimath2unitsml/version"
2
2
  require_relative "asciimath2unitsml/conv"
3
+ require_relative "unitsdb_ruby/unitsdb"
3
4
 
@@ -5,6 +5,8 @@ require "yaml"
5
5
  require "rsec"
6
6
  require_relative "string"
7
7
  require_relative "parse"
8
+ require_relative "render"
9
+ require_relative "unit"
8
10
 
9
11
  module Asciimath2UnitsML
10
12
  MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
@@ -12,135 +14,47 @@ module Asciimath2UnitsML
12
14
 
13
15
  class Conv
14
16
  def initialize(options = {})
15
- @prefixes_id = read_yaml("../unitsdb/prefixes.yaml")
16
- @prefixes = flip_name_and_id(@prefixes_id)
17
- @quantities = read_yaml("../unitsdb/quantities.yaml")
18
- @units_id = read_yaml("../unitsdb/units.yaml")
19
- @units = flip_name_and_id(@units_id)
20
- @parser = parser
21
- @multiplier = multiplier(options[:multiplier] || "\u00b7")
22
- end
23
-
24
- def multiplier(x)
25
- case x
26
- when :space
27
- { html: "&#xA0;", mathml: "<mo rspace='thickmathspace'>&#x2062;</mo>" }
28
- when :nospace
29
- { html: "", mathml: "<mo>&#x2062;</mo>" }
30
- else
31
- { html: HTMLEntities.new.encode(x), mathml: "<mo>#{HTMLEntities.new.encode(x)}</mo>" }
17
+ @dimensions_id = read_yaml("../unitsdb/dimensions.yaml").each_with_object({}) do |(k, v), m|
18
+ m[k.to_s] = UnitsDB::Dimension.new(k, v)
32
19
  end
33
- end
34
-
35
- def units_only(units)
36
- units.reject { |u| u[:multiplier] }
37
- end
38
-
39
- def unit_id(text)
40
- "U_" +
41
- (@units[text.to_sym] ? @units[text.to_sym][:id] : text.gsub(/\*/, ".").gsub(/\^/, ""))
42
- end
43
-
44
- def unit(units, origtext, normtext, dims)
45
- dimid = dim_id(dims)
46
- <<~END
47
- <Unit xmlns='#{UNITSML_NS}' xml:id='#{unit_id(origtext)}'#{dimid ? " dimensionURL='##{dimid}'" : ""}>
48
- #{unitsystem(units)}
49
- #{unitname(units, normtext)}
50
- #{unitsymbol(units)}
51
- #{rootunits(units)}
52
- </Unit>
53
- END
54
- end
55
-
56
- def unitsystem(units)
57
- ret = []
58
- units = units_only(units)
59
- units.any? { |x| @units[x[:unit].to_sym][:si] != true } and
60
- ret << "<UnitSystem name='not_SI' type='not_SI' xml:lang='en-US'/>"
61
- if units.any? { |x| @units[x[:unit].to_sym][:si] == true }
62
- base = units.size == 1 && @units[units[0][:unit].to_sym][:type].include?("si-base")
63
- ret << "<UnitSystem name='SI' type='#{base ? "SI_base" : "SI_derived"}' xml:lang='en-US'/>"
20
+ @prefixes_id = read_yaml("../unitsdb/prefixes.yaml").each_with_object({}) do |(k, v), m|
21
+ m[k] = UnitsDB::Prefix.new(k, v)
64
22
  end
65
- ret.join("\n")
66
- end
67
-
68
- def unitname(units, text)
69
- name = @units[text.to_sym] ? @units[text.to_sym][:name] : compose_name(units, text)
70
- "<UnitName xml:lang='en'>#{name}</UnitName>"
71
- end
72
-
73
- # TODO: compose name from the component units
74
- def compose_name(units, text)
75
- text
76
- end
77
-
78
- def unitsymbol(units)
79
- <<~END
80
- <UnitSymbol type="HTML">#{htmlsymbol(units)}</UnitSymbol>
81
- <UnitSymbol type="MathML">#{mathmlsymbolwrap(units)}</UnitSymbol>
82
- END
83
- end
84
-
85
- def htmlsymbol(units)
86
- units.map do |u|
87
- if u[:multiplier] then u[:multiplier] == "*" ? @multiplier[:html] : u[:multiplier]
88
- else
89
- u[:display_exponent] and exp = "<sup>#{u[:display_exponent].sub(/-/, "&#x2212;")}</sup>"
90
- "#{u[:prefix]}#{u[:unit]}#{exp}"
91
- end
92
- end.join("")
93
- end
94
-
95
- def mathmlsymbol(units)
96
- exp = units.map do |u|
97
- if u[:multiplier] then u[:multiplier] == "*" ? @multiplier[:mathml] : "<mo>#{u[:multiplier]}</mo>"
98
- else
99
- base = "<mi mathvariant='normal'>#{u[:prefix]}#{u[:unit]}</mi>"
100
- if u[:display_exponent]
101
- exp = "<mn>#{u[:display_exponent]}</mn>".sub(/<mn>-/, "<mo>&#x2212;</mo><mn>")
102
- "<msup><mrow>#{base}</mrow><mrow>#{exp}</mrow></msup>"
103
- else
104
- base
105
- end
106
- end
107
- end.join("")
108
- end
109
-
110
- def mathmlsymbolwrap(units)
111
- <<~END
112
- <math xmlns='#{MATHML_NS}'>
113
- <mrow>#{mathmlsymbol(units)}</mrow>
114
- </math>
115
- END
23
+ @prefixes = flip_name_and_symbol(@prefixes_id)
24
+ @quantities = read_yaml("../unitsdb/quantities.yaml").each_with_object({}) do |(k, v), m|
25
+ m[k.to_s] = UnitsDB::Quantity.new(k, v)
26
+ end
27
+ @units_id = read_yaml("../unitsdb/units.yaml").each_with_object({}) do |(k, v), m|
28
+ m[k.to_s] = UnitsDB::Unit.new(k.to_s, v)
29
+ end
30
+ @units = flip_name_and_symbols(@units_id)
31
+ @symbols = @units.each_with_object({}) do |(k, v), m|
32
+ v.symbolids.each { |x| m[x] = v.symbols_hash[x] }
33
+ end
34
+ @parser = parser
35
+ @multiplier = multiplier(options[:multiplier] || "\u00b7")
116
36
  end
117
37
 
118
- def rootunits(units)
119
- return if units.size == 1
120
- exp = units_only(units).map do |u|
121
- prefix = " prefix='#{u[:prefix]}'" if u[:prefix]
122
- exponent = " powerNumerator='#{u[:exponent]}'" if u[:exponent] && u[:exponent] != "1"
123
- "<EnumeratedRootUnit unit='#{@units[u[:unit].to_sym][:name]}'#{prefix}#{exponent}/>"
124
- end.join("\n")
125
- <<~END
126
- <RootUnits>#{exp}</RootUnits>
127
- END
38
+ def float_to_display(f)
39
+ ret = f.to_f.round(1).to_s.sub(/\.0$/, "")
128
40
  end
129
41
 
130
42
  def prefix(units)
131
- units.map { |u| u[:prefix] }.reject { |u| u.nil? }.uniq.map do |p1|
132
- p = p1.to_sym
43
+ units.map { |u| u[:prefix] }.reject { |u| u.nil? }.uniq.map do |p|
133
44
  <<~END
134
- <Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[p][:base]}'
135
- prefixPower='#{@prefixes[p][:power]}' xml:id='#{@prefixes[p][:id]}'>
136
- <PrefixName xml:lang="en">#{@prefixes[p][:name]}</PrefixName>
137
- <PrefixSymbol type="ASCII">#{@prefixes[p][:symbol]}</PrefixSymbol>
45
+ <Prefix xmlns='#{UNITSML_NS}' prefixBase='#{@prefixes[p].base}'
46
+ prefixPower='#{@prefixes[p].power}' xml:id='#{@prefixes[p].id}'>
47
+ <PrefixName xml:lang="en">#{@prefixes[p].name}</PrefixName>
48
+ <PrefixSymbol type="ASCII">#{@prefixes[p].ascii}</PrefixSymbol>
49
+ <PrefixSymbol type="unicode">#{@prefixes[p].unicode}</PrefixSymbol>
50
+ <PrefixSymbol type="LaTeX">#{@prefixes[p].latex}</PrefixSymbol>
51
+ <PrefixSymbol type="HTML">#{htmlent @prefixes[p].html}</PrefixSymbol>
138
52
  </Prefix>
139
53
  END
140
54
  end.join("\n")
141
55
  end
142
56
 
143
- def dimension(dims)
57
+ def dimension_components(dims)
144
58
  return if dims.nil? || dims.empty?
145
59
  <<~END
146
60
  <Dimension xmlns='#{UNITSML_NS}' xml:id="#{dim_id(dims)}">
@@ -149,9 +63,22 @@ module Asciimath2UnitsML
149
63
  END
150
64
  end
151
65
 
66
+ U2D = {
67
+ "m" => { dimension: "Length", order: 1, symbol: "L" },
68
+ "g" => { dimension: "Mass", order: 2, symbol: "M" },
69
+ "kg" => { dimension: "Mass", order: 2, symbol: "M" },
70
+ "s" => { dimension: "Time", order: 3, symbol: "T" },
71
+ "A" => { dimension: "ElectricCurrent", order: 4, symbol: "I" },
72
+ "K" => { dimension: "ThermodynamicTemperature", order: 5, symbol: "Theta" },
73
+ "degK" => { dimension: "ThermodynamicTemperature", order: 5, symbol: "Theta" },
74
+ "mol" => { dimension: "AmountOfSubstance", order: 6, symbol: "N" },
75
+ "cd" => { dimension: "LuminousIntensity", order: 7, symbol: "J" },
76
+ "deg" => { dimension: "PlaneAngle", order: 8, symbol: "Phi" },
77
+ }.freeze
78
+
152
79
  def units2dimensions(units)
153
- norm = normalise_units(units)
154
- return if norm.any? { |u| u[:unit] == "unknown" || u[:prefix] == "unknown" }
80
+ norm = decompose_units(units)
81
+ return if norm.any? { |u| u[:unit] == "unknown" || u[:prefix] == "unknown" || u[:unit].nil? }
155
82
  norm.map do |u|
156
83
  { dimension: U2D[u[:unit]][:dimension],
157
84
  unit: u[:unit],
@@ -161,16 +88,23 @@ module Asciimath2UnitsML
161
88
  end
162
89
 
163
90
  def dimension1(u)
164
- %(<#{u[:dimension]} symbol="#{u[:symbol]}" powerNumerator="#{u[:exponent]}"/>)
91
+ %(<#{u[:dimension]} symbol="#{u[:symbol]}" powerNumerator="#{float_to_display(u[:exponent])}"/>)
165
92
  end
166
93
 
167
94
  def dim_id(dims)
168
95
  return nil if dims.nil? || dims.empty?
169
- "D_" + dims.map { |d| U2D[d[:unit]][:symbol] + (d[:exponent] == 1 ? "" : d[:exponent].to_s) }.join("")
96
+ dimhash = dims.each_with_object({}) { |h, m| m[h[:dimension]] = h }
97
+ dimsvector = %w(Length Mass Time ElectricCurrent ThermodynamicTemperature
98
+ AmountOfSubstance LuminousIntensity PlaneAngle)
99
+ .map { |h| dimhash.dig(h, :exponent) }.join(":")
100
+ id = @dimensions_id&.values&.select { |d| d.vector == dimsvector }&.first&.id and return id.to_s
101
+ "D_" + dims.map do |d|
102
+ U2D[d[:unit]][:symbol] + (d[:exponent] == 1 ? "" : float_to_display(d[:exponent]))
103
+ end.join("")
170
104
  end
171
105
 
172
- def normalise_units(units)
173
- gather_units(units_only(units).map { |u| normalise_unit(u) }.flatten)
106
+ def decompose_units(units)
107
+ gather_units(units_only(units).map { |u| decompose_unit(u) }.flatten)
174
108
  end
175
109
 
176
110
  def gather_units(units)
@@ -179,41 +113,85 @@ module Asciimath2UnitsML
179
113
  else
180
114
  m[-1] = { prefix: combine_prefixes(@prefixes[m[-1][:prefix]], @prefixes[k[:prefix]]),
181
115
  unit: m[-1][:unit],
182
- exponent: (k[:exponent]&.to_i || 1) + (m[-1][:exponent]&.to_i || 1) }
116
+ exponent: (k[:exponent]&.to_f || 1) + (m[-1][:exponent]&.to_f || 1) }
183
117
  end
184
118
  end
185
119
  end
186
120
 
187
- def normalise_unit(u)
188
- if @units[u[:unit].to_sym][:type]&.include?("si-base") then u
189
- elsif !@units[u[:unit].to_sym][:bases] then { prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] }
121
+ # treat g not kg as base unit: we have stripped the prefix k in parsing
122
+ # reduce units down to basic units
123
+ def decompose_unit(u)
124
+ if u[:unit].nil? then u
125
+ elsif u[:unit] == "g" then u
126
+ elsif @units[u[:unit]].system_type == "SI_base" then u
127
+ elsif !@units[u[:unit]].si_derived_bases
128
+ { prefix: u[:prefix], unit: "unknown", exponent: u[:exponent] }
190
129
  else
191
- @units[u[:unit].to_sym][:bases].each_with_object([]) do |k, m|
192
- m << { prefix: k["prefix"] ?
193
- combine_prefixes(@prefixes_id[k["prefix"]], @prefixes[u[:prefix]]) : u[:prefix],
194
- unit: @units_id[k["id"].to_sym][:symbol],
195
- exponent: (k["power"]&.to_i || 1) * (u[:exponent]&.to_i || 1) }
130
+ @units[u[:unit]].si_derived_bases.each_with_object([]) do |k, m|
131
+ m << { prefix: !k[:prefix].nil? && !k[:prefix].empty? ?
132
+ combine_prefixes(@prefixes_id[k[:prefix]], @prefixes[u[:prefix]]) : u[:prefix],
133
+ unit: @units_id[k[:id]].symbolid,
134
+ exponent: (k[:power]&.to_i || 1) * (u[:exponent]&.to_f || 1) }
196
135
  end
197
136
  end
198
137
  end
199
138
 
200
139
  def combine_prefixes(p1, p2)
201
140
  return nil if p1.nil? && p2.nil?
202
- return p1[:symbol] if p2.nil?
203
- return p2[:symbol] if p1.nil?
204
- return "unknown" if p1[:base] != p2[:base]
205
- @prefixes.each do |p|
206
- return p[:symbol] if p[:base] == p1[:base] && p[:power] == p1[:power] + p2[:power]
141
+ return p1.symbolid if p2.nil?
142
+ return p2.symbolid if p1.nil?
143
+ return "unknown" if p1.base != p2.base
144
+ @prefixes.each do |_, p|
145
+ return p.symbolid if p.base == p1.base && p.power == p1.power + p2.power
207
146
  end
208
147
  "unknown"
209
148
  end
210
149
 
211
- def unitsml(units, origtext, normtext)
150
+ def quantityname(id)
151
+ ret = ""
152
+ @quantities[id].names.each do |q|
153
+ ret += %(<QuantityName xml:lang="en-US">#{q}</QuantityName>)
154
+ end
155
+ ret
156
+ end
157
+
158
+ def quantity(normtext, quantity)
159
+ return unless @units[normtext] && @units[normtext].quantities.size == 1 || @quantities[quantity]
160
+ id = quantity || @units[normtext].quantities.first
161
+ dim = %( dimensionURL="##{@units[normtext].dimension}") if @units[normtext]&.dimension
162
+ <<~END
163
+ <Quantity xmlns='#{UNITSML_NS}' xml:id="#{id}"#{dim} quantityType="base">
164
+ #{quantityname(id)}
165
+ </Quantity>
166
+ END
167
+ end
168
+
169
+ def dimid2dimensions(normtext)
170
+ @dimensions_id[normtext].keys.map do |k|
171
+ { dimension: k,
172
+ symbol: U2D.values.select { |v| v[:dimension] == k }.first[:symbol],
173
+ exponent: @dimensions_id[normtext].exponent(k) }
174
+ end
175
+ end
176
+
177
+ def dimension(normtext)
178
+ return unless @units[normtext]&.dimension
179
+ dims = dimid2dimensions(@units[normtext]&.dimension)
180
+ <<~END
181
+ <Dimension xmlns='#{UNITSML_NS}' xml:id="#{@units[normtext]&.dimension}">
182
+ #{dims.map { |u| dimension1(u) }.join("\n") }
183
+ </Dimension>
184
+ END
185
+ end
186
+
187
+ def unitsml(units, origtext, normtext, quantity, name)
212
188
  dims = units2dimensions(units)
213
189
  <<~END
214
- #{unit(units, origtext, normtext, dims)}
190
+ #{unit(units, origtext, normtext, dims, name)}
215
191
  #{prefix(units)}
216
- #{dimension(dims)}
192
+ #{dimension(normtext)}
193
+ #{dimension_components(dims)}
194
+ #{quantity(normtext, quantity)}
217
195
  END
218
196
  end
219
197
  end