mathtype_to_mathml_plus 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +45 -0
- data/Rakefile +7 -0
- data/lib/mathtype_to_mathml/char_replacer.rb +366 -0
- data/lib/mathtype_to_mathml/mover.rb +151 -0
- data/lib/mathtype_to_mathml/version.rb +3 -0
- data/lib/mathtype_to_mathml.rb +45 -0
- data/lib/transform.xsl +53 -0
- data/lib/xsl/arrow.xsl +319 -0
- data/lib/xsl/box.xsl +42 -0
- data/lib/xsl/char.xsl +1854 -0
- data/lib/xsl/embellishment.xsl +389 -0
- data/lib/xsl/fence.xsl +228 -0
- data/lib/xsl/frac.xsl +46 -0
- data/lib/xsl/int.xsl +622 -0
- data/lib/xsl/lim.xsl +32 -0
- data/lib/xsl/long_division.xsl +32 -0
- data/lib/xsl/long_embellishment.xsl +150 -0
- data/lib/xsl/matrix.xsl +115 -0
- data/lib/xsl/pile.xsl +54 -0
- data/lib/xsl/product_coproduct.xsl +107 -0
- data/lib/xsl/subsup.xsl +55 -0
- data/lib/xsl/sum.xsl +81 -0
- data/lib/xsl/union_intersection.xsl +104 -0
- data/mathtype_to_mathml.gemspec +29 -0
- data/spec/fixtures/expected/280.xml +122 -0
- data/spec/fixtures/expected/281.xml +60 -0
- data/spec/fixtures/expected/299.xml +70 -0
- data/spec/fixtures/expected/326.xml +173 -0
- data/spec/fixtures/expected/424.xml +425 -0
- data/spec/fixtures/expected/450.xml +174 -0
- data/spec/fixtures/expected/452.xml +166 -0
- data/spec/fixtures/expected/478.xml +303 -0
- data/spec/fixtures/expected/629.xml +98 -0
- data/spec/fixtures/expected/arrows.xml +389 -0
- data/spec/fixtures/expected/boxes.xml +22 -0
- data/spec/fixtures/expected/embellishments.xml +178 -0
- data/spec/fixtures/expected/embellishments_roots_long_divisions.xml +162 -0
- data/spec/fixtures/expected/equation1.xml +52 -0
- data/spec/fixtures/expected/equation10.xml +19 -0
- data/spec/fixtures/expected/equation11.xml +17 -0
- data/spec/fixtures/expected/equation12.xml +34 -0
- data/spec/fixtures/expected/equation13.xml +113 -0
- data/spec/fixtures/expected/equation14.xml +54 -0
- data/spec/fixtures/expected/equation2.xml +33 -0
- data/spec/fixtures/expected/equation3.xml +324 -0
- data/spec/fixtures/expected/equation4.xml +14 -0
- data/spec/fixtures/expected/equation5.xml +23 -0
- data/spec/fixtures/expected/equation6.xml +13 -0
- data/spec/fixtures/expected/equation7.xml +19 -0
- data/spec/fixtures/expected/equation8.xml +17 -0
- data/spec/fixtures/expected/equation9.xml +15 -0
- data/spec/fixtures/expected/fences.xml +64 -0
- data/spec/fixtures/expected/integrals.xml +264 -0
- data/spec/fixtures/expected/matrices.xml +253 -0
- data/spec/fixtures/expected/sums.xml +36 -0
- data/spec/fixtures/expected/unions_and_intersections.xml +140 -0
- data/spec/fixtures/input/280.bin +0 -0
- data/spec/fixtures/input/281.bin +0 -0
- data/spec/fixtures/input/299.bin +0 -0
- data/spec/fixtures/input/326.bin +0 -0
- data/spec/fixtures/input/424.bin +0 -0
- data/spec/fixtures/input/450.bin +0 -0
- data/spec/fixtures/input/452.bin +0 -0
- data/spec/fixtures/input/478.bin +0 -0
- data/spec/fixtures/input/629.bin +0 -0
- data/spec/fixtures/input/arrows.bin +0 -0
- data/spec/fixtures/input/boxes.bin +0 -0
- data/spec/fixtures/input/embellishments.bin +0 -0
- data/spec/fixtures/input/embellishments_roots_long_divisions.bin +0 -0
- data/spec/fixtures/input/equation1.bin +0 -0
- data/spec/fixtures/input/equation10.bin +0 -0
- data/spec/fixtures/input/equation11.bin +0 -0
- data/spec/fixtures/input/equation12.bin +0 -0
- data/spec/fixtures/input/equation13.bin +0 -0
- data/spec/fixtures/input/equation14.bin +0 -0
- data/spec/fixtures/input/equation2.bin +0 -0
- data/spec/fixtures/input/equation3.bin +0 -0
- data/spec/fixtures/input/equation4.bin +0 -0
- data/spec/fixtures/input/equation5.bin +0 -0
- data/spec/fixtures/input/equation6.bin +0 -0
- data/spec/fixtures/input/equation7.bin +0 -0
- data/spec/fixtures/input/equation8.bin +0 -0
- data/spec/fixtures/input/equation9.bin +0 -0
- data/spec/fixtures/input/fences.bin +0 -0
- data/spec/fixtures/input/integrals.bin +0 -0
- data/spec/fixtures/input/matrices.bin +0 -0
- data/spec/fixtures/input/sums.bin +0 -0
- data/spec/fixtures/input/unions_and_intersections.bin +0 -0
- data/spec/html_output.rb +28 -0
- data/spec/mathtype_to_mathml_spec.rb +16 -0
- data/spec/spec_helper.rb +4 -0
- metadata +305 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b0d7a2cafb7ebf7215d919139933493434e2adbb28eef0e1b5454147246217fe
|
|
4
|
+
data.tar.gz: 7171abda1cc4e10bd2d3183e00885048eaee2e39d0a3640b29e74b01e0e50608
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: ddc97e1f19c6db5e1112d875270a40c2d26954b7d393c7f65bfbbc860fe2863c0dcb7e151135878c25a59aaded99ae437b364403b257baad18cf5e0b5c85da2e
|
|
7
|
+
data.tar.gz: 3e44ee439201b4da295c4bf106a86b8de494b0dae91723fa9f62c78c5450f2ec2368ece0551fd0cd879d6c87becc64349fb029ac04737b8e42f4a6fe09cef8dc
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.2.2
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
Copyright (c) 2015 Jure Triglav
|
|
2
|
+
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
a copy of this software and associated documentation files (the
|
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# MathTypeToMathML
|
|
2
|
+
|
|
3
|
+
This gem can be used to convert MathType equations from a binary format (e.g. embedded in Word documents) to an open MathML representation. It achieves that in several stages, first using the [`mathtype`](https://github.com/jure/mathtype) gem to convert from a binary to an XML form of MTEF, and second, using XSLTs to convert XML to MathML.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Add this line to your application's Gemfile:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'mathtype_to_mathml'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
And then execute:
|
|
14
|
+
|
|
15
|
+
$ bundle
|
|
16
|
+
|
|
17
|
+
Or install it yourself as:
|
|
18
|
+
|
|
19
|
+
$ gem install mathtype_to_mathml
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
To convert a MathType equation embedded in a Word document (the file is usually named something like `oleObject1.bin`):
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
mathml = MathTypeToMathML::Converter.new("oleObject1.bin").convert
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
This will return a MathML string of the MathType equation.
|
|
30
|
+
|
|
31
|
+
## Testing
|
|
32
|
+
|
|
33
|
+
Run `bundle exec rspec` to run specs. Additionally, you can create a visual output using `html_output.rb`, like so:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
bundle exec ruby spec/html_output.rb > test.html
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Contributing
|
|
40
|
+
|
|
41
|
+
1. Fork it ( https://github.com/jure/mathtype_to_mathml/fork )
|
|
42
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
43
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
44
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
45
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
require "nokogiri"
|
|
2
|
+
|
|
3
|
+
# XSLT 1.0 has virtually non-existent capabilities for character ranges,
|
|
4
|
+
# codepoints, hex to decimal, etc., so we replace characters within character
|
|
5
|
+
# ranges with Ruby. Additionally, it's not possible to get the actual character
|
|
6
|
+
# represented by a hexadecimal number in XSLT, so we deal with those here too.
|
|
7
|
+
# Regular single character translations are still done with XSLT (char.xsl).
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
module MathTypeToMathML
|
|
13
|
+
class CharReplacer
|
|
14
|
+
# Template used by the REPLACEMENTS entries that are marked as unsupported.
UNSUPPORTED = "Unsupported (Char)"

# Fallback templates for entries that do not define the requested mode.
# "(Char)" is substituted with the literal character by #replacement_xml.
DEFAULT_TEXTMODE = "(Char)"
# The element must be closed (</mi>); the previous value "<mi>(Char)<mi>"
# opened a second <mi> instead of closing the first.
DEFAULT_MATHMODE = "<mi>(Char)</mi>"
|
|
18
|
+
|
|
19
|
+
# Lookup table from MTCode values to output templates.
#
# Keys are either a single code point (Integer) or an inclusive Range of code
# points; #replace matches them with `===`, and the first matching entry wins.
# Values are hashes of templates keyed by mode (:mathmode, :textmode and, for
# digits, :number). Inside a template "(Char)" stands for the literal
# character and "(CharHex)" for its numeric character reference
# (see #replacement_xml). Modes missing from an entry fall back to
# DEFAULT_TEXTMODE / DEFAULT_MATHMODE in #replace_character.
REPLACEMENTS = {
  0x0021 => { # Exclamation mark
    mathmode: "<mo>(Char)</mo>"
  },
  0x0028 => { # Left parenthesis
    mathmode: "<mo stretchy='false'>(Char)</mo>"
  },
  0x0029 => { # Right parenthesis
    mathmode: "<mo stretchy='false'>(Char)</mo>"
  },
  0x002A => { # Asterisk
    mathmode: "<mo>(Char)</mo>"
  },
  0x002B => { # Plus sign
    mathmode: "<mo>(Char)</mo>"
  },
  0x002C => { # Comma
    mathmode: "<mo>(Char)</mo>"
  },
  0x002D => { # Hyphen-minus
    mathmode: "<mo>(Char)</mo>"
  },
  0x002E => { # Full stop
    mathmode: "<mo>(Char)</mo>"
  },
  0x002F => { # Solidus
    mathmode: "<mo>(Char)</mo>"
  },
  0x003D => { # Equals sign
    mathmode: "<mo>(Char)</mo>"
  },
  0x003F => { # Question mark
    mathmode: "<mo>(Char)</mo>"
  },
  0x005B => { # Left square bracket
    mathmode: "<mo stretchy='false'>(Char)</mo>"
  },
  0x005D => { # Right square bracket
    mathmode: "<mo stretchy='false'>(Char)</mo>"
  },
  0x007E => { # Tilde
    mathmode: "<mo>(Char)</mo>"
  },
  0x0000..0x0008 => { # range: C0 Controls
    mathmode: UNSUPPORTED,
    textmode: UNSUPPORTED
  },
  0x000B..0x001F => { # range: C0 Controls
    mathmode: UNSUPPORTED,
    textmode: UNSUPPORTED
  },
  0x0030..0x0039 => { # (range) DIGITS 0-9
    mathmode: "<mn>(Char)</mn>",
    number: "(Char)",
    textmode: "(Char)"
  },
  0x003A..0x003B => { # (range) COLON, SEMICOLON
    mathmode: "<mo>(Char)</mo>",
    textmode: "(Char)"
  },
  0x0041..0x005A => { # range: Basic Latin (uppercase letters)
    mathmode: "<mi>(Char)</mi>",
    textmode: "(Char)"
  },
  0x0061..0x007A => { # range: Basic Latin (lowercase letters)
    mathmode: "<mi>(Char)</mi>",
    textmode: "(Char)"
  },
  0x0080..0x009F => { # range: C1 Controls
    mathmode: UNSUPPORTED,
    textmode: UNSUPPORTED
  },
  0x00A0..0x00B0 => { # range: Latin-1 Supplement
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x00B2..0x00BB => { # range: Latin-1 Supplement
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x00BC..0x00BE => { # range: Latin-1 Supplement (vulgar fractions)
    mathmode: "<mn>(CharHex)</mn>"
  },
  0x02C6..0x02FF => { # range: Spacing Modifier Letters
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x0300..0x036F => { # range: Combining Diacritical Marks
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2000..0x200B => { # range: Spaces
    mathmode: "<mtext>(CharHex)</mtext>"
  },
  0x200C..0x200F => { # range: Formatting Characters
    mathmode: UNSUPPORTED,
    textmode: UNSUPPORTED
  },
  0x2010..0x2027 => { # range: General Punctuation
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2028..0x202F => { # range: Formatting Characters
    mathmode: UNSUPPORTED,
    textmode: UNSUPPORTED
  },
  0x2030..0x2069 => { # range: General Punctuation
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x206A..0x206F => { # range: General Punctuation (format characters)
    mathmode: UNSUPPORTED,
    textmode: UNSUPPORTED
  },
  0x2070..0x209F => { # range: Superscripts and Subscripts
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x20A0..0x20CF => { # range: Currency Symbols
    mathmode: "<mi>(CharHex)</mi>"
  },
  0x20D0..0x20FF => { # range: Combining Diacritical Marks for Symbols
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2100..0x2101 => { # range: Letterlike Symbols
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2103..0x210A => { # range: Letterlike Symbols
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2116..0x2117 => { # range: Letterlike Symbols
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x213C..0x2146 => { # range: Letterlike Symbols
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2150..0x218F => { # range: Number Forms
    mathmode: "<mn>(CharHex)</mn>"
  },
  0x2190..0x21FF => { # range: Arrows
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2200..0x2211 => { # range: Mathematical Operators
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2213..0x221D => { # range: Mathematical Operators
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x221F..0x22FF => { # range: Mathematical Operators
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2300..0x23FF => { # range: Miscellaneous Technical
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2400..0x243F => { # range: Control Pictures
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2500..0x257F => { # range: Box Drawing
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2580..0x259F => { # range: Block Elements
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x25A0..0x25FF => { # range: Geometric Shapes
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2600..0x267F => { # range: Miscellaneous Symbols
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2700..0x27BF => { # range: Dingbats
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x27F0..0x27FF => { # range: Supplemental Arrows-A
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2900..0x297F => { # range: Supplemental Arrows-B
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2980..0x29AF => { # range: Miscellaneous Mathematical Symbols-B
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x29B1..0x29DB => { # range: Miscellaneous Mathematical Symbols-B
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x29DD..0x29FF => { # range: Miscellaneous Mathematical Symbols-B
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x2A00..0x2AFF => { # range: Supplemental Mathematical Operators
    mathmode: "<mo>(CharHex)</mo>"
  },
  0x3000..0x303F => { # range: CJK Symbols and Punctuation
    mathmode: "<mo>(CharHex)</mo>"
  },
  # The entries below are disabled and kept for reference only.
  # 0xE000..0xE900 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE905..0xE90A => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE90D..0xE921 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE926..0xE92C => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE92E..0xE931 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE934..0xE939 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE93C..0xE98E => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xE990..0xEA05 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEA08..0xEA0A => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEA0D..0xEA31 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEA36..0xEA39 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEA3C..0xEA3F => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEA46..0xEB00 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEB03..0xEB04 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEB07..0xED09 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xED14..0xED15 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xED17..0xEE03 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEE04..0xEE0C => {
  #   textmode: UNSUPPORTED
  # },
  # 0xEE0D..0xEE18 => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEE1A..0xEEFF => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xEF09..0xEFFF => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xF000..0xF033 => {
  #   textmode: UNSUPPORTED
  # },
  # 0xF034..0xF07F => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xF080..0xF0B3 => {
  #   textmode: UNSUPPORTED
  # },
  # 0xF0B4..0xF0BF => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xF0C0..0xF0C9 => {
  #   textmode: UNSUPPORTED
  # },
  # 0xF0CA..0xF0FF => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  # 0xF100..0xF133 => {
  #   textmode: UNSUPPORTED
  # },
  # 0xF134..0xF8FF => {
  #   mathmode: UNSUPPORTED,
  #   textmode: UNSUPPORTED
  # },
  0xFB00..0xFB4F => { # range: Alphabetic Presentation Forms
    mathmode: "<mtext>(CharHex)</mtext>"
  },
  0xFE35..0xFE4F => { # range: CJK Compatibility Forms
    mathmode: "<mo>(CharHex)</mo>"
  }
}.freeze
|
|
323
|
+
|
|
324
|
+
attr_accessor :mathtype
|
|
325
|
+
|
|
326
|
+
# Stores the document whose <char> elements #replace will rewrite.
#
# mathtype - the parsed MTEF XML document; it must respond to #css and
#            #to_xml (presumably a Nokogiri::XML::Document — confirm
#            against the caller).
def initialize(mathtype)
  @mathtype = mathtype
end
|
|
329
|
+
|
|
330
|
+
# Replaces every <char> element that has an entry in REPLACEMENTS with the
# markup generated from that entry, then reparses the document.
#
# Returns the reparsed document, which is also stored in @mathtype (readable
# through the #mathtype accessor).
def replace
  @mathtype.css("char").each do |char|
    # Read and parse the code point once per <char> rather than once per
    # table entry inside the lookup block.
    code_point = char.xpath("mt_code_value").text.hex

    # Keys are Integers or Ranges, so `===` covers both equality and
    # range membership.
    replacement = REPLACEMENTS.find { |codes, _| codes === code_point }
    replace_character(replacement, char) if replacement
  end

  # Reparse XML to merge adjacent text nodes
  @mathtype = Nokogiri::XML(@mathtype.to_xml)
end
|
|
341
|
+
|
|
342
|
+
# Swaps a single <char> element for the markup produced from its table entry.
#
# replacement - the [key, templates] pair found in REPLACEMENTS.
# char        - the <char> node being replaced.
#
# The text-mode template is used when the XPath test "variation = 'textmode'"
# holds for the node, the math-mode template otherwise; a missing template
# falls back to the corresponding DEFAULT_* constant.
def replace_character(replacement, char)
  templates = replacement[1]

  template =
    if char.xpath("variation = 'textmode'")
      templates[:textmode] || DEFAULT_TEXTMODE
    else
      templates[:mathmode] || DEFAULT_MATHMODE
    end

  xml = replacement_xml(template, char)
  char.replace Nokogiri::HTML::DocumentFragment.parse(xml)
end
|
|
353
|
+
|
|
354
|
+
# Expands the placeholders of a replacement template for one <char> node.
#
# string - template containing "(Char)" and/or "(CharHex)" placeholders.
# char   - node whose "mt_code_value" text holds the code as a hex string
#          with a two-character prefix (e.g. "0x2229").
#
# Returns a new String in which "(Char)" is the UTF-8 character for that code
# and "(CharHex)" is its numeric character reference ("&#x2229;").
def replacement_xml(string, char)
  # Fetch the code once instead of once per placeholder occurrence.
  code = char.xpath("mt_code_value").text

  # The conversions stay inside the blocks so they only run for placeholders
  # that are actually present in the template.
  string
    .gsub("(Char)") { code.hex.chr("UTF-8") }
    .gsub("(CharHex)") { "&#x#{code[2..-1]};" }
end
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
require "nokogiri"
|
|
2
|
+
|
|
3
|
+
# We're moving elements around to avoid "scanning" for characters when
|
|
4
|
+
# using the /scan translator mode. The original issue is that MathType
|
|
5
|
+
# considers subscripts and superscripts to be stand-alone, while MathML
|
|
6
|
+
# considers them subscripts and superscripts of an object, and the object
|
|
7
|
+
# is then included in the sub/superscript element.
|
|
8
|
+
|
|
9
|
+
# MathType's translator has a scan mode, which does the following:
|
|
10
|
+
# 1. Scanning goes to the right for commands with the "pre" option,
|
|
11
|
+
# otherwise left;
|
|
12
|
+
# 2. If we are scanning right and the object next to the template is an
|
|
13
|
+
# opening "fence" character (parenthesis, bracket, brace, etc.),
|
|
14
|
+
# the slot is scanned for the corresponding closing fence character;
|
|
15
|
+
# 3. If we are scanning left and the object next to the template is a closing
|
|
16
|
+
# "fence" character (parenthesis, bracket, brace, etc.), the slot is scanned
|
|
17
|
+
# for the corresponding opening fence character;
|
|
18
|
+
# 4. If the fenced expression is found, it is used as #1 in the template translation.
|
|
19
|
+
|
|
20
|
+
# Additionally, Mover inverts <emb>ellishments from <char><emb></emb></char> to
|
|
21
|
+
# <emb><char></char></emb>.
|
|
22
|
+
|
|
23
|
+
module MathTypeToMathML
|
|
24
|
+
class Mover
|
|
25
|
+
attr_reader :mathtype
|
|
26
|
+
attr_accessor :last_preceding_siblings
|
|
27
|
+
|
|
28
|
+
# XPath predicate fragments interpolated into the queries below. All of them
# are frozen because they are shared constants that are only ever read.

# Matches templates whose selector is one of the fence kinds.
PARENS_SELECTOR = ("selector='tmPARENS' or " \
                   "selector='tmBRACK' or " \
                   "selector='tmBRACE' or " \
                   "selector='tmOBRACK' or " \
                   "selector='tmOBRACE' or " \
                   "selector='tmHBRACK' or " \
                   "selector='tmHBRACE'").freeze

# Matches superscript, subscript and combined sub/superscript templates.
SUBSUP_SELECTOR = ("selector='tmSUP' or " \
                   "selector='tmSUB' or " \
                   "selector='tmSUBSUP'").freeze

# Matches templates carrying the tvSU_PRECEDES variation (the "pre" option).
PRE = "variation='tvSU_PRECEDES'".freeze

# Matches characters whose code is an opening fence: ( [ {
OPEN_PAREN = ("mt_code_value = '0x0028' or " \
              "mt_code_value = '0x005B' or " \
              "mt_code_value = '0x007B'").freeze

# Matches characters whose code is a closing fence: ) ] }
CLOSE_PAREN = ("mt_code_value = '0x0029' or " \
               "mt_code_value = '0x005D' or " \
               "mt_code_value = '0x007D'").freeze

# Opening fence code => matching closing fence code.
OPEN_CLOSE_PAIRS = {
  "0x0028" => "0x0029", # ( )
  "0x005B" => "0x005D", # [ ]
  "0x007B" => "0x007D"  # { }
}.freeze
|
|
55
|
+
|
|
56
|
+
# Stores the document to rearrange and resets the sibling bookkeeping.
#
# mathtype - the parsed MTEF XML document (passed to Nokogiri as the owner of
#            the node set and of the <slot> nodes created later).
def initialize(mathtype)
  @mathtype = mathtype
  # Start from an empty node set; #new_preceding_siblings subtracts this set
  # from the siblings it finds and then overwrites it.
  @last_preceding_siblings = Nokogiri::XML::NodeSet.new(mathtype)
end
|
|
60
|
+
|
|
61
|
+
# Re-parents elements, in order, under +parent+ and stops right after moving
# the first one whose mt_code_value equals +mt_code_value+ (that element is
# moved too).
#
# elements      - enumerable of nodes to move.
# mt_code_value - code string that ends the move, e.g. "0x0029".
# parent        - node that becomes the new parent.
def move_until_mt_code(elements, mt_code_value, parent)
  stop_test = "mt_code_value = '#{mt_code_value}'"

  elements.each do |node|
    node.parent = parent
    break if node.xpath(stop_test)
  end
end
|
|
67
|
+
|
|
68
|
+
# Moves a fenced run of siblings under +node+.
#
# If the first sibling is an opening fence listed in OPEN_CLOSE_PAIRS, the
# siblings are moved (via #move_until_mt_code) up to and including the
# matching closing fence. Nothing happens when the list is empty or does not
# start with a known opening fence.
#
# siblings - ordered list of candidate nodes.
# node     - the new parent for the moved nodes.
def move_paren(siblings, node)
  first = siblings[0]
  # An empty list used to raise NoMethodError on nil; there is nothing to move.
  return if first.nil?

  # A character has a single code, so at most one pair can match; stop at the
  # first hit instead of probing the remaining pairs.
  _open, close = OPEN_CLOSE_PAIRS.find do |open, _close|
    first.xpath("mt_code_value = '#{open}'")
  end

  move_until_mt_code(siblings, close, node) if close
end
|
|
75
|
+
|
|
76
|
+
# Returns the preceding <tmpl>/<char> siblings of +el+ that were not already
# returned for the previously processed element, and remembers the full set
# of preceding siblings for the next call.
def new_preceding_siblings(el)
  current = el.xpath("preceding-sibling::tmpl | preceding-sibling::char")

  # Compute the difference first, then record the complete set.
  (current - last_preceding_siblings).tap do
    self.last_preceding_siblings = current
  end
end
|
|
82
|
+
|
|
83
|
+
# Returns every <tmpl> and <char> sibling that follows +el+.
def new_following_siblings(el)
  following_query = "following-sibling::tmpl | following-sibling::char"
  el.xpath(following_query)
end
|
|
86
|
+
|
|
87
|
+
# For every sub/superscript template that is not marked tvSU_PRECEDES, moves
# the object it applies to (found among the preceding siblings) into a new
# <slot> inserted before the template's first existing <slot>.
#
# When the nearest preceding sibling is a closing fence character, the
# fenced run is moved via #move_paren; otherwise only that nearest sibling is
# moved.
def move_following_subsup
  mathtype.xpath("//tmpl[(#{SUBSUP_SELECTOR}) and not(#{PRE})]").each do |el|
    siblings = new_preceding_siblings(el)

    node = Nokogiri::XML::Node.new "slot", mathtype

    # NOTE(review): siblings.last is nil when no new <tmpl>/<char> precedes
    # el, which raises NoMethodError here — confirm whether that can occur.
    if siblings.last.xpath(CLOSE_PAREN)
      # Walk backwards from the closing fence, keeping siblings until one
      # whose next element is an opening fence character is reached.
      siblings = siblings.reverse.take_while do |sibling|
        !sibling.next_element.xpath(OPEN_PAREN)
      end.reverse

      move_paren(siblings, node)
    else
      # The former `elsif` on "self::tmpl[...]" tested a node set, which is
      # always truthy in Ruby, and ran this same statement; fence templates
      # and plain objects are therefore handled identically.
      siblings.last.parent = node
    end

    el.at_css("slot").add_previous_sibling node
  end
end
|
|
108
|
+
|
|
109
|
+
# For every sub/superscript template marked tvSU_PRECEDES, moves the object
# it applies to (found among the following siblings) into a new <slot>
# inserted before the template's first existing <slot>.
#
# When the nearest following sibling is an opening fence character, the
# fenced run is handed to #move_paren; otherwise only that nearest sibling is
# moved.
def move_preceding_subsup
  mathtype.xpath("//tmpl[(#{SUBSUP_SELECTOR}) and #{PRE}]").each do |el|
    siblings = new_following_siblings(el)

    node = Nokogiri::XML::Node.new "slot", mathtype

    # NOTE(review): siblings.first is nil when nothing follows el, which
    # raises NoMethodError here — confirm whether that can occur.
    if siblings.first.xpath(OPEN_PAREN)
      # NOTE(review): this mirrors the backwards scan used for following
      # sub/superscripts. It starts from the far end of the following
      # siblings, so the resulting list may not begin with the opening fence
      # that #move_paren expects — verify against a "pre" fixture.
      siblings = siblings.reverse.take_while do |sibling|
        !sibling.next_element.xpath(CLOSE_PAREN)
      end.reverse

      move_paren(siblings, node)
    else
      # The former `elsif` on "self::tmpl[...]" tested a node set, which is
      # always truthy in Ruby, and ran this same statement; fence templates
      # and plain objects are therefore handled identically.
      siblings.first.parent = node
    end

    el.at_css("slot").add_previous_sibling node
  end
end
|
|
130
|
+
|
|
131
|
+
# Turns <char><embell/></char> into <embell><char/></embell> for every <char>
# that has an <embell> child.
def invert_char_embell
  mathtype.xpath("//char[embell]").each do |char_node|
    # Detach the first <embell> child; it becomes the new outer element.
    wrapper = char_node.xpath("embell").first.remove

    # Clone only after detaching, so the copy no longer contains that child.
    inner = char_node.clone
    inner.parent = wrapper

    char_node.replace(wrapper)
  end
end
|
|
140
|
+
|
|
141
|
+
# Runs the three rearrangement passes in order and returns the result of the
# last one.
def move
  passes = %i[move_following_subsup move_preceding_subsup invert_char_embell]
  passes.map { |pass| send(pass) }.last
end
|
|
146
|
+
end
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require "mathtype_to_mathml/version"
|
|
2
|
+
require "nokogiri"
|
|
3
|
+
require "mathtype"
|
|
4
|
+
require_relative "mathtype_to_mathml/mover"
|
|
5
|
+
require_relative "mathtype_to_mathml/char_replacer"
|
|
6
|
+
|
|
7
|
+
module MathTypeToMathML
|
|
8
|
+
class Converter
|
|
9
|
+
# Loads the stylesheet, converts the binary equation to MTEF XML and
# prepares that XML for the XSLT transformation.
#
# mathtype - passed unchanged to Mathtype::Converter.new (the README passes
#            the path of an OLE object file such as "oleObject1.bin").
def initialize(mathtype)
  # Block form closes the stylesheet handle once Nokogiri has parsed it; the
  # handle was previously left open.
  # NOTE(review): an open File (not its contents) is passed on purpose —
  # presumably its path lets relative xsl:include/xsl:import hrefs resolve
  # next to transform.xsl. Confirm before switching to File.read.
  @xslt = File.open(path_to_xslt) { |file| Nokogiri::XSLT(file) }

  converter = Mathtype::Converter.new(mathtype)
  doc = converter.xml.doc

  # NOTE(review): appends the intermediate XML to "mathtype.log" in the
  # current working directory on every instantiation; this looks like a
  # debugging aid — confirm it is still wanted.
  File.open("mathtype.log", "a") do |f|
    f.puts "----- NEW LOG #{Time.now} -----"
    f.puts doc.to_xml(indent: 2)
  end

  @mathtype = doc
  # Addresses lack of scanning mode in our translator. See "Mover" for more.
  mover = Mover.new(@mathtype)
  mover.move

  # Character ranges are tricky in XSLT 1.0, so we deal with them in Ruby
  # NOTE(review): CharReplacer#replace stores its reparsed document in its
  # own @mathtype; this object keeps the pre-reparse document. Confirm that
  # discarding the reparsed copy is intended.
  char_replacer = CharReplacer.new(@mathtype)
  char_replacer.replace
end
|
|
29
|
+
|
|
30
|
+
# Applies the stylesheet to the prepared document and returns the result
# serialised as a String.
def convert
  mathml = @xslt.transform(@mathtype)

  # This is a hack, but XML namespaces are such a pain to get right,
  # especially in Nokogiri, so... We assume all content is MathML, strip
  # every namespace and then set the root's default namespace to MathML.
  mathml.remove_namespaces!
  mathml.root.default_namespace = 'http://www.w3.org/1998/Math/MathML'

  mathml.to_s
end
|
|
40
|
+
|
|
41
|
+
# Absolute path of transform.xsl, located in the same directory as this file.
def path_to_xslt
  File.expand_path("../transform.xsl", __FILE__)
end
|
|
44
|
+
end
|
|
45
|
+
end
|