mathtype_to_mathml_plus 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +3 -0
  6. data/Gemfile +6 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +45 -0
  9. data/Rakefile +7 -0
  10. data/lib/mathtype_to_mathml/char_replacer.rb +366 -0
  11. data/lib/mathtype_to_mathml/mover.rb +151 -0
  12. data/lib/mathtype_to_mathml/version.rb +3 -0
  13. data/lib/mathtype_to_mathml.rb +45 -0
  14. data/lib/transform.xsl +53 -0
  15. data/lib/xsl/arrow.xsl +319 -0
  16. data/lib/xsl/box.xsl +42 -0
  17. data/lib/xsl/char.xsl +1854 -0
  18. data/lib/xsl/embellishment.xsl +389 -0
  19. data/lib/xsl/fence.xsl +228 -0
  20. data/lib/xsl/frac.xsl +46 -0
  21. data/lib/xsl/int.xsl +622 -0
  22. data/lib/xsl/lim.xsl +32 -0
  23. data/lib/xsl/long_division.xsl +32 -0
  24. data/lib/xsl/long_embellishment.xsl +150 -0
  25. data/lib/xsl/matrix.xsl +115 -0
  26. data/lib/xsl/pile.xsl +54 -0
  27. data/lib/xsl/product_coproduct.xsl +107 -0
  28. data/lib/xsl/subsup.xsl +55 -0
  29. data/lib/xsl/sum.xsl +81 -0
  30. data/lib/xsl/union_intersection.xsl +104 -0
  31. data/mathtype_to_mathml.gemspec +29 -0
  32. data/spec/fixtures/expected/280.xml +122 -0
  33. data/spec/fixtures/expected/281.xml +60 -0
  34. data/spec/fixtures/expected/299.xml +70 -0
  35. data/spec/fixtures/expected/326.xml +173 -0
  36. data/spec/fixtures/expected/424.xml +425 -0
  37. data/spec/fixtures/expected/450.xml +174 -0
  38. data/spec/fixtures/expected/452.xml +166 -0
  39. data/spec/fixtures/expected/478.xml +303 -0
  40. data/spec/fixtures/expected/629.xml +98 -0
  41. data/spec/fixtures/expected/arrows.xml +389 -0
  42. data/spec/fixtures/expected/boxes.xml +22 -0
  43. data/spec/fixtures/expected/embellishments.xml +178 -0
  44. data/spec/fixtures/expected/embellishments_roots_long_divisions.xml +162 -0
  45. data/spec/fixtures/expected/equation1.xml +52 -0
  46. data/spec/fixtures/expected/equation10.xml +19 -0
  47. data/spec/fixtures/expected/equation11.xml +17 -0
  48. data/spec/fixtures/expected/equation12.xml +34 -0
  49. data/spec/fixtures/expected/equation13.xml +113 -0
  50. data/spec/fixtures/expected/equation14.xml +54 -0
  51. data/spec/fixtures/expected/equation2.xml +33 -0
  52. data/spec/fixtures/expected/equation3.xml +324 -0
  53. data/spec/fixtures/expected/equation4.xml +14 -0
  54. data/spec/fixtures/expected/equation5.xml +23 -0
  55. data/spec/fixtures/expected/equation6.xml +13 -0
  56. data/spec/fixtures/expected/equation7.xml +19 -0
  57. data/spec/fixtures/expected/equation8.xml +17 -0
  58. data/spec/fixtures/expected/equation9.xml +15 -0
  59. data/spec/fixtures/expected/fences.xml +64 -0
  60. data/spec/fixtures/expected/integrals.xml +264 -0
  61. data/spec/fixtures/expected/matrices.xml +253 -0
  62. data/spec/fixtures/expected/sums.xml +36 -0
  63. data/spec/fixtures/expected/unions_and_intersections.xml +140 -0
  64. data/spec/fixtures/input/280.bin +0 -0
  65. data/spec/fixtures/input/281.bin +0 -0
  66. data/spec/fixtures/input/299.bin +0 -0
  67. data/spec/fixtures/input/326.bin +0 -0
  68. data/spec/fixtures/input/424.bin +0 -0
  69. data/spec/fixtures/input/450.bin +0 -0
  70. data/spec/fixtures/input/452.bin +0 -0
  71. data/spec/fixtures/input/478.bin +0 -0
  72. data/spec/fixtures/input/629.bin +0 -0
  73. data/spec/fixtures/input/arrows.bin +0 -0
  74. data/spec/fixtures/input/boxes.bin +0 -0
  75. data/spec/fixtures/input/embellishments.bin +0 -0
  76. data/spec/fixtures/input/embellishments_roots_long_divisions.bin +0 -0
  77. data/spec/fixtures/input/equation1.bin +0 -0
  78. data/spec/fixtures/input/equation10.bin +0 -0
  79. data/spec/fixtures/input/equation11.bin +0 -0
  80. data/spec/fixtures/input/equation12.bin +0 -0
  81. data/spec/fixtures/input/equation13.bin +0 -0
  82. data/spec/fixtures/input/equation14.bin +0 -0
  83. data/spec/fixtures/input/equation2.bin +0 -0
  84. data/spec/fixtures/input/equation3.bin +0 -0
  85. data/spec/fixtures/input/equation4.bin +0 -0
  86. data/spec/fixtures/input/equation5.bin +0 -0
  87. data/spec/fixtures/input/equation6.bin +0 -0
  88. data/spec/fixtures/input/equation7.bin +0 -0
  89. data/spec/fixtures/input/equation8.bin +0 -0
  90. data/spec/fixtures/input/equation9.bin +0 -0
  91. data/spec/fixtures/input/fences.bin +0 -0
  92. data/spec/fixtures/input/integrals.bin +0 -0
  93. data/spec/fixtures/input/matrices.bin +0 -0
  94. data/spec/fixtures/input/sums.bin +0 -0
  95. data/spec/fixtures/input/unions_and_intersections.bin +0 -0
  96. data/spec/html_output.rb +28 -0
  97. data/spec/mathtype_to_mathml_spec.rb +16 -0
  98. data/spec/spec_helper.rb +4 -0
  99. metadata +305 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b0d7a2cafb7ebf7215d919139933493434e2adbb28eef0e1b5454147246217fe
4
+ data.tar.gz: 7171abda1cc4e10bd2d3183e00885048eaee2e39d0a3640b29e74b01e0e50608
5
+ SHA512:
6
+ metadata.gz: ddc97e1f19c6db5e1112d875270a40c2d26954b7d393c7f65bfbbc860fe2863c0dcb7e151135878c25a59aaded99ae437b364403b257baad18cf5e0b5c85da2e
7
+ data.tar.gz: 3e44ee439201b4da295c4bf106a86b8de494b0dae91723fa9f62c78c5450f2ec2368ece0551fd0cd879d6c87becc64349fb029ac04737b8e42f4a6fe09cef8dc
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.2
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+
2
+ source 'https://rubygems.org'
3
+
4
+ # Specify your gem's dependencies in mathtype_to_mathml.gemspec
5
+
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Jure Triglav
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # MathTypeToMathML
2
+
3
+ This gem can be used to convert MathType equations from a binary format (e.g. embedded in Word documents) to an open MathML representation. It achieves that in several stages, first using the [`mathtype`](https://github.com/jure/mathtype) gem to convert from a binary to an XML form of MTEF, and second, using XSLTs to convert XML to MathML.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'mathtype_to_mathml'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install mathtype_to_mathml
20
+
21
+ ## Usage
22
+
23
+ To convert a MathType equation embedded in a Word document (the file is usually named something like `oleObject1.bin`):
24
+
25
+ ```
26
+ mathml = MathTypeToMathML::Converter.new("oleObject1.bin").convert
27
+ ```
28
+
29
+ This will return a MathML string of the MathType equation.
30
+
31
+ # Testing
32
+
33
+ Run `bundle exec rspec` to run specs. Additionally, you can create a visual output using `html_output.rb`, like so:
34
+
35
+ ```
36
+ bundle exec ruby spec/html_output.rb > test.html
37
+ ```
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it ( https://github.com/jure/mathtype_to_mathml/fork )
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
7
+
@@ -0,0 +1,366 @@
1
+ require "nokogiri"
2
+
3
+ # XSLT 1.0 has virtually non-existent capabilities for character ranges,
4
+ # codepoints, hex to decimal, etc., so we replace characters within character
5
+ # ranges with Ruby. Additionally, it's not possible to get the actual character
6
+ # represented by a hexadecimal number in XSLT, so we deal with those here too.
7
+ # Regular single character translations are still done with XSLT (char.xsl).
8
+
9
+
10
+
11
+
module MathTypeToMathML
  # Rewrites the <char> elements of an MTEF XML document into MathML (math
  # mode) or plain text (text mode) using the REPLACEMENTS lookup table.
  #
  # Templates contain two placeholders, substituted by #replacement_xml:
  #   (Char)    - the literal UTF-8 character for the code point
  #   (CharHex) - a numeric character reference such as &#x2229;
  class CharReplacer
    UNSUPPORTED = "Unsupported (Char)".freeze

    # Fallback templates used when a table entry lacks the requested mode.
    DEFAULT_TEXTMODE = "(Char)".freeze
    DEFAULT_MATHMODE = "<mi>(Char)</mi>".freeze

    # Maps a code point (Integer) or a span of code points (Range) to the
    # templates for that character. Keys are compared with `===`, so both
    # Integers and Ranges work. Entries never overlap; the first match wins.
    # The :number key is stored for digits but is not read by this class.
    REPLACEMENTS = {
      0x0021 => { mathmode: "<mo>(Char)</mo>" },                  # Exclamation mark
      0x0028 => { mathmode: "<mo stretchy='false'>(Char)</mo>" }, # Left parenthesis
      0x0029 => { mathmode: "<mo stretchy='false'>(Char)</mo>" }, # Right parenthesis
      0x002A => { mathmode: "<mo>(Char)</mo>" },                  # Asterisk
      0x002B => { mathmode: "<mo>(Char)</mo>" },                  # Plus sign
      0x002C => { mathmode: "<mo>(Char)</mo>" },                  # Comma
      0x002D => { mathmode: "<mo>(Char)</mo>" },                  # Hyphen-minus
      0x002E => { mathmode: "<mo>(Char)</mo>" },                  # Full stop
      0x002F => { mathmode: "<mo>(Char)</mo>" },                  # Solidus
      0x003D => { mathmode: "<mo>(Char)</mo>" },                  # Equals sign
      0x003F => { mathmode: "<mo>(Char)</mo>" },                  # Question mark
      0x005B => { mathmode: "<mo stretchy='false'>(Char)</mo>" }, # Left square bracket
      0x005D => { mathmode: "<mo stretchy='false'>(Char)</mo>" }, # Right square bracket
      0x007E => { mathmode: "<mo>(Char)</mo>" },                  # Tilde
      0x0000..0x0008 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # C0 controls
      0x000B..0x001F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # C0 controls
      0x0030..0x0039 => { # Digits 0-9
        mathmode: "<mn>(Char)</mn>",
        number: "(Char)",
        textmode: "(Char)"
      },
      0x003A..0x003B => { mathmode: "<mo>(Char)</mo>", textmode: "(Char)" }, # Colon, semicolon
      0x0041..0x005A => { mathmode: "<mi>(Char)</mi>", textmode: "(Char)" }, # Basic Latin A-Z
      0x0061..0x007A => { mathmode: "<mi>(Char)</mi>", textmode: "(Char)" }, # Basic Latin a-z
      0x0080..0x009F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },    # C1 controls
      0x00A0..0x00B0 => { mathmode: "<mo>(CharHex)</mo>" },       # Latin-1 Supplement
      0x00B2..0x00BB => { mathmode: "<mo>(CharHex)</mo>" },       # Latin-1 Supplement
      0x00BC..0x00BE => { mathmode: "<mn>(CharHex)</mn>" },       # Latin-1 Supplement (vulgar fractions)
      0x02C6..0x02FF => { mathmode: "<mo>(CharHex)</mo>" },       # Spacing Modifier Letters
      0x0300..0x036F => { mathmode: "<mo>(CharHex)</mo>" },       # Combining Diacritical Marks
      0x2000..0x200B => { mathmode: "<mtext>(CharHex)</mtext>" }, # General Punctuation: spaces
      0x200C..0x200F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # Formatting characters
      0x2010..0x2027 => { mathmode: "<mo>(CharHex)</mo>" },       # General Punctuation
      0x2028..0x202F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # Formatting characters
      0x2030..0x2069 => { mathmode: "<mo>(CharHex)</mo>" },       # General Punctuation
      0x206A..0x206F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # Formatting characters
      0x2070..0x209F => { mathmode: "<mo>(CharHex)</mo>" },       # Superscripts and Subscripts
      0x20A0..0x20CF => { mathmode: "<mi>(CharHex)</mi>" },       # Currency Symbols
      0x20D0..0x20FF => { mathmode: "<mo>(CharHex)</mo>" },       # Combining Diacritical Marks for Symbols
      0x2100..0x2101 => { mathmode: "<mo>(CharHex)</mo>" },       # Letterlike Symbols
      0x2103..0x210A => { mathmode: "<mo>(CharHex)</mo>" },       # Letterlike Symbols
      0x2116..0x2117 => { mathmode: "<mo>(CharHex)</mo>" },       # Letterlike Symbols
      0x213C..0x2146 => { mathmode: "<mo>(CharHex)</mo>" },       # Letterlike Symbols
      0x2150..0x218F => { mathmode: "<mn>(CharHex)</mn>" },       # Number Forms
      0x2190..0x21FF => { mathmode: "<mo>(CharHex)</mo>" },       # Arrows
      0x2200..0x2211 => { mathmode: "<mo>(CharHex)</mo>" },       # Mathematical Operators
      0x2213..0x221D => { mathmode: "<mo>(CharHex)</mo>" },       # Mathematical Operators
      0x221F..0x22FF => { mathmode: "<mo>(CharHex)</mo>" },       # Mathematical Operators
      0x2300..0x23FF => { mathmode: "<mo>(CharHex)</mo>" },       # Miscellaneous Technical
      0x2400..0x243F => { mathmode: "<mo>(CharHex)</mo>" },       # Control Pictures
      0x2500..0x257F => { mathmode: "<mo>(CharHex)</mo>" },       # Box Drawing
      0x2580..0x259F => { mathmode: "<mo>(CharHex)</mo>" },       # Block Elements
      0x25A0..0x25FF => { mathmode: "<mo>(CharHex)</mo>" },       # Geometric Shapes
      0x2600..0x267F => { mathmode: "<mo>(CharHex)</mo>" },       # Miscellaneous Symbols
      0x2700..0x27BF => { mathmode: "<mo>(CharHex)</mo>" },       # Dingbats
      0x27F0..0x27FF => { mathmode: "<mo>(CharHex)</mo>" },       # Supplemental Arrows-A
      0x2900..0x297F => { mathmode: "<mo>(CharHex)</mo>" },       # Supplemental Arrows-B
      0x2980..0x29AF => { mathmode: "<mo>(CharHex)</mo>" },       # Miscellaneous Mathematical Symbols-B
      0x29B1..0x29DB => { mathmode: "<mo>(CharHex)</mo>" },       # Miscellaneous Mathematical Symbols-B
      0x29DD..0x29FF => { mathmode: "<mo>(CharHex)</mo>" },       # Miscellaneous Mathematical Symbols-B
      0x2A00..0x2AFF => { mathmode: "<mo>(CharHex)</mo>" },       # Supplemental Mathematical Operators
      0x3000..0x303F => { mathmode: "<mo>(CharHex)</mo>" },       # CJK Symbols and Punctuation
      # Disabled entries for Unicode Private Use Area code points, kept for
      # reference. Entries listing only textmode had no mathmode override.
      # 0xE000..0xE900 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE905..0xE90A => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE90D..0xE921 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE926..0xE92C => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE92E..0xE931 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE934..0xE939 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE93C..0xE98E => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xE990..0xEA05 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEA08..0xEA0A => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEA0D..0xEA31 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEA36..0xEA39 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEA3C..0xEA3F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEA46..0xEB00 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEB03..0xEB04 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEB07..0xED09 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xED14..0xED15 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xED17..0xEE03 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEE04..0xEE0C => { textmode: UNSUPPORTED },
      # 0xEE0D..0xEE18 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEE1A..0xEEFF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xEF09..0xEFFF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xF000..0xF033 => { textmode: UNSUPPORTED },
      # 0xF034..0xF07F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xF080..0xF0B3 => { textmode: UNSUPPORTED },
      # 0xF0B4..0xF0BF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xF0C0..0xF0C9 => { textmode: UNSUPPORTED },
      # 0xF0CA..0xF0FF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      # 0xF100..0xF133 => { textmode: UNSUPPORTED },
      # 0xF134..0xF8FF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xFB00..0xFB4F => { mathmode: "<mtext>(CharHex)</mtext>" }, # Alphabetic Presentation Forms
      0xFE35..0xFE4F => { mathmode: "<mo>(CharHex)</mo>" }        # CJK Compatibility Forms
    }.freeze

    attr_accessor :mathtype

    # @param mathtype [Nokogiri::XML::Document] MTEF XML whose <char>
    #   elements are rewritten in place by #replace
    def initialize(mathtype)
      @mathtype = mathtype
    end

    # Replaces every <char> whose mt_code_value has an entry in REPLACEMENTS;
    # characters without an entry are left untouched.
    #
    # @return [Nokogiri::XML::Document] a freshly parsed copy of the document,
    #   which is also stored in #mathtype
    def replace
      @mathtype.css("char").each do |char|
        # Parse the code point once per character instead of once per entry.
        code = char.xpath("mt_code_value").text.hex
        replacement = REPLACEMENTS.find { |matcher, _| matcher === code }
        replace_character(replacement, char) if replacement
      end

      # Reparse XML to merge adjacent text nodes
      @mathtype = Nokogiri::XML(@mathtype.to_xml)
    end

    # Swaps +char+ for the markup produced from the matching template.
    #
    # @param replacement [Array] a [key, templates] pair from REPLACEMENTS
    # @param char [Nokogiri::XML::Node] the <char> element being replaced
    def replace_character(replacement, char)
      templates = replacement[1]
      # A boolean XPath expression evaluates to true/false, not a node set.
      template =
        if char.xpath("variation = 'textmode'")
          templates[:textmode] || DEFAULT_TEXTMODE
        else
          templates[:mathmode] || DEFAULT_MATHMODE
        end

      char.replace Nokogiri::HTML::DocumentFragment.parse(replacement_xml(template, char))
    end

    # Fills the (Char) and (CharHex) placeholders of a template.
    #
    # @param string [String] template containing zero or more placeholders
    # @param char [#xpath] node whose mt_code_value text looks like "0x2229"
    # @return [String] the template with placeholders substituted
    def replacement_xml(string, char)
      code = char.xpath("mt_code_value").text
      string
        .gsub("(Char)") { code.hex.chr("UTF-8") }   # e.g. π
        .gsub("(CharHex)") { "&#x#{code[2..-1]};" } # e.g. &#x2229; (drops the "0x" prefix)
    end
  end
end
363
+
364
+
365
+
366
+
@@ -0,0 +1,151 @@
1
+ require "nokogiri"
2
+
3
+ # We're moving elements around to avoid "scanning" for characters when
4
+ # using the /scan translator mode. The original issue is that MathType
5
+ # considers subscripts and superscripts to be stand-alone, while MathML
6
+ # considers them subscripts and superscripts of an object, and the object
7
+ # is then included in the sub/superscript element.
8
+
9
+ # MathType's translator has a scan mode, which does the following:
10
+ # 1. Scanning goes to the right for commands with the "pre" option,
11
+ # otherwise left;
12
+ # 2. If we are scanning right and the object next to the template is an
13
+ # opening "fence" character (parenthesis, bracket, brace, etc.),
14
+ # the slot is scanned for the corresponding closing fence character;
15
+ # 3. If we are scanning left and the object next to the template is an closing
16
+ # "fence" character (parenthesis, bracket, brace, etc.), the slot is scanned
17
+ # for the corresponding opening fence character;
18
+ # 4. If the fenced expression is found, it is used as #1 in the template translation.
19
+
20
+ # Additionally, Mover inverts <emb>ellishments from <char><emb></emb></char> to
21
+ # <emb><char></char></emb>.
22
+
module MathTypeToMathML
  # Restructures an MTEF XML document before the XSLT pass:
  #
  # * for each sub/superscript template, wraps the base it applies to in a
  #   new <slot> inserted before the template's first existing <slot>;
  # * inverts <char><embell/></char> into <embell><char/></embell>.
  class Mover
    attr_reader :mathtype
    attr_accessor :last_preceding_siblings

    # XPath predicate matching fence templates. Kept for API compatibility;
    # not used by the methods of this class.
    PARENS_SELECTOR = %w[tmPARENS tmBRACK tmBRACE tmOBRACK tmOBRACE tmHBRACK tmHBRACE]
                      .map { |name| "selector='#{name}'" }.join(" or ").freeze

    # XPath predicate matching sub/superscript templates.
    SUBSUP_SELECTOR = %w[tmSUP tmSUB tmSUBSUP]
                      .map { |name| "selector='#{name}'" }.join(" or ").freeze

    # XPath predicate for scripts that precede their base.
    PRE = "variation='tvSU_PRECEDES'".freeze

    # Opening fence code => matching closing fence code.
    OPEN_CLOSE_PAIRS = {
      "0x0028" => "0x0029", # ( )
      "0x005B" => "0x005D", # [ ]
      "0x007B" => "0x007D"  # { }
    }.freeze

    # XPath predicates matching a <char> that is an opening / closing fence.
    OPEN_PAREN = OPEN_CLOSE_PAIRS.keys
                                 .map { |code| "mt_code_value = '#{code}'" }.join(" or ").freeze
    CLOSE_PAREN = OPEN_CLOSE_PAIRS.values
                                  .map { |code| "mt_code_value = '#{code}'" }.join(" or ").freeze

    # @param mathtype [Nokogiri::XML::Document] MTEF XML, modified in place
    def initialize(mathtype)
      @mathtype = mathtype
      @last_preceding_siblings = Nokogiri::XML::NodeSet.new(@mathtype)
    end

    # Reparents +elements+ under +parent+ one by one, stopping after the first
    # element whose mt_code_value equals +mt_code_value+ (that element is
    # moved too). If no element matches, all of them are moved.
    def move_until_mt_code(elements, mt_code_value, parent)
      elements.each do |element|
        element.parent = parent
        # A boolean XPath expression evaluates to true/false.
        break if element.xpath("mt_code_value = '#{mt_code_value}'")
      end
    end

    # If +siblings+ starts with an opening fence, moves the run up to and
    # including the matching closing fence code into +node+. Does nothing
    # when +siblings+ is empty or does not start with an opening fence.
    def move_paren(siblings, node)
      opener = siblings.first
      return unless opener

      OPEN_CLOSE_PAIRS.each do |open, close|
        move_until_mt_code(siblings, close, node) if opener.xpath("mt_code_value = '#{open}'")
      end
    end

    # Returns the preceding tmpl/char siblings of +el+ that were not returned
    # by the previous call, and remembers the full set for the next call.
    def new_preceding_siblings(el)
      all_siblings = el.xpath("preceding-sibling::tmpl | preceding-sibling::char")
      siblings = all_siblings - last_preceding_siblings
      self.last_preceding_siblings = all_siblings
      siblings
    end

    # Returns every following tmpl/char sibling of +el+.
    def new_following_siblings(el)
      el.xpath("following-sibling::tmpl | following-sibling::char")
    end

    # Handles scripts that follow their base (scan to the left).
    def move_following_subsup
      mathtype.xpath("//tmpl[(#{SUBSUP_SELECTOR}) and not(#{PRE})]").each do |el|
        siblings = new_preceding_siblings(el)
        node = Nokogiri::XML::Node.new "slot", mathtype
        base = siblings.last

        if base && base.xpath(CLOSE_PAREN)
          # Walk back from the closing fence and keep everything from the
          # nearest opening fence onwards.
          fenced = siblings.reverse.take_while do |sibling|
            !sibling.next_element.xpath(OPEN_PAREN)
          end.reverse

          move_paren(fenced, node)
        elsif base
          # Single base object: a character or a whole template.
          base.parent = node
        end
        # With no base at all the slot is inserted empty, so the template
        # still gains the extra leading slot.

        el.at_css("slot").add_previous_sibling node
      end
    end

    # Handles scripts that precede their base (scan to the right).
    def move_preceding_subsup
      mathtype.xpath("//tmpl[(#{SUBSUP_SELECTOR}) and #{PRE}]").each do |el|
        siblings = new_following_siblings(el)
        node = Nokogiri::XML::Node.new "slot", mathtype
        base = siblings.first

        if base && base.xpath(OPEN_PAREN)
          # The siblings already start at the opening fence, so scan forward
          # to the matching closing fence. (Scanning backwards from the end,
          # as the leftward case does, left a closing fence at the head of
          # the list and dereferenced a nil next_element on the last sibling.)
          move_paren(siblings, node)
        elsif base
          # Single base object: a character or a whole template.
          base.parent = node
        end

        el.at_css("slot").add_previous_sibling node
      end
    end

    def invert_char_embell
      # Invert char -> embell to embell -> char.
      mathtype.xpath("//char[embell]").each do |el|
        embell = el.xpath("embell").first.remove
        # Clone after the embell has been detached so the copy does not contain it.
        char = el.clone
        char.parent = embell
        el.replace(embell)
      end
    end

    # Runs all restructuring passes in the required order.
    def move
      move_following_subsup
      move_preceding_subsup
      invert_char_embell
    end
  end
end
148
+
149
+
150
+
151
+
@@ -0,0 +1,3 @@
# Namespace for the gem; this file only contributes the release version.
module MathTypeToMathML
  # Version string of this gem release.
  VERSION = "0.0.8"
end
@@ -0,0 +1,45 @@
1
+ require "mathtype_to_mathml/version"
2
+ require "nokogiri"
3
+ require "mathtype"
4
+ require_relative "mathtype_to_mathml/mover"
5
+ require_relative "mathtype_to_mathml/char_replacer"
6
+
module MathTypeToMathML
  # Converts a binary MathType equation into a MathML string.
  #
  # The constructor does the preparation work (binary -> MTEF XML, element
  # moving, character replacement); #convert applies the XSLT stylesheet.
  class Converter
    # @param mathtype the equation source, passed unchanged to
    #   Mathtype::Converter.new
    def initialize(mathtype)
      # The block form closes the stylesheet handle once it has been parsed.
      # A File (not its contents) is handed to Nokogiri on purpose.
      # NOTE(review): presumably this lets relative xsl:import/xsl:include
      # paths inside transform.xsl resolve against the file location;
      # confirm before switching to File.read.
      @xslt = File.open(path_to_xslt) { |stylesheet| Nokogiri::XSLT(stylesheet) }

      converter = Mathtype::Converter.new(mathtype)
      doc = converter.xml.doc

      # Appends the intermediate MTEF XML to mathtype.log in the current
      # working directory on every conversion.
      File.open("mathtype.log", "a") do |f|
        f.puts "----- NEW LOG #{Time.now} -----"
        f.puts doc.to_xml(indent: 2)
      end

      @mathtype = doc
      # Addresses lack of scanning mode in our translator. See "Mover" for more.
      mover = Mover.new(@mathtype)
      mover.move

      # Character ranges are tricky in XSLT 1.0, so we deal with them in Ruby
      # NOTE(review): #replace returns a reparsed copy of the document, which
      # is discarded here; @mathtype stays the in-place-modified original.
      # Confirm whether the reparsed copy was meant to be used.
      char_replacer = CharReplacer.new(@mathtype)
      char_replacer.replace
    end

    # Runs the XSLT transform over the prepared document.
    #
    # @return [String] the serialized MathML document
    def convert
      out = @xslt.transform(@mathtype)
      # This is a hack, but XML namespaces are such a pain to get
      # right, especially in nokogiri, so... We assume all content is
      # mathml, remove namespaces & set the root default namespace to
      # mathml
      out.remove_namespaces!
      out.root.default_namespace = 'http://www.w3.org/1998/Math/MathML'
      out.to_s
    end

    # @return [String] absolute path of transform.xsl, which sits next to
    #   this source file
    def path_to_xslt
      File.expand_path("../transform.xsl", __FILE__)
    end
  end
end