mathtype_to_mathml 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +14 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/.travis.yml +3 -0
  6. data/Gemfile +6 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +45 -0
  9. data/Rakefile +7 -0
  10. data/lib/mathtype_to_mathml.rb +28 -0
  11. data/lib/mathtype_to_mathml/char_replacer.rb +311 -0
  12. data/lib/mathtype_to_mathml/mover.rb +151 -0
  13. data/lib/mathtype_to_mathml/version.rb +3 -0
  14. data/lib/transform.xsl +104 -0
  15. data/lib/xsl/arrow.xsl +319 -0
  16. data/lib/xsl/brace.xsl +55 -0
  17. data/lib/xsl/char.xsl +35 -0
  18. data/lib/xsl/embellishment.xsl +389 -0
  19. data/lib/xsl/matrix.xsl +116 -0
  20. data/lib/xsl/pile.xsl +54 -0
  21. data/lib/xsl/subsup.xsl +55 -0
  22. data/lib/xsl/sum.xsl +57 -0
  23. data/lib/xsl/union_intersection.xsl +104 -0
  24. data/mathtype_to_mathml.gemspec +28 -0
  25. data/spec/fixtures/expected/arrows.xml +389 -0
  26. data/spec/fixtures/expected/embellishments.xml +178 -0
  27. data/spec/fixtures/expected/equation1.xml +52 -0
  28. data/spec/fixtures/expected/equation10.xml +19 -0
  29. data/spec/fixtures/expected/equation11.xml +17 -0
  30. data/spec/fixtures/expected/equation12.xml +34 -0
  31. data/spec/fixtures/expected/equation13.xml +113 -0
  32. data/spec/fixtures/expected/equation2.xml +33 -0
  33. data/spec/fixtures/expected/equation3.xml +324 -0
  34. data/spec/fixtures/expected/equation4.xml +14 -0
  35. data/spec/fixtures/expected/equation5.xml +23 -0
  36. data/spec/fixtures/expected/equation6.xml +13 -0
  37. data/spec/fixtures/expected/equation7.xml +19 -0
  38. data/spec/fixtures/expected/equation8.xml +17 -0
  39. data/spec/fixtures/expected/equation9.xml +15 -0
  40. data/spec/fixtures/input/arrows.bin +0 -0
  41. data/spec/fixtures/input/embellishments.bin +0 -0
  42. data/spec/fixtures/input/equation1.bin +0 -0
  43. data/spec/fixtures/input/equation10.bin +0 -0
  44. data/spec/fixtures/input/equation11.bin +0 -0
  45. data/spec/fixtures/input/equation12.bin +0 -0
  46. data/spec/fixtures/input/equation13.bin +0 -0
  47. data/spec/fixtures/input/equation2.bin +0 -0
  48. data/spec/fixtures/input/equation3.bin +0 -0
  49. data/spec/fixtures/input/equation4.bin +0 -0
  50. data/spec/fixtures/input/equation5.bin +0 -0
  51. data/spec/fixtures/input/equation6.bin +0 -0
  52. data/spec/fixtures/input/equation7.bin +0 -0
  53. data/spec/fixtures/input/equation8.bin +0 -0
  54. data/spec/fixtures/input/equation9.bin +0 -0
  55. data/spec/html_output.rb +28 -0
  56. data/spec/mathtype_to_mathml_spec.rb +19 -0
  57. data/spec/spec_helper.rb +2 -0
  58. metadata +220 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c01e41b7c35f3c71aa5cc88424320297a1d5366d
4
+ data.tar.gz: bd36ef37f1900eca7617503bcd6c84ab79ad7755
5
+ SHA512:
6
+ metadata.gz: 4d116d2674ee17634a20e1dbc02f9a92e188ab30940d6f5ffa8cf8f99863d0d857351d57474020f4b7c41bd1d422785683ec2f0a66fe683585a9145581933567
7
+ data.tar.gz: d9668ff0ccc786b75833175b953d086d881d0ab9230ba683d29fcac1dcc18b19c0419aa001fbb468d4f5e26acaee9d29330a1a81776b24747bc90c8af88fc18f
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.2
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+
2
+ source 'https://rubygems.org'
3
+
4
+ # Specify your gem's dependencies in mathtype_to_mathml.gemspec
5
+
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 PLOS
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,45 @@
1
+ # MathTypeToMathML
2
+
3
+ This gem can be used to convert MathType equations from a binary format (e.g. embedded in Word documents) to an open MathML representation. It achieves that in several stages, first using the "mathtype" gem to convert from a binary to an XML form of MTEF, and second, using XSLTs to convert XML to MathML.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'mathtype_to_mathml'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install mathtype_to_mathml
20
+
21
+ ## Usage
22
+
23
+ To convert a MathType equation embedded in a Word document (the file is usually named something like `oleObject1.bin`):
24
+
25
+ ```
26
+ mathml = MathTypeToMathML::Converter.new("oleObject1.bin").convert
27
+ ```
28
+
29
+ This will return a MathML string of the MathType equation.
30
+
31
+ ## Testing
32
+
33
+ Run `bundle exec rspec` to run specs. Additionally, you can create a visual output using `html_output.rb`, like so:
34
+
35
+ ```
36
+ bundle exec ruby spec/html_output.rb > test.html
37
+ ```
38
+
39
+ ## Contributing
40
+
41
+ 1. Fork it ( https://github.com/[my-github-username]/mathtype_to_mathml/fork )
42
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
43
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
44
+ 4. Push to the branch (`git push origin my-new-feature`)
45
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,7 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
7
+
@@ -0,0 +1,28 @@
1
+ require "mathtype_to_mathml/version"
2
+ require "nokogiri"
3
+ require "mathtype"
4
+ require_relative "mathtype_to_mathml/mover"
5
+ require_relative "mathtype_to_mathml/char_replacer"
6
+ require "pry"
7
+
8
module MathTypeToMathML
  # Converts a MathType equation into a MathML string.
  #
  # The constructor does the preparation: it loads the XSLT stylesheet, obtains
  # the XML form of the equation from the "mathtype" gem and preprocesses that
  # document in Ruby (Mover first, then CharReplacer). #convert then applies
  # the stylesheet to the preprocessed document.
  class Converter
    # @param mathtype the equation source, handed unchanged to
    #   Mathtype::Converter.new (presumably the path of an OLE ".bin" file
    #   such as "oleObject1.bin" -- confirm against the mathtype gem).
    def initialize(mathtype)
      # Resolve the stylesheet next to this file instead of relative to the
      # process working directory, so the gem also works when the caller is
      # not running from the gem's root.
      # NOTE(review): the stylesheet is still parsed from a string, so any
      # relative xsl:include / xsl:import inside it is presumably still
      # resolved against the working directory -- confirm.
      stylesheet_path = File.expand_path("../transform.xsl", __FILE__)
      @xslt = Nokogiri::XSLT(File.read(stylesheet_path))

      @mathtype = Mathtype::Converter.new(mathtype).xml.doc

      # Addresses lack of scanning mode in our translator. See "Mover" for more.
      Mover.new(@mathtype).move

      # Character ranges are tricky in XSLT 1.0, so we deal with them in Ruby
      CharReplacer.new(@mathtype).replace
    end

    # Applies the XSLT stylesheet to the preprocessed equation document.
    #
    # @return [String] the serialized result of the transformation
    def convert
      @xslt.transform(@mathtype).to_s
    end
  end
end
@@ -0,0 +1,311 @@
1
+ require "nokogiri"
2
+
3
+ # XSLT 1.0 has virtually non-existent capabilities for character ranges,
4
+ # codepoints, hex to decimal, etc., so we replace characters within character
5
+ # ranges with Ruby. Single character translations are still done with XSLT.
6
+
7
module MathTypeToMathML
  # Replaces <char> elements of an MTEF XML document whose code point falls in
  # one of the REPLACEMENTS ranges with markup built from a template.
  #
  # Templates may contain two placeholders:
  #   (Char)    - replaced by the character itself
  #   (CharHex) - replaced by a numeric character reference, e.g. &#x2229;
  #
  # Each range maps to a hash of templates keyed by mode. Only :textmode and
  # :mathmode are read by this class; the :number key of the digits entry is
  # kept as data but is not consulted here.
  class CharReplacer
    UNSUPPORTED = "Unsupported (Char)".freeze

    # Ranges do not overlap; the first matching range wins.
    REPLACEMENTS = {
      0x0000..0x0008 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0x000B..0x001F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0x0030..0x0039 => { # (range) DIGITS 0-9
        mathmode: "<mn>(Char)</mn>",
        number: "(Char)",
        textmode: "(Char)"
      },
      0x003A..0x003B => { # (range) COLON, SEMICOLON
        mathmode: "<mo>(Char)</mo>",
        textmode: "(Char)"
      },
      0x0041..0x005A => { # range: Basic Latin (uppercase letters)
        mathmode: "<mi>(Char)</mi>",
        textmode: "(Char)"
      },
      0x0061..0x007A => { # range: Basic Latin (lowercase letters)
        mathmode: "<mi>(Char)</mi>",
        textmode: "(Char)"
      },
      0x0080..0x009F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # range: C1 Controls
      0x00A0..0x00B0 => { mathmode: "<mo>(CharHex)</mo>" }, # range: Latin-1 Supplement
      0x00B2..0x00BB => { mathmode: "<mo>(CharHex)</mo>" }, # range: Latin-1 Supplement
      0x00BC..0x00BE => { mathmode: "<mn>(CharHex)</mn>" }, # range: Latin-1 Supplement
      0x02C6..0x02FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Spacing Modifier Letters
      0x0300..0x036F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Combining Diacritical Marks
      0x2000..0x200B => { mathmode: "<mtext>(CharHex)</mtext>" }, # range: Spaces
      0x200C..0x200F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # range: Formatting Characters
      0x2010..0x2027 => { mathmode: "<mo>(CharHex)</mo>" }, # range: General Punctuation
      0x2028..0x202F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED }, # range: Formatting Characters
      0x2030..0x2069 => { mathmode: "<mo>(CharHex)</mo>" }, # range: General Punctuation
      0x206A..0x206F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0x2070..0x209F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Superscripts and Subscripts
      0x20A0..0x20CF => { mathmode: "<mi>(CharHex)</mi>" }, # range: Currency Symbols
      0x20D0..0x20FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Combining Diacritical Marks for Symbols
      0x2100..0x2101 => { mathmode: "<mo>(CharHex)</mo>" }, # range: Letterlike Symbols
      0x2103..0x210A => { mathmode: "<mo>(CharHex)</mo>" }, # range: Letterlike Symbols
      0x2116..0x2117 => { mathmode: "<mo>(CharHex)</mo>" }, # range: Letterlike Symbols
      0x213C..0x2146 => { mathmode: "<mo>(CharHex)</mo>" }, # range: Letterlike Symbols
      0x2150..0x218F => { mathmode: "<mn>(CharHex)</mn>" }, # range: Number Forms
      0x2190..0x21FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Arrows
      0x2200..0x2211 => { mathmode: "<mo>(CharHex)</mo>" }, # range: Mathematical Operators
      0x2213..0x221D => { mathmode: "<mo>(CharHex)</mo>" }, # range: Mathematical Operators
      0x221F..0x22FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Mathematical Operators
      0x2300..0x23FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Miscellaneous Technical
      0x2400..0x243F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Control Pictures
      0x2500..0x257F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Box Drawing
      0x2580..0x259F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Block Elements
      0x25A0..0x25FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Geometric Shapes
      0x2600..0x267F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Miscellaneous Symbols
      0x2700..0x27BF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Dingbats
      0x27F0..0x27FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Supplemental Arrows-A
      0x2900..0x297F => { mathmode: "<mo>(CharHex)</mo>" }, # range: Supplemental Arrows-B
      0x2980..0x29AF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Miscellaneous Mathematical Symbols-B
      0x29B1..0x29DB => { mathmode: "<mo>(CharHex)</mo>" }, # range: Miscellaneous Mathematical Symbols-B
      0x29DD..0x29FF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Miscellaneous Mathematical Symbols-B
      0x2A00..0x2AFF => { mathmode: "<mo>(CharHex)</mo>" }, # range: Supplemental Mathematical Operators
      0x3000..0x303F => { mathmode: "<mo>(CharHex)</mo>" }, # range: CJK Symbols and Punctuation
      # 0xE000..0xF8FF lies in the Unicode Private Use Area.
      0xE000..0xE900 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE905..0xE90A => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE90D..0xE921 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE926..0xE92C => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE92E..0xE931 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE934..0xE939 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE93C..0xE98E => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xE990..0xEA05 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA08..0xEA0A => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA0D..0xEA31 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA36..0xEA39 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA3C..0xEA3F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEA46..0xEB00 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEB03..0xEB04 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEB07..0xED09 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xED14..0xED15 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xED17..0xEE03 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEE04..0xEE0C => { textmode: UNSUPPORTED },
      0xEE0D..0xEE18 => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEE1A..0xEEFF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xEF09..0xEFFF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF000..0xF033 => { textmode: UNSUPPORTED },
      0xF034..0xF07F => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF080..0xF0B3 => { textmode: UNSUPPORTED },
      0xF0B4..0xF0BF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF0C0..0xF0C9 => { textmode: UNSUPPORTED },
      0xF0CA..0xF0FF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xF100..0xF133 => { textmode: UNSUPPORTED },
      0xF134..0xF8FF => { mathmode: UNSUPPORTED, textmode: UNSUPPORTED },
      0xFB00..0xFB4F => { mathmode: "<mtext>(CharHex)</mtext>" }, # range: Alphabetic Presentation Forms
      0xFE35..0xFE4F => { mathmode: "<mo>(CharHex)</mo>" } # range: CJK Compatibility Forms
    }.freeze

    attr_accessor :mathtype

    # @param mathtype the document to rewrite in place; it must respond to
    #   #css("char") and yield nodes responding to #xpath and #replace.
    def initialize(mathtype)
      @mathtype = mathtype
    end

    # Walks every <char> element and replaces those whose mt_code_value falls
    # into one of the REPLACEMENTS ranges. Other elements are left untouched.
    def replace
      @mathtype.css("char").each do |char|
        # Parse the code point once per element rather than once per range.
        code = char.xpath("mt_code_value").text.hex
        replacement = REPLACEMENTS.find { |range, _| range === code }
        replace_character(replacement, char) if replacement
      end
    end

    # Replaces a single <char> element using the template for its mode.
    #
    # @param replacement [Array] a [range, templates] pair as yielded by
    #   REPLACEMENTS.find
    # @param char the <char> node to replace
    def replace_character(replacement, char)
      mode = char.xpath("variation = 'textmode'") ? :textmode : :mathmode
      template = replacement[1][mode]

      # Several ranges define a template for one mode only. Without a
      # template there is nothing to substitute, so the element is left as
      # it is instead of failing on a nil template.
      return unless template

      char.replace Nokogiri::HTML::DocumentFragment.parse(replacement_xml(template, char))
    end

    # Expands the (Char) and (CharHex) placeholders of a template.
    #
    # @param string [String] the template
    # @param char the <char> node providing mt_code_value (hex text, "0x" prefixed)
    # @return [String] the expanded markup
    def replacement_xml(string, char)
      code_text = char.xpath("mt_code_value").text

      string.gsub("(Char)") do
        # An explicit encoding is required: a bare Integer#chr raises
        # RangeError above 0xFF and yields a binary string for 0x80..0xFF.
        code_text.hex.chr(Encoding::UTF_8) # e.g. π
      end.gsub("(CharHex)") do
        "&#x#{code_text.sub(/\A0x/i, '')};" # e.g. &#x2229;
      end
    end
  end
end
308
+
309
+
310
+
311
+