latex-to-unicode 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
+ require 'treetop'
2
+ require File.dirname(__FILE__) + '/data.rb'
3
+ require File.dirname(__FILE__) + '/latex_grammar.rb'
4
+
5
# Entry points for turning LaTeX markup into Unicode text.
module LatexToUnicode
  # Normalizes raw input before it is handed to the parser.
  #
  # Every whitespace character is removed first; the ALIASES, WHITESPACE and
  # SYMBOLS tables are then applied, in that order, through +translate+.
  #
  # @param str [String] raw LaTeX source
  # @return the result of the last +translate+ call
  def self.preprocess(str)
    stripped = str.gsub(/\s/, '')
    [ALIASES, WHITESPACE, SYMBOLS].inject(stripped) do |text, table|
      translate(text, table)
    end
  end

  # Preprocesses +str+, parses it with LatexParser and returns the +value+
  # of the resulting syntax tree.
  #
  # @param str [String] raw LaTeX source
  # @return the +value+ of the root syntax node
  # @raise [ArgumentError] when the parser returns no tree; the message
  #   starts with "Parsing failed." and, when the parser reports one,
  #   is followed by its failure reason
  def self.convert(str)
    parser = LatexParser.new
    tree = parser.parse(preprocess(str))
    return tree.value if tree

    # failure_reason can be nil when no terminal failure was recorded,
    # so it is dropped from the message in that case.
    message = ['Parsing failed.', parser.failure_reason].compact.join(' ')
    raise ArgumentError, message
  end
end
@@ -0,0 +1,815 @@
1
+ # Autogenerated from a Treetop grammar. Edits may be lost.
2
+
3
+
4
+ require File.dirname(__FILE__) + '/data.rb'
5
+ require File.dirname(__FILE__) + '/translate.rb'
6
+
7
+ module LatexToUnicode
8
+ module Latex
9
+ include Treetop::Runtime
10
+
11
# Name of the grammar rule parsing starts from.
# Falls back to :expression (and stores it in @root) when @root is unset
# or falsy.
def root
  @root = :expression unless @root
  @root
end
14
+
15
# Semantic mixin for nodes produced by the +expression+ rule.
module Expression0
  # Concatenates the +value+ of every child element, in order.
  def value
    parts = elements.map(&:value)
    parts.join
  end
end
20
+
21
# Parses the +expression+ rule: zero or more items, each being the first
# of frac, sqrt, combining, unary, grouped or atom that matches.
#
# Results are memoized in node_cache[:expression] keyed by start index.
# This rule always yields a node (possibly with no children), extended
# with Expression0.
def _nt_expression
  start_index = index
  memo = node_cache[:expression]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  children = []
  loop do
    attempt_start = index
    # ordered choice: the first alternative that succeeds wins
    child = _nt_frac || _nt_sqrt || _nt_combining ||
            _nt_unary || _nt_grouped || _nt_atom
    unless child
      @index = attempt_start
      break
    end
    children << child
  end

  node = instantiate_node(SyntaxNode, input, start_index...index, children)
  node.extend(Expression0)

  memo[start_index] = node
  node
end
80
+
81
# Child accessors for nodes produced by the +frac+ rule
# ('\frac' followed by two elements).
module Frac0
  # First operand after the '\frac' terminal.
  def n
    elements.at(1)
  end

  # Second operand after the '\frac' terminal.
  def d
    elements.at(2)
  end
end

# Semantic value for +frac+ nodes.
module Frac1
  # Hands the values of both operands to LatexToUnicode.translate_fraction.
  def value
    numerator = n.value
    denominator = d.value
    LatexToUnicode.translate_fraction(numerator, denominator)
  end
end
96
+
97
# Parses the +frac+ rule: the terminal '\frac' followed by two elements.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:frac] keyed by start index. On success the node is extended
# with Frac0 and Frac1; on failure the index is rewound and nil returned.
def _nt_frac
  start_index = index
  memo = node_cache[:frac]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  parts = []
  if has_terminal?('\frac', false, index)
    parts << instantiate_node(SyntaxNode, input, index...(index + 5))
    @index += 5
    parts << _nt_element
    # the second operand is only attempted once the first one matched
    parts << _nt_element if parts.last
  else
    terminal_parse_failure('\frac')
    parts << nil
  end

  if parts.last
    node = instantiate_node(SyntaxNode, input, start_index...index, parts)
    node.extend(Frac0)
    node.extend(Frac1)
  else
    @index = start_index
    node = nil
  end

  memo[start_index] = node
  node
end
138
+
139
# Accessor for one (!']' atom) pair inside the bracketed part of '\sqrt[...]'.
module Sqrt0
  # The atom following the empty look-ahead node.
  def atom
    elements.at(1)
  end
end

# Child accessors for the '\sqrt[' ... ']' element form.
module Sqrt1
  # The node covering the text between the brackets.
  def n
    elements.at(1)
  end

  # The operand following the closing bracket.
  def element
    elements.at(3)
  end
end

# Semantic value for the bracketed form.
module Sqrt2
  # Passes the operand's value and the raw bracket text to
  # LatexToUnicode.translate_sqrt.
  def value
    radicand = element.value
    degree = n.text_value
    LatexToUnicode.translate_sqrt(radicand, degree)
  end
end

# Child accessor for the plain '\sqrt' element form.
module Sqrt3
  # The operand following the '\sqrt' terminal.
  def element
    elements.at(1)
  end
end

# Semantic value for the plain form.
module Sqrt4
  # Passes the operand's value and the integer 2 to
  # LatexToUnicode.translate_sqrt.
  def value
    radicand = element.value
    LatexToUnicode.translate_sqrt(radicand, 2)
  end
end
172
+
173
# Parses the +sqrt+ rule, an ordered choice of two forms:
#
# 1. '\sqrt[' followed by one or more (not-']' atom) pairs, then ']' and an
#    element; the node is extended with Sqrt1 and Sqrt2.
# 2. '\sqrt' followed by an element; the node is extended with Sqrt3 and
#    Sqrt4.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:sqrt] keyed by start index.
def _nt_sqrt
  start_index = index
  memo = node_cache[:sqrt]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  # --- first alternative: '\sqrt[' (!']' atom)+ ']' element ---
  seq = []
  if has_terminal?('\sqrt[', false, index)
    seq << instantiate_node(SyntaxNode, input, index...(index + 6))
    @index += 6

    bracket_start = index
    pairs = []
    loop do
      pair_start = index
      pair = []
      # negative look-ahead: the pair only matches when ']' is NOT next
      if has_terminal?(']', false, index)
        instantiate_node(SyntaxNode, input, index...(index + 1))
        @index += 1
        pair << nil
      else
        terminal_parse_failure(']')
        @index = pair_start
        pair << instantiate_node(SyntaxNode, input, index...index)
        pair << _nt_atom
      end

      unless pair.last
        @index = pair_start
        break
      end
      pair_node = instantiate_node(SyntaxNode, input, pair_start...index, pair)
      pair_node.extend(Sqrt0)
      pairs << pair_node
    end

    if pairs.empty?
      # at least one pair is required
      @index = bracket_start
      seq << nil
    else
      seq << instantiate_node(SyntaxNode, input, bracket_start...index, pairs)
      if has_terminal?(']', false, index)
        seq << instantiate_node(SyntaxNode, input, index...(index + 1))
        @index += 1
        seq << _nt_element
      else
        terminal_parse_failure(']')
        seq << nil
      end
    end
  else
    terminal_parse_failure('\sqrt[')
    seq << nil
  end

  if seq.last
    node = instantiate_node(SyntaxNode, input, start_index...index, seq)
    node.extend(Sqrt1)
    node.extend(Sqrt2)
  else
    @index = start_index
    node = nil
  end

  # --- second alternative: '\sqrt' element ---
  unless node
    seq = []
    if has_terminal?('\sqrt', false, index)
      seq << instantiate_node(SyntaxNode, input, index...(index + 5))
      @index += 5
      seq << _nt_element
    else
      terminal_parse_failure('\sqrt')
      seq << nil
    end

    if seq.last
      node = instantiate_node(SyntaxNode, input, start_index...index, seq)
      node.extend(Sqrt3)
      node.extend(Sqrt4)
    else
      @index = start_index
    end
  end

  memo[start_index] = node
  node
end
296
+
297
# Child accessors for nodes produced by the +combining+ rule
# (a combining command followed by an element).
module Combining0
  # The command terminal, e.g. the node whose text is '\hat'.
  def combining_command
    elements.at(0)
  end

  # The operand following the command.
  def element
    elements.at(1)
  end
end

# Semantic value for +combining+ nodes.
module Combining1
  # Passes the operand's value and the command name (command text without
  # its first character, as a Symbol) to LatexToUnicode.translate_combining.
  def value
    base = element.value
    mark = combining_command.text_value[1..-1].to_sym
    LatexToUnicode.translate_combining(base, mark)
  end
end
313
+
314
# Parses the +combining+ rule: a combining command followed by an element.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:combining] keyed by start index. On success the node is
# extended with Combining0 and Combining1; on failure the index is rewound.
def _nt_combining
  start_index = index
  memo = node_cache[:combining]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  parts = [_nt_combining_command]
  # the operand is only attempted once the command matched
  parts << _nt_element if parts.first

  if parts.last
    node = instantiate_node(SyntaxNode, input, start_index...index, parts)
    node.extend(Combining0)
    node.extend(Combining1)
  else
    @index = start_index
    node = nil
  end

  memo[start_index] = node
  node
end
345
+
346
# Parses the +combining_command+ rule: the first matching terminal out of
# \hat, \breve, \grave, \bar, \check, \acute, \tilde, \vec, \dot, \ddot and
# \mathring, tried in that order.
#
# Each terminal that does not match is reported through
# terminal_parse_failure before the next one is tried. Results (including
# failures, stored as nil) are memoized in node_cache[:combining_command]
# keyed by start index.
def _nt_combining_command
  start_index = index
  memo = node_cache[:combining_command]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  node = nil
  [
    '\hat', '\breve', '\grave', '\bar', '\check', '\acute',
    '\tilde', '\vec', '\dot', '\ddot', '\mathring'
  ].each do |command|
    if has_terminal?(command, false, index)
      node = instantiate_node(SyntaxNode, input, index...(index + command.length))
      @index += command.length
      break
    end
    terminal_parse_failure(command)
  end
  @index = start_index unless node

  memo[start_index] = node
  node
end
486
+
487
# Child accessors for nodes produced by the +unary+ rule
# (a unary command followed by an element).
module Unary0
  # The command terminal ('^', '_' or a backslash command).
  def unary_command
    elements.at(0)
  end

  # The operand following the command.
  def element
    elements.at(1)
  end
end

# Semantic value for +unary+ nodes.
module Unary1
  # Picks a translation table from the command text and applies it to the
  # operand's value via LatexToUnicode.translate:
  # '_' selects SUBSCRIPTS, '^' selects SUPERSCRIPTS, and any other command
  # selects the LatexToUnicode constant named after the upcased command
  # text without its first character.
  def value
    cmd = unary_command.text_value
    table = if cmd == '_'
              LatexToUnicode::SUBSCRIPTS
            elsif cmd == '^'
              LatexToUnicode::SUPERSCRIPTS
            else
              LatexToUnicode.const_get(cmd[1..-1].upcase)
            end
    LatexToUnicode.translate(element.value, table)
  end
end
508
+
509
# Parses the +unary+ rule: a unary command followed by an element.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:unary] keyed by start index. On success the node is extended
# with Unary0 and Unary1; on failure the index is rewound.
def _nt_unary
  start_index = index
  memo = node_cache[:unary]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  parts = [_nt_unary_command]
  # the operand is only attempted once the command matched
  parts << _nt_element if parts.first

  if parts.last
    node = instantiate_node(SyntaxNode, input, start_index...index, parts)
    node.extend(Unary0)
    node.extend(Unary1)
  else
    @index = start_index
    node = nil
  end

  memo[start_index] = node
  node
end
540
+
541
# Parses the +unary_command+ rule: the first matching terminal out of
# ^, _, \bb, \bf, \cal, \frak, \it and \mono, tried in that order.
#
# Each terminal that does not match is reported through
# terminal_parse_failure before the next one is tried. Results (including
# failures, stored as nil) are memoized in node_cache[:unary_command]
# keyed by start index.
def _nt_unary_command
  start_index = index
  memo = node_cache[:unary_command]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  node = nil
  ['^', '_', '\bb', '\bf', '\cal', '\frak', '\it', '\mono'].each do |command|
    if has_terminal?(command, false, index)
      node = instantiate_node(SyntaxNode, input, index...(index + command.length))
      @index += command.length
      break
    end
    terminal_parse_failure(command)
  end
  @index = start_index unless node

  memo[start_index] = node
  node
end
648
+
649
# Parses the +element+ rule: a grouped expression or, failing that, a
# single atom.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:element] keyed by start index.
def _nt_element
  start_index = index
  memo = node_cache[:element]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  # ordered choice: grouped first, atom second
  node = _nt_grouped || _nt_atom
  @index = start_index unless node

  memo[start_index] = node
  node
end
678
+
679
# Child accessor for nodes produced by the +grouped+ rule
# ('{' expression '}').
module Grouped0
  # The expression between the braces.
  def expression
    elements.at(1)
  end
end

# Semantic value for +grouped+ nodes.
module Grouped1
  # A group evaluates to the value of the expression it wraps.
  def value
    inner = expression
    inner.value
  end
end
691
+
692
# Parses the +grouped+ rule: '{' followed by an expression and '}'.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:grouped] keyed by start index. On success the node is
# extended with Grouped0 and Grouped1; on failure the index is rewound.
def _nt_grouped
  start_index = index
  memo = node_cache[:grouped]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  parts = []
  if has_terminal?('{', false, index)
    parts << instantiate_node(SyntaxNode, input, index...(index + 1))
    @index += 1
    parts << _nt_expression
    # the closing brace is only looked for once the expression matched
    if parts.last
      if has_terminal?('}', false, index)
        parts << instantiate_node(SyntaxNode, input, index...(index + 1))
        @index += 1
      else
        terminal_parse_failure('}')
        parts << nil
      end
    end
  else
    terminal_parse_failure('{')
    parts << nil
  end

  if parts.last
    node = instantiate_node(SyntaxNode, input, start_index...index, parts)
    node.extend(Grouped0)
    node.extend(Grouped1)
  else
    @index = start_index
    node = nil
  end

  memo[start_index] = node
  node
end
739
+
740
# Semantic value for nodes produced by the +atoms+ rule.
module Atoms0
  # A run of atoms evaluates to the exact text it matched.
  def value; text_value; end
end
745
+
746
# Parses the +atoms+ rule: one or more consecutive atoms.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:atoms] keyed by start index. On success the node is extended
# with Atoms0; when no atom matches, the index is rewound and nil returned.
def _nt_atoms
  start_index = index
  memo = node_cache[:atoms]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  found = []
  while (atom = _nt_atom)
    found << atom
  end

  if found.empty?
    @index = start_index
    node = nil
  else
    node = instantiate_node(SyntaxNode, input, start_index...index, found)
    node.extend(Atoms0)
  end

  memo[start_index] = node
  node
end
778
+
779
# Semantic value for nodes produced by the +atom+ rule.
module Atom0
  # An atom evaluates to the exact text it matched.
  def value; text_value; end
end
784
+
785
# Parses the +atom+ rule: a single character accepted by the pattern
# passed to has_terminal? in regex mode.
#
# Results (including failures, stored as nil) are memoized in
# node_cache[:atom] keyed by start index. On success the one-character
# node is extended with Atom0. No terminal failure is recorded on a miss.
def _nt_atom
  start_index = index
  memo = node_cache[:atom]
  if memo.has_key?(index)
    hit = memo[index]
    return hit unless hit
    # a cached +true+ is replaced by a one-character node
    hit = SyntaxNode.new(input, index...(index + 1)) if hit == true
    @index = hit.interval.end
    return hit
  end

  node = nil
  if has_terminal?('\G[^{}\\^_]', true, index)
    node = instantiate_node(SyntaxNode, input, index...(index + 1))
    node.extend(Atom0)
    @index += 1
  end

  memo[start_index] = node
  node
end
808
+
809
+ end
810
+
811
# Concrete parser: a Treetop compiled parser carrying the rule methods
# defined in the Latex module.
class LatexParser < Treetop::Runtime::CompiledParser
  include(Latex)
end
814
+
815
+ end