html2doc 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +2 -2
- data/html2doc.gemspec +1 -1
- data/lib/html2doc/mime.rb +3 -1
- data/lib/html2doc/version.rb +1 -1
- metadata +5 -10
- data/lib/asciimath/cli.rb +0 -18
- data/lib/asciimath/html.rb +0 -222
- data/lib/asciimath/mathml.rb +0 -131
- data/lib/asciimath/parser.rb +0 -591
- data/lib/asciimath/version.rb +0 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c88de00bdeb2cbb88460c403e03cd10d623fb1152371129a6b9cf90c2f664a02
|
4
|
+
data.tar.gz: 0c61af5fa5eb93dcc4730328055b0914dfe03de8794909779c6d99e1308a0a22
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 73ddb8e6c7e4505df3127737c4302a364c1ade83b18c55274c7e0bc34a3640cbc5e4d02cd94eca0cd72eb6f2c505ee6b701388eaadbb1b45969d543df15b778d
|
7
|
+
data.tar.gz: 15556cf840a5fe4de804e5de8d0eb0f23d470d1a7e620814e8e0a15239e8efba761be31601ea5d1a30cf806c9740eabe2ac9a8ae1382ebad158562a7c1081420
|
data/Gemfile.lock
CHANGED
data/html2doc.gemspec
CHANGED
@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_dependency "thread_safe"
|
33
33
|
spec.add_dependency "uuidtools"
|
34
34
|
spec.add_dependency "ruby-xslt"
|
35
|
-
spec.add_dependency "asciimath"
|
35
|
+
spec.add_dependency "asciimath", "~> 1.0.7"
|
36
36
|
|
37
37
|
spec.add_development_dependency "bundler", "~> 2.0.1"
|
38
38
|
spec.add_development_dependency "byebug", "~> 9.1"
|
data/lib/html2doc/mime.rb
CHANGED
@@ -88,6 +88,7 @@ module Html2Doc
|
|
88
88
|
next unless i.element? && %w(img v:imagedata).include?(i.name)
|
89
89
|
warnsvg(i["src"])
|
90
90
|
next if /^http/.match i["src"]
|
91
|
+
next if %r{^data:image/[^;]+;base64}.match i["src"]
|
91
92
|
local_filename = File.join(localdir, i["src"])
|
92
93
|
new_filename = "#{mkuuid}#{File.extname(i["src"])}"
|
93
94
|
FileUtils.cp local_filename, File.join(dir, new_filename)
|
@@ -106,7 +107,8 @@ module Html2Doc
|
|
106
107
|
end
|
107
108
|
|
108
109
|
def self.header_image_cleanup1(a, dir, filename, localdir)
|
109
|
-
if a.size == 2 && !(/ src="https?:/.match a[1])
|
110
|
+
if a.size == 2 && !(/ src="https?:/.match a[1]) &&
|
111
|
+
!(%r{ src="data:image/[^;]+;base64}.match a[1])
|
110
112
|
m = / src=['"](?<src>[^"']+)['"]/.match a[1]
|
111
113
|
warnsvg(m[:src])
|
112
114
|
m2 = /\.(?<suffix>\S+)$/.match m[:src]
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.7
|
4
|
+
version: 0.8.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
@@ -112,16 +112,16 @@ dependencies:
|
|
112
112
|
name: asciimath
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- - ">="
|
115
|
+
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
117
|
+
version: 1.0.7
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- - ">="
|
122
|
+
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '0'
|
124
|
+
version: 1.0.7
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: bundler
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -306,11 +306,6 @@ files:
|
|
306
306
|
- bin/rspec
|
307
307
|
- bin/setup
|
308
308
|
- html2doc.gemspec
|
309
|
-
- lib/asciimath/cli.rb
|
310
|
-
- lib/asciimath/html.rb
|
311
|
-
- lib/asciimath/mathml.rb
|
312
|
-
- lib/asciimath/parser.rb
|
313
|
-
- lib/asciimath/version.rb
|
314
309
|
- lib/html2doc.rb
|
315
310
|
- lib/html2doc/base.rb
|
316
311
|
- lib/html2doc/lists.rb
|
data/lib/asciimath/cli.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require_relative 'parser'
|
2
|
-
require_relative 'mathml'
|
3
|
-
require_relative 'html'
|
4
|
-
|
5
|
-
module AsciiMath
|
6
|
-
module CLI
|
7
|
-
def self.run(args)
|
8
|
-
asciimath = args.last
|
9
|
-
output = ''
|
10
|
-
if args.length == 1 || args.first == "mathml"
|
11
|
-
output = AsciiMath.parse(asciimath).to_mathml
|
12
|
-
elsif args.first == "html"
|
13
|
-
output = AsciiMath.parse(asciimath).to_html
|
14
|
-
end
|
15
|
-
puts output
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/asciimath/html.rb
DELETED
@@ -1,222 +0,0 @@
|
|
1
|
-
module AsciiMath
|
2
|
-
class HTMLBuilder
|
3
|
-
def initialize(prefix)
|
4
|
-
@prefix = prefix
|
5
|
-
@html = ''
|
6
|
-
end
|
7
|
-
|
8
|
-
def to_s
|
9
|
-
@html
|
10
|
-
end
|
11
|
-
|
12
|
-
def append_expression(expression, inline, attrs = {})
|
13
|
-
if inline
|
14
|
-
inline('', attrs) do
|
15
|
-
append(expression, :single_child => true)
|
16
|
-
end
|
17
|
-
else
|
18
|
-
block('', attrs) do
|
19
|
-
append(expression, :single_child => true)
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
private
|
25
|
-
|
26
|
-
ZWJ = "\u8205"
|
27
|
-
|
28
|
-
def append(expression, opts = {})
|
29
|
-
case expression
|
30
|
-
when Array
|
31
|
-
row do
|
32
|
-
expression.each { |e| append(e) }
|
33
|
-
end
|
34
|
-
when Hash
|
35
|
-
case expression[:type]
|
36
|
-
when :operator
|
37
|
-
operator(expression[:c])
|
38
|
-
when :identifier
|
39
|
-
identifier(expression[:c])
|
40
|
-
when :number
|
41
|
-
number(expression[:c])
|
42
|
-
when :text
|
43
|
-
text(expression[:c])
|
44
|
-
when :paren
|
45
|
-
paren = !opts[:strip_paren]
|
46
|
-
if paren
|
47
|
-
if opts[:single_child]
|
48
|
-
brace(expression[:lparen]) if expression[:lparen]
|
49
|
-
append(expression[:e], :single_child => true)
|
50
|
-
brace(expression[:rparen]) if expression[:rparen]
|
51
|
-
else
|
52
|
-
row do
|
53
|
-
brace(expression[:lparen]) if expression[:lparen]
|
54
|
-
append(expression[:e], :single_child => true)
|
55
|
-
brace(expression[:rparen]) if expression[:rparen]
|
56
|
-
end
|
57
|
-
end
|
58
|
-
else
|
59
|
-
append(expression[:e])
|
60
|
-
end
|
61
|
-
when :font
|
62
|
-
#TODO - currently ignored
|
63
|
-
when :unary
|
64
|
-
operator = expression[:operator]
|
65
|
-
tag(operator) do
|
66
|
-
append(expression[:s], :single_child => true, :strip_paren => true)
|
67
|
-
end
|
68
|
-
when :binary
|
69
|
-
operator = expression[:operator]
|
70
|
-
if operator == :frac
|
71
|
-
append_fraction(expression[:s1],expression[:s2])
|
72
|
-
elsif operator == :sub
|
73
|
-
append_subsup(expression[:s1],expression[:s2],nil)
|
74
|
-
elsif operator == :sup
|
75
|
-
append_subsup(expression[:s1],nil,expression[:s2])
|
76
|
-
elsif operator == :under
|
77
|
-
append_underover(expression[:s1],expression[:s2],nil)
|
78
|
-
elsif operator == :over
|
79
|
-
append_underover(expression[:s1],nil,expression[:s2])
|
80
|
-
else
|
81
|
-
tag(operator) do
|
82
|
-
append(expression[:s1], :strip_paren => true)
|
83
|
-
append(expression[:s2], :strip_paren => true)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
when :ternary
|
87
|
-
operator = expression[:operator]
|
88
|
-
if operator == :subsup
|
89
|
-
append_subsup(expression[:s1],expression[:s2],expression[:s3])
|
90
|
-
elsif operator == :underover
|
91
|
-
# TODO: Handle over/under braces in some way? SVG maybe?
|
92
|
-
append_underover(expression[:s1],expression[:s2],expression[:s3])
|
93
|
-
end
|
94
|
-
when :matrix
|
95
|
-
row do
|
96
|
-
# Figures out a font size for the braces, based on the height of the matrix.
|
97
|
-
# NOTE: This does not currently consider the size of each element within the matrix.
|
98
|
-
brace_height = "font-size: " + expression[:rows].length.to_s + "00%;"
|
99
|
-
|
100
|
-
if expression[:lparen]
|
101
|
-
brace(expression[:lparen], {:style => brace_height})
|
102
|
-
else
|
103
|
-
blank(ZWJ)
|
104
|
-
end
|
105
|
-
matrix_width = "grid-template-columns:repeat(" + expression[:rows][0].length.to_s + ",1fr);"
|
106
|
-
matrix_height = "grid-template-rows:repeat(" + expression[:rows].length.to_s + ",1fr);"
|
107
|
-
|
108
|
-
matrix({:style => (matrix_width + matrix_height)}) do
|
109
|
-
expression[:rows].each do |row|
|
110
|
-
row.each do |col|
|
111
|
-
row do
|
112
|
-
append(col)
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
if expression[:rparen]
|
118
|
-
brace(expression[:rparen], {:style => brace_height})
|
119
|
-
else
|
120
|
-
blank(ZWJ)
|
121
|
-
end
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
|
127
|
-
def append_subsup(base, sub, sup)
|
128
|
-
append(base)
|
129
|
-
subsup do
|
130
|
-
if sup
|
131
|
-
smaller do
|
132
|
-
append(sup, :strip_paren => true)
|
133
|
-
end
|
134
|
-
else
|
135
|
-
smaller(ZWJ)
|
136
|
-
end
|
137
|
-
if sub
|
138
|
-
smaller do
|
139
|
-
append(sub, :strip_paren => true)
|
140
|
-
end
|
141
|
-
else
|
142
|
-
smaller(ZWJ)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
def append_underover(base, under, over)
|
148
|
-
blank(ZWJ)
|
149
|
-
underover do
|
150
|
-
smaller do
|
151
|
-
if over
|
152
|
-
append(over, :strip_paren => true)
|
153
|
-
else
|
154
|
-
blank(ZWJ)
|
155
|
-
end
|
156
|
-
end
|
157
|
-
append(base)
|
158
|
-
smaller do
|
159
|
-
if under
|
160
|
-
append(under, :strip_paren => true)
|
161
|
-
else
|
162
|
-
blank(ZWJ)
|
163
|
-
end
|
164
|
-
end
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
|
-
def append_fraction(numerator, denominator)
|
169
|
-
blank(ZWJ)
|
170
|
-
fraction do
|
171
|
-
fraction_row do
|
172
|
-
fraction_cell do
|
173
|
-
smaller do
|
174
|
-
row do
|
175
|
-
append(numerator, :strip_paren => true)
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
180
|
-
fraction_row do
|
181
|
-
fraction_cell do
|
182
|
-
smaller do
|
183
|
-
row do
|
184
|
-
append(denominator, :strip_paren => true)
|
185
|
-
end
|
186
|
-
end
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
def method_missing(meth, *args, &block)
|
193
|
-
tag(meth, *args, &block)
|
194
|
-
end
|
195
|
-
|
196
|
-
def tag(tag, *args)
|
197
|
-
attrs = args.last.is_a?(Hash) ? args.pop : {}
|
198
|
-
text = args.last.is_a?(String) ? args.pop : ''
|
199
|
-
|
200
|
-
@html << '<span class="math-' << @prefix << tag.to_s << '"'
|
201
|
-
|
202
|
-
attrs.each_pair do |key, value|
|
203
|
-
@html << ' ' << key.to_s << '="' << value.to_s << '"'
|
204
|
-
end
|
205
|
-
|
206
|
-
if block_given? || text
|
207
|
-
@html << '>'
|
208
|
-
@html << text.encode(Encoding::US_ASCII, :xml => :text) if text
|
209
|
-
yield if block_given?
|
210
|
-
@html << '</span>'
|
211
|
-
else
|
212
|
-
@html << '/>'
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
class Expression
|
218
|
-
def to_html(prefix = "", inline = true, attrs = {})
|
219
|
-
HTMLBuilder.new(prefix).append_expression(@parsed_expression, inline, attrs).to_s
|
220
|
-
end
|
221
|
-
end
|
222
|
-
end
|
data/lib/asciimath/mathml.rb
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
module AsciiMath
|
2
|
-
class MathMLBuilder
|
3
|
-
def initialize(prefix)
|
4
|
-
@prefix = prefix
|
5
|
-
@mathml = ''
|
6
|
-
end
|
7
|
-
|
8
|
-
def to_s
|
9
|
-
@mathml
|
10
|
-
end
|
11
|
-
|
12
|
-
def append_expression(expression, attrs = {})
|
13
|
-
math('', attrs) do
|
14
|
-
append(expression, :single_child => true)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
private
|
19
|
-
|
20
|
-
def append(expression, opts = {})
|
21
|
-
case expression
|
22
|
-
when Array
|
23
|
-
if expression.length <= 1 || opts[:single_child]
|
24
|
-
expression.each { |e| append(e) }
|
25
|
-
else
|
26
|
-
mrow do
|
27
|
-
expression.each { |e| append(e) }
|
28
|
-
end
|
29
|
-
end
|
30
|
-
when Hash
|
31
|
-
case expression[:type]
|
32
|
-
when :operator
|
33
|
-
mo(expression[:c])
|
34
|
-
when :identifier
|
35
|
-
mi(expression[:c])
|
36
|
-
when :number
|
37
|
-
mn(expression[:c])
|
38
|
-
when :text
|
39
|
-
mtext(expression[:c])
|
40
|
-
when :paren
|
41
|
-
paren = !opts[:strip_paren]
|
42
|
-
if paren
|
43
|
-
if opts[:single_child]
|
44
|
-
mo(expression[:lparen]) if expression[:lparen]
|
45
|
-
append(expression[:e], :single_child => true)
|
46
|
-
mo(expression[:rparen]) if expression[:rparen]
|
47
|
-
else
|
48
|
-
mrow do
|
49
|
-
mo(expression[:lparen]) if expression[:lparen]
|
50
|
-
append(expression[:e], :single_child => true)
|
51
|
-
mo(expression[:rparen]) if expression[:rparen]
|
52
|
-
end
|
53
|
-
end
|
54
|
-
else
|
55
|
-
append(expression[:e])
|
56
|
-
end
|
57
|
-
when :font
|
58
|
-
style = expression[:operator]
|
59
|
-
tag("mstyle", :mathvariant => style.to_s.gsub('_', '-')) do
|
60
|
-
append(expression[:s], :single_child => true, :strip_paren => true)
|
61
|
-
end
|
62
|
-
when :unary
|
63
|
-
operator = expression[:operator]
|
64
|
-
tag("m#{operator}") do
|
65
|
-
append(expression[:s], :single_child => true, :strip_paren => true)
|
66
|
-
end
|
67
|
-
when :binary
|
68
|
-
operator = expression[:operator]
|
69
|
-
tag("m#{operator}") do
|
70
|
-
append(expression[:s1], :strip_paren => (operator != :sub && operator != :sup))
|
71
|
-
append(expression[:s2], :strip_paren => true)
|
72
|
-
end
|
73
|
-
when :ternary
|
74
|
-
operator = expression[:operator]
|
75
|
-
tag("m#{operator}") do
|
76
|
-
append(expression[:s1])
|
77
|
-
append(expression[:s2], :strip_paren => true)
|
78
|
-
append(expression[:s3], :strip_paren => true)
|
79
|
-
end
|
80
|
-
when :matrix
|
81
|
-
mrow do
|
82
|
-
mo(expression[:lparen]) if expression[:lparen]
|
83
|
-
mtable do
|
84
|
-
expression[:rows].each do |row|
|
85
|
-
mtr do
|
86
|
-
row.each do |col|
|
87
|
-
mtd do
|
88
|
-
append(col)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
mo(expression[:rparen]) if expression[:rparen]
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
99
|
-
|
100
|
-
def method_missing(meth, *args, &block)
|
101
|
-
tag(meth, *args, &block)
|
102
|
-
end
|
103
|
-
|
104
|
-
def tag(tag, *args)
|
105
|
-
attrs = args.last.is_a?(Hash) ? args.pop : {}
|
106
|
-
text = args.last.is_a?(String) ? args.pop : ''
|
107
|
-
|
108
|
-
@mathml << '<' << @prefix << tag.to_s
|
109
|
-
|
110
|
-
attrs.each_pair do |key, value|
|
111
|
-
@mathml << ' ' << key.to_s << '="' << value.to_s << '"'
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
if block_given? || text
|
116
|
-
@mathml << '>'
|
117
|
-
@mathml << text.encode(Encoding::US_ASCII, :xml => :text) if text
|
118
|
-
yield self if block_given?
|
119
|
-
@mathml << '</' << @prefix << tag.to_s << '>'
|
120
|
-
else
|
121
|
-
@mathml << '/>'
|
122
|
-
end
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
class Expression
|
127
|
-
def to_mathml(prefix = "", attrs = {})
|
128
|
-
MathMLBuilder.new(prefix).append_expression(@parsed_expression, attrs).to_s
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
data/lib/asciimath/parser.rb
DELETED
@@ -1,591 +0,0 @@
|
|
1
|
-
require 'strscan'
|
2
|
-
|
3
|
-
# Parser for ASCIIMath expressions.
|
4
|
-
#
|
5
|
-
# The syntax for ASCIIMath in EBNF style notation is
|
6
|
-
#
|
7
|
-
# expr = ( simp ( fraction | sub | super ) )+
|
8
|
-
# simp = constant | paren_expr | unary_expr | binary_expr | text
|
9
|
-
# fraction = '/' simp
|
10
|
-
# super = '^' simp
|
11
|
-
# sub = '_' simp super?
|
12
|
-
# paren_expr = lparen expr rparen
|
13
|
-
# lparen = '(' | '[' | '{' | '(:' | '{:'
|
14
|
-
# rparen = ')' | ']' | '}' | ':)' | ':}'
|
15
|
-
# unary_expr = unary_op simp
|
16
|
-
# unary_op = 'sqrt' | 'text'
|
17
|
-
# binary_expr = binary_op simp simp
|
18
|
-
# binary_op = 'frac' | 'root' | 'stackrel'
|
19
|
-
# text = '"' [^"]* '"'
|
20
|
-
# constant = number | symbol | identifier
|
21
|
-
# number = '-'? [0-9]+ ( '.' [0-9]+ )?
|
22
|
-
# symbol = /* any string in the symbol table */
|
23
|
-
# identifier = [A-z]
|
24
|
-
#
|
25
|
-
# ASCIIMath is parsed left to right without any form of operator precedence.
|
26
|
-
# When parsing the 'constant' the parser will try to find the longest matching string in the symbol
|
27
|
-
# table starting at the current position of the parser. If no matching string can be found the
|
28
|
-
# character at the current position of the parser is interpreted as an identifier instead.
|
29
|
-
module AsciiMath
|
30
|
-
# Internal: Splits an ASCIIMath expression into a sequence of tokens.
|
31
|
-
# Each token is represented as a Hash containing the keys :value and :type.
|
32
|
-
# The :value key is used to store the text associated with each token.
|
33
|
-
# The :type key indicates the semantics of the token. The value for :type will be one
|
34
|
-
# of the following symbols:
|
35
|
-
#
|
36
|
-
# - :identifier a symbolic name or a bit of text without any further semantics
|
37
|
-
# - :text a bit of arbitrary text
|
38
|
-
# - :number a number
|
39
|
-
# - :operator a mathematical operator symbol
|
40
|
-
# - :unary a unary operator (e.g., sqrt, text, ...)
|
41
|
-
# - :font a unary font command (e.g., bb, cc, ...)
|
42
|
-
# - :infix an infix operator (e.g, /, _, ^, ...)
|
43
|
-
# - :binary a binary operator (e.g., frac, root, ...)
|
44
|
-
# - :accent an accent character
|
45
|
-
# - :eof indicates no more tokens are available
|
46
|
-
#
|
47
|
-
# Each token type may also have an :underover modifier. When present and set to true
|
48
|
-
# sub- and superscript expressions associated with the token will be rendered as
|
49
|
-
# under- and overscriptabove and below rather than as sub- or superscript.
|
50
|
-
#
|
51
|
-
# :accent tokens additionally have a :postion value which is set to either :over or :under.
|
52
|
-
# This determines if the accent should be rendered over or under the expression to which
|
53
|
-
# it applies.
|
54
|
-
#
|
55
|
-
class Tokenizer
|
56
|
-
WHITESPACE = /^\s+/
|
57
|
-
NUMBER = /-?[0-9]+(?:\.[0-9]+)?/
|
58
|
-
QUOTED_TEXT = /"[^"]*"/
|
59
|
-
TEX_TEXT = /text\([^)]*\)/
|
60
|
-
|
61
|
-
# Public: Initializes an ASCIIMath tokenizer.
|
62
|
-
#
|
63
|
-
# string - The ASCIIMath expression to tokenize
|
64
|
-
# symbols - The symbol table to use while tokenizing
|
65
|
-
def initialize(string, symbols)
|
66
|
-
@string = StringScanner.new(string)
|
67
|
-
@symbols = symbols
|
68
|
-
lookahead = @symbols.keys.map { |k| k.length }.max
|
69
|
-
@symbol_regexp = /([^\s0-9]{1,#{lookahead}})/
|
70
|
-
@push_back = nil
|
71
|
-
end
|
72
|
-
|
73
|
-
# Public: Read the next token from the ASCIIMath expression and move the tokenizer
|
74
|
-
# ahead by one token.
|
75
|
-
#
|
76
|
-
# Returns the next token as a Hash
|
77
|
-
def next_token
|
78
|
-
if @push_back
|
79
|
-
t = @push_back
|
80
|
-
@push_back = nil
|
81
|
-
return t
|
82
|
-
end
|
83
|
-
|
84
|
-
@string.scan(WHITESPACE)
|
85
|
-
|
86
|
-
return {:value => nil, :type => :eof} if @string.eos?
|
87
|
-
|
88
|
-
case @string.peek(1)
|
89
|
-
when '"'
|
90
|
-
read_quoted_text
|
91
|
-
when 't'
|
92
|
-
case @string.peek(5)
|
93
|
-
when 'text('
|
94
|
-
read_tex_text
|
95
|
-
else
|
96
|
-
read_symbol
|
97
|
-
end
|
98
|
-
when '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
|
99
|
-
read_number || read_symbol
|
100
|
-
else
|
101
|
-
read_symbol
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
# Public: Pushes the given token back to the tokenizer. A subsequent call to next_token
|
106
|
-
# will return the given token rather than generating a new one. At most one
|
107
|
-
# token can be pushed back.
|
108
|
-
#
|
109
|
-
# token - The token to push back
|
110
|
-
def push_back(token)
|
111
|
-
@push_back = token unless token[:type] == :eof
|
112
|
-
end
|
113
|
-
|
114
|
-
private
|
115
|
-
|
116
|
-
# Private: Reads a text token from the input string
|
117
|
-
#
|
118
|
-
# Returns the text token or nil if a text token could not be matched at
|
119
|
-
# the current position
|
120
|
-
def read_quoted_text
|
121
|
-
read_value(QUOTED_TEXT) do |text|
|
122
|
-
{:value => text[1..-2], :type => :text}
|
123
|
-
end
|
124
|
-
end
|
125
|
-
|
126
|
-
# Private: Reads a text token from the input string
|
127
|
-
#
|
128
|
-
# Returns the text token or nil if a text token could not be matched at
|
129
|
-
# the current position
|
130
|
-
def read_tex_text
|
131
|
-
read_value(TEX_TEXT) do |text|
|
132
|
-
{:value => text[5..-2], :type => :text}
|
133
|
-
end
|
134
|
-
end
|
135
|
-
|
136
|
-
# Private: Reads a number token from the input string
|
137
|
-
#
|
138
|
-
# Returns the number token or nil if a number token could not be matched at
|
139
|
-
# the current position
|
140
|
-
def read_number
|
141
|
-
read_value(NUMBER) do |number|
|
142
|
-
{:value => number, :type => :number}
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
if String.method_defined?(:bytesize)
|
147
|
-
def bytesize(s)
|
148
|
-
s.bytesize
|
149
|
-
end
|
150
|
-
else
|
151
|
-
def bytesize(s)
|
152
|
-
s.length
|
153
|
-
end
|
154
|
-
end
|
155
|
-
|
156
|
-
|
157
|
-
# Private: Reads a symbol token from the input string. This method first creates
|
158
|
-
# a String from the input String starting from the current position with a length
|
159
|
-
# that matches that of the longest key in the symbol table. It then looks up that
|
160
|
-
# substring in the symbol table. If the substring is present in the symbol table, the
|
161
|
-
# associated value is returned and the position is moved ahead by the length of the
|
162
|
-
# substring. Otherwise this method chops one character off the end of the substring
|
163
|
-
# and repeats the symbol lookup. This continues until a single character is left.
|
164
|
-
# If that character can still not be found in the symbol table, then an identifier
|
165
|
-
# token is returned whose value is the remaining single character string.
|
166
|
-
#
|
167
|
-
# Returns the token that was read or nil if a token could not be matched at
|
168
|
-
# the current position
|
169
|
-
def read_symbol
|
170
|
-
position = @string.pos
|
171
|
-
read_value(@symbol_regexp) do |s|
|
172
|
-
until s.length == 1 || @symbols.include?(s)
|
173
|
-
s.chop!
|
174
|
-
end
|
175
|
-
@string.pos = position + bytesize(s)
|
176
|
-
@symbols[s] || {:value => s, :type => :identifier}
|
177
|
-
end
|
178
|
-
end
|
179
|
-
|
180
|
-
# Private: Reads a String from the input String that matches the given RegExp
|
181
|
-
#
|
182
|
-
# regexp - a RegExp that will be used to match the token
|
183
|
-
# block - if a block is provided the matched token will be passed to the block
|
184
|
-
#
|
185
|
-
# Returns the matched String or the value returned by the block if one was given
|
186
|
-
def read_value(regexp)
|
187
|
-
s = @string.scan(regexp)
|
188
|
-
if s
|
189
|
-
yield s
|
190
|
-
else
|
191
|
-
s
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
if String.respond_to?(:byte_size)
|
196
|
-
def byte_size(s)
|
197
|
-
s.byte_size
|
198
|
-
end
|
199
|
-
end
|
200
|
-
end
|
201
|
-
|
202
|
-
class Parser
|
203
|
-
SYMBOLS = {
|
204
|
-
# Operation symbols
|
205
|
-
'+' => {:value => '+', :type => :operator},
|
206
|
-
'-' => {:value => "\u2212", :type => :operator},
|
207
|
-
'*' => {:value => "\u22C5", :type => :operator},
|
208
|
-
'**' => {:value => "\u002A", :type => :operator},
|
209
|
-
'***' => {:value => "\u22C6", :type => :operator},
|
210
|
-
'//' => {:value => '/', :type => :operator},
|
211
|
-
'\\\\' => {:value => '\\', :type => :operator},
|
212
|
-
'xx' => {:value => "\u00D7", :type => :operator},
|
213
|
-
'-:' => {:value => "\u00F7", :type => :operator},
|
214
|
-
'|><' => {:value => "\u22C9", :type => :operator},
|
215
|
-
'><|' => {:value => "\u22CA", :type => :operator},
|
216
|
-
'|><|' => {:value => "\u22C8", :type => :operator},
|
217
|
-
'@' => {:value => "\u26AC", :type => :operator},
|
218
|
-
'o+' => {:value => "\u2295", :type => :operator},
|
219
|
-
'ox' => {:value => "\u2297", :type => :operator},
|
220
|
-
'o.' => {:value => "\u2299", :type => :operator},
|
221
|
-
'sum' => {:value => "\u2211", :type => :operator, :underover => true},
|
222
|
-
'prod' => {:value => "\u220F", :type => :operator, :underover => true},
|
223
|
-
'^^' => {:value => "\u2227", :type => :operator},
|
224
|
-
'^^^' => {:value => "\u22C0", :type => :operator, :underover => true},
|
225
|
-
'vv' => {:value => "\u2228", :type => :operator},
|
226
|
-
'vvv' => {:value => "\u22C1", :type => :operator, :underover => true},
|
227
|
-
'nn' => {:value => "\u2229", :type => :operator},
|
228
|
-
'nnn' => {:value => "\u22C2", :type => :operator, :underover => true},
|
229
|
-
'uu' => {:value => "\u222A", :type => :operator},
|
230
|
-
'uuu' => {:value => "\u22C3", :type => :operator, :underover => true},
|
231
|
-
|
232
|
-
# Relation symbols
|
233
|
-
'=' => {:value => '=', :type => :operator},
|
234
|
-
'!=' => {:value => "\u2260", :type => :operator},
|
235
|
-
':=' => {:value => ':=', :type => :operator},
|
236
|
-
'<' => {:value => "\u003C", :type => :operator},
|
237
|
-
'lt' => {:value => "\u003C", :type => :operator},
|
238
|
-
'>' => {:value => "\u003E", :type => :operator},
|
239
|
-
'gt' => {:value => "\u003E", :type => :operator},
|
240
|
-
'<=' => {:value => "\u2264", :type => :operator},
|
241
|
-
'le' => {:value => "\u2264", :type => :operator},
|
242
|
-
'>=' => {:value => "\u2265", :type => :operator},
|
243
|
-
'ge' => {:value => "\u2265", :type => :operator},
|
244
|
-
'-<' => {:value => "\u227A", :type => :operator},
|
245
|
-
'>-' => {:value => "\u227B", :type => :operator},
|
246
|
-
'-<=' => {:value => "\u2AAF", :type => :operator},
|
247
|
-
'>-=' => {:value => "\u2AB0", :type => :operator},
|
248
|
-
'in' => {:value => "\u2208", :type => :operator},
|
249
|
-
'!in' => {:value => "\u2209", :type => :operator},
|
250
|
-
'sub' => {:value => "\u2282", :type => :operator},
|
251
|
-
'sup' => {:value => "\u2283", :type => :operator},
|
252
|
-
'sube' => {:value => "\u2286", :type => :operator},
|
253
|
-
'supe' => {:value => "\u2287", :type => :operator},
|
254
|
-
'-=' => {:value => "\u2261", :type => :operator},
|
255
|
-
'~=' => {:value => "\u2245", :type => :operator},
|
256
|
-
'~~' => {:value => "\u2248", :type => :operator},
|
257
|
-
'prop' => {:value => "\u221D", :type => :operator},
|
258
|
-
|
259
|
-
# Logical symbols
|
260
|
-
'and' => {:value => 'and', :type => :text},
|
261
|
-
'or' => {:value => 'or', :type => :text},
|
262
|
-
'not' => {:value => "\u00AC", :type => :operator},
|
263
|
-
'=>' => {:value => "\u21D2", :type => :operator},
|
264
|
-
'if' => {:value => 'if', :type => :operator},
|
265
|
-
'<=>' => {:value => "\u21D4", :type => :operator},
|
266
|
-
'AA' => {:value => "\u2200", :type => :operator},
|
267
|
-
'EE' => {:value => "\u2203", :type => :operator},
|
268
|
-
'_|_' => {:value => "\u22A5", :type => :operator},
|
269
|
-
'TT' => {:value => "\u22A4", :type => :operator},
|
270
|
-
'|--' => {:value => "\u22A2", :type => :operator},
|
271
|
-
'|==' => {:value => "\u22A8", :type => :operator},
|
272
|
-
|
273
|
-
# Grouping brackets
|
274
|
-
'(' => {:value => '(', :type => :lparen},
|
275
|
-
')' => {:value => ')', :type => :rparen},
|
276
|
-
'[' => {:value => '[', :type => :lparen},
|
277
|
-
']' => {:value => ']', :type => :rparen},
|
278
|
-
'{' => {:value => '{', :type => :lparen},
|
279
|
-
'}' => {:value => '}', :type => :rparen},
|
280
|
-
'(:' => {:value => "\u2329", :type => :lparen},
|
281
|
-
':)' => {:value => "\u232A", :type => :rparen},
|
282
|
-
'<<' => {:value => "\u2329", :type => :lparen},
|
283
|
-
'>>' => {:value => "\u232A", :type => :rparen},
|
284
|
-
'|' => {:value => '|', :type => :lrparen},
|
285
|
-
'||' => {:value => '||', :type => :lrparen},
|
286
|
-
'{:' => {:value => nil, :type => :lparen},
|
287
|
-
':}' => {:value => nil, :type => :rparen},
|
288
|
-
|
289
|
-
# Miscellaneous symbols
|
290
|
-
'int' => {:value => "\u222B", :type => :operator},
|
291
|
-
'dx' => {:value => 'dx', :type => :identifier},
|
292
|
-
'dy' => {:value => 'dy', :type => :identifier},
|
293
|
-
'dz' => {:value => 'dz', :type => :identifier},
|
294
|
-
'dt' => {:value => 'dt', :type => :identifier},
|
295
|
-
'oint' => {:value => "\u222E", :type => :operator},
|
296
|
-
'del' => {:value => "\u2202", :type => :operator},
|
297
|
-
'grad' => {:value => "\u2207", :type => :operator},
|
298
|
-
'+-' => {:value => "\u00B1", :type => :operator},
|
299
|
-
'O/' => {:value => "\u2205", :type => :operator},
|
300
|
-
'oo' => {:value => "\u221E", :type => :operator},
|
301
|
-
'aleph' => {:value => "\u2135", :type => :operator},
|
302
|
-
'...' => {:value => '...', :type => :operator},
|
303
|
-
':.' => {:value => "\u2234", :type => :operator},
|
304
|
-
'/_' => {:value => "\u2220", :type => :operator},
|
305
|
-
'\\ ' => {:value => "\u00A0", :type => :operator},
|
306
|
-
'quad' => {:value => '\u00A0\u00A0', :type => :operator},
|
307
|
-
'qquad' => {:value => '\u00A0\u00A0\u00A0\u00A0', :type => :operator},
|
308
|
-
'cdots' => {:value => "\u22EF", :type => :operator},
|
309
|
-
'vdots' => {:value => "\u22EE", :type => :operator},
|
310
|
-
'ddots' => {:value => "\u22F1", :type => :operator},
|
311
|
-
'diamond' => {:value => "\u22C4", :type => :operator},
|
312
|
-
'square' => {:value => "\u25A1", :type => :operator},
|
313
|
-
'|__' => {:value => "\u230A", :type => :operator},
|
314
|
-
'__|' => {:value => "\u230B", :type => :operator},
|
315
|
-
'|~' => {:value => "\u2308", :type => :operator},
|
316
|
-
'~|' => {:value => "\u2309", :type => :operator},
|
317
|
-
'CC' => {:value => "\u2102", :type => :operator},
|
318
|
-
'NN' => {:value => "\u2115", :type => :operator},
|
319
|
-
'QQ' => {:value => "\u211A", :type => :operator},
|
320
|
-
'RR' => {:value => "\u211D", :type => :operator},
|
321
|
-
'ZZ' => {:value => "\u2124", :type => :operator},
|
322
|
-
|
323
|
-
'lim' => {:value => 'lim', :type => :operator, :underover => true},
|
324
|
-
'Lim' => {:value => 'Lim', :type => :operator, :underover => true},
|
325
|
-
|
326
|
-
# Standard functions
|
327
|
-
'sin' => {:value => 'sin', :type => :identifier},
|
328
|
-
'cos' => {:value => 'cos', :type => :identifier},
|
329
|
-
'tan' => {:value => 'tan', :type => :identifier},
|
330
|
-
'sec' => {:value => 'sec', :type => :identifier},
|
331
|
-
'csc' => {:value => 'csc', :type => :identifier},
|
332
|
-
'cot' => {:value => 'cot', :type => :identifier},
|
333
|
-
'arcsin' => {:value => 'arcsin', :type => :identifier},
|
334
|
-
'arccos' => {:value => 'arccos', :type => :identifier},
|
335
|
-
'arctan' => {:value => 'arctan', :type => :identifier},
|
336
|
-
'sinh' => {:value => 'sinh', :type => :identifier},
|
337
|
-
'cosh' => {:value => 'cosh', :type => :identifier},
|
338
|
-
'tanh' => {:value => 'tanh', :type => :identifier},
|
339
|
-
'sech' => {:value => 'sech', :type => :identifier},
|
340
|
-
'csch' => {:value => 'csch', :type => :identifier},
|
341
|
-
'coth' => {:value => 'coth', :type => :identifier},
|
342
|
-
'exp' => {:value => 'exp', :type => :identifier},
|
343
|
-
'log' => {:value => 'log', :type => :identifier},
|
344
|
-
'ln' => {:value => 'ln', :type => :identifier},
|
345
|
-
'det' => {:value => 'det', :type => :identifier},
|
346
|
-
'dim' => {:value => 'dim', :type => :identifier},
|
347
|
-
'mod' => {:value => 'mod', :type => :identifier},
|
348
|
-
'gcd' => {:value => 'gcd', :type => :identifier},
|
349
|
-
'lcm' => {:value => 'lcm', :type => :identifier},
|
350
|
-
'lub' => {:value => 'lub', :type => :identifier},
|
351
|
-
'glb' => {:value => 'glb', :type => :identifier},
|
352
|
-
'min' => {:value => 'min', :type => :identifier, :underover => true},
|
353
|
-
'max' => {:value => 'max', :type => :identifier, :underover => true},
|
354
|
-
'f' => {:value => 'f', :type => :identifier},
|
355
|
-
'g' => {:value => 'g', :type => :identifier},
|
356
|
-
|
357
|
-
# Accents
|
358
|
-
'hat' => {:value => "\u005E", :type => :accent, :position => :over},
|
359
|
-
'bar' => {:value => "\u00AF", :type => :accent, :position => :over},
|
360
|
-
'ul' => {:value => '_', :type => :accent, :position => :under},
|
361
|
-
'vec' => {:value => "\u2192", :type => :accent, :position => :over},
|
362
|
-
'dot' => {:value => '.', :type => :accent, :position => :over},
|
363
|
-
'ddot' => {:value => '..', :type => :accent, :position => :over},
|
364
|
-
'obrace' => {:value => "\u23DE", :type => :accent, :position => :over},
|
365
|
-
'ubrace' => {:value => "\u23DF", :type => :accent, :position => :under},
|
366
|
-
|
367
|
-
# Arrows
|
368
|
-
'uarr' => {:value => "\u2191", :type => :operator},
|
369
|
-
'darr' => {:value => "\u2193", :type => :operator},
|
370
|
-
'rarr' => {:value => "\u2192", :type => :operator},
|
371
|
-
'->' => {:value => "\u2192", :type => :operator},
|
372
|
-
'>->' => {:value => "\u21A3", :type => :operator},
|
373
|
-
'->>' => {:value => "\u21A0", :type => :operator},
|
374
|
-
'>->>' => {:value => "\u2916", :type => :operator},
|
375
|
-
'|->' => {:value => "\u21A6", :type => :operator},
|
376
|
-
'larr' => {:value => "\u2190", :type => :operator},
|
377
|
-
'harr' => {:value => "\u2194", :type => :operator},
|
378
|
-
'rArr' => {:value => "\u21D2", :type => :operator},
|
379
|
-
'lArr' => {:value => "\u21D0", :type => :operator},
|
380
|
-
'hArr' => {:value => "\u21D4", :type => :operator},
|
381
|
-
|
382
|
-
# Other
|
383
|
-
'sqrt' => {:value => :sqrt, :type => :unary},
|
384
|
-
'text' => {:value => :text, :type => :unary},
|
385
|
-
'bb' => {:value => :bold, :type => :font},
|
386
|
-
'bbb' => {:value => :double_struck, :type => :font},
|
387
|
-
'ii' => {:value => :italic, :type => :font},
|
388
|
-
'bii' => {:value => :bold_italic, :type => :font},
|
389
|
-
'cc' => {:value => :script, :type => :font},
|
390
|
-
'bcc' => {:value => :bold_script, :type => :font},
|
391
|
-
'tt' => {:value => :monospace, :type => :font},
|
392
|
-
'fr' => {:value => :fraktur, :type => :font},
|
393
|
-
'bfr' => {:value => :bold_fraktur, :type => :font},
|
394
|
-
'sf' => {:value => :sans_serif, :type => :font},
|
395
|
-
'bsf' => {:value => :bold_sans_serif, :type => :font},
|
396
|
-
'sfi' => {:value => :sans_serif_italic, :type => :font},
|
397
|
-
'sfbi' => {:value => :sans_serif_bold_italic, :type => :font},
|
398
|
-
'frac' => {:value => :frac, :type => :binary},
|
399
|
-
'root' => {:value => :root, :type => :binary},
|
400
|
-
'stackrel' => {:value => :over, :type => :binary},
|
401
|
-
'/' => {:value => :frac, :type => :infix},
|
402
|
-
'_' => {:value => :sub, :type => :infix},
|
403
|
-
'^' => {:value => :sup, :type => :infix},
|
404
|
-
|
405
|
-
# Greek letters
|
406
|
-
'alpha' => {:value => "\u03b1", :type => :identifier},
|
407
|
-
'Alpha' => {:value => "\u0391", :type => :identifier},
|
408
|
-
'beta' => {:value => "\u03b2", :type => :identifier},
|
409
|
-
'Beta' => {:value => "\u0392", :type => :identifier},
|
410
|
-
'gamma' => {:value => "\u03b3", :type => :identifier},
|
411
|
-
'Gamma' => {:value => "\u0393", :type => :operator},
|
412
|
-
'delta' => {:value => "\u03b4", :type => :identifier},
|
413
|
-
'Delta' => {:value => "\u0394", :type => :operator},
|
414
|
-
'epsilon' => {:value => "\u03b5", :type => :identifier},
|
415
|
-
'Epsilon' => {:value => "\u0395", :type => :identifier},
|
416
|
-
'varepsilon' => {:value => "\u025b", :type => :identifier},
|
417
|
-
'zeta' => {:value => "\u03b6", :type => :identifier},
|
418
|
-
'Zeta' => {:value => "\u0396", :type => :identifier},
|
419
|
-
'eta' => {:value => "\u03b7", :type => :identifier},
|
420
|
-
'Eta' => {:value => "\u0397", :type => :identifier},
|
421
|
-
'theta' => {:value => "\u03b8", :type => :identifier},
|
422
|
-
'Theta' => {:value => "\u0398", :type => :operator},
|
423
|
-
'vartheta' => {:value => "\u03d1", :type => :identifier},
|
424
|
-
'iota' => {:value => "\u03b9", :type => :identifier},
|
425
|
-
'Iota' => {:value => "\u0399", :type => :identifier},
|
426
|
-
'kappa' => {:value => "\u03ba", :type => :identifier},
|
427
|
-
'Kappa' => {:value => "\u039a", :type => :identifier},
|
428
|
-
'lambda' => {:value => "\u03bb", :type => :identifier},
|
429
|
-
'Lambda' => {:value => "\u039b", :type => :operator},
|
430
|
-
'mu' => {:value => "\u03bc", :type => :identifier},
|
431
|
-
'Mu' => {:value => "\u039c", :type => :identifier},
|
432
|
-
'nu' => {:value => "\u03bd", :type => :identifier},
|
433
|
-
'Nu' => {:value => "\u039d", :type => :identifier},
|
434
|
-
'xi' => {:value => "\u03be", :type => :identifier},
|
435
|
-
'Xi' => {:value => "\u039e", :type => :operator},
|
436
|
-
'omicron' => {:value => "\u03bf", :type => :identifier},
|
437
|
-
'Omicron' => {:value => "\u039f", :type => :identifier},
|
438
|
-
'pi' => {:value => "\u03c0", :type => :identifier},
|
439
|
-
'Pi' => {:value => "\u03a0", :type => :operator},
|
440
|
-
'rho' => {:value => "\u03c1", :type => :identifier},
|
441
|
-
'Rho' => {:value => "\u03a1", :type => :identifier},
|
442
|
-
'sigma' => {:value => "\u03c3", :type => :identifier},
|
443
|
-
'Sigma' => {:value => "\u03a3", :type => :operator},
|
444
|
-
'tau' => {:value => "\u03c4", :type => :identifier},
|
445
|
-
'Tau' => {:value => "\u03a4", :type => :identifier},
|
446
|
-
'upsilon' => {:value => "\u03c5", :type => :identifier},
|
447
|
-
'Upsilon' => {:value => "\u03a5", :type => :identifier},
|
448
|
-
'phi' => {:value => "\u03c6", :type => :identifier},
|
449
|
-
'Phi' => {:value => "\u03a6", :type => :identifier},
|
450
|
-
'varphi' => {:value => "\u03d5", :type => :identifier},
|
451
|
-
'chi' => {:value => '\u03b3c7', :type => :identifier},
|
452
|
-
'Chi' => {:value => '\u0393a7', :type => :identifier},
|
453
|
-
'psi' => {:value => "\u03c8", :type => :identifier},
|
454
|
-
'Psi' => {:value => "\u03a8", :type => :identifier},
|
455
|
-
'omega' => {:value => "\u03c9", :type => :identifier},
|
456
|
-
'Omega' => {:value => "\u03a9", :type => :operator},
|
457
|
-
}
|
458
|
-
|
459
|
-
# Parses +input+ and wraps the result in an Expression.
#
# A Tokenizer is built over +input+ using the SYMBOLS table, the token
# stream is parsed starting at nesting depth 0, and the resulting tree is
# paired with the untouched +input+.
#
# @param input the AsciiMath source handed to the tokenizer
# @return [Expression] wrapper holding +input+ and its parsed tree
def parse(input)
  tokens = Tokenizer.new(input, SYMBOLS)
  tree = parse_expression(tokens, 0)
  Expression.new(input, tree)
end
|
465
|
-
|
466
|
-
private
|
467
|
-
# Parses a run of simple expressions, combining them with infix operators.
#
# Simple expressions are read until parse_simple_expression returns nil.
# After each one a single token is inspected:
# * an infix token joins it with the following simple expression; when the
#   infix is :sub and the token after the right operand is :sup, a third
#   operand is read and a :ternary node is produced;
# * an :eof token appends the operand and ends the run;
# * any other token is pushed back and the operand is appended as-is.
#
# Operands flagged :underover turn sub/sup into :under/:over (binary) or
# :underover (ternary). When +depth+ is greater than 0, a pushed-back
# :rparen or :lrparen token ends the run so the caller can consume it.
#
# @param tok token source responding to +next_token+ and +push_back+
# @param depth [Integer] current parenthesis nesting level
# @return [Array<Hash>] the parsed nodes, in input order
def parse_expression(tok, depth)
  nodes = []
  # True when +token+ closes the group we are nested in.
  closes_group = lambda do |token|
    [:lrparen, :rparen].include?(token[:type]) && depth > 0
  end

  while (left = parse_simple_expression(tok, depth))
    infix = tok.next_token

    case infix[:type]
    when :infix
      right = parse_simple_expression(tok, depth)
      following = tok.next_token

      if infix[:value] == :sub && following[:value] == :sup
        # "a_b^c": the :sup token is consumed and a third operand follows.
        third = parse_simple_expression(tok, depth)
        operator = left[:underover] ? :underover : :subsup
        nodes << {:type => :ternary, :operator => operator, :s1 => left, :s2 => right, :s3 => third}
      else
        operator =
          if !left[:underover]
            infix[:value]
          elsif infix[:value] == :sub
            :under
          else
            :over
          end
        nodes << {:type => :binary, :operator => operator, :s1 => left, :s2 => right}
        tok.push_back(following)
        break if closes_group.call(following)
      end
    when :eof
      nodes << left
      break
    else
      nodes << left
      tok.push_back(infix)
      break if closes_group.call(infix)
    end
  end

  nodes
end
|
502
|
-
|
503
|
-
# Parses one simple expression, chosen by the type of the next token.
#
# * :lparen / :lrparen - a bracketed group. An immediately following closing
#   token yields an empty group (:e => nil). Otherwise the content is parsed
#   one level deeper; with a closing token the group is passed through
#   convert_to_matrix, without one the node carries no :rparen entry.
# * :accent - a :binary node pairing the operand with the accent symbol.
# * :unary / :font - a node of that type wrapping a single operand.
# * :binary - a :binary node with two operands.
# * :eof - nil.
# * anything else - a leaf node carrying the token's value and :underover.
#
# @param tok token source responding to +next_token+ and +push_back+
# @param depth [Integer] current parenthesis nesting level
# @return [Hash, nil] the parsed node, or nil at end of input
def parse_simple_expression(tok, depth)
  token = tok.next_token
  kind = token[:type]
  closers = [:rparen, :lrparen]

  if [:lparen, :lrparen].include?(kind)
    after_open = tok.next_token
    if closers.include?(after_open[:type])
      return {:type => :paren, :e => nil, :lparen => token[:value], :rparen => after_open[:value]}
    end

    tok.push_back(after_open)
    inner = parse_expression(tok, depth + 1)

    after_inner = tok.next_token
    if closers.include?(after_inner[:type])
      group = {:type => :paren, :e => inner, :lparen => token[:value], :rparen => after_inner[:value]}
      return convert_to_matrix(group)
    end

    # No closing token: hand the token back and leave the group open.
    tok.push_back(after_inner)
    return {:type => :paren, :e => inner, :lparen => token[:value]}
  end

  if kind == :accent
    operand = parse_simple_expression(tok, depth)
    accent = {:type => :operator, :c => token[:value]}
    {:type => :binary, :s1 => operand, :s2 => accent, :operator => token[:position]}
  elsif kind == :unary || kind == :font
    operand = parse_simple_expression(tok, depth)
    {:type => kind, :s => operand, :operator => token[:value]}
  elsif kind == :binary
    first = parse_simple_expression(tok, depth)
    second = parse_simple_expression(tok, depth)
    {:type => :binary, :s1 => first, :s2 => second, :operator => token[:value]}
  elsif kind == :eof
    nil
  else
    {:type => kind, :c => token[:value], :underover => token[:underover]}
  end
end
|
542
|
-
|
543
|
-
# Rewrites a bracketed group as a :matrix node when matrix? accepts it.
#
# The group's :e list alternates rows and separators, and each row's own :e
# list alternates cells and separators; only the even-indexed entries of
# both are kept. Anything matrix? rejects is returned untouched.
#
# @param expression [Hash] candidate group node
# @return [Hash] a :matrix node, or +expression+ itself
def convert_to_matrix(expression)
  if matrix?(expression)
    # Keeps elements 0, 2, 4, ... and drops the separators between them.
    without_separators = lambda { |items| items.each_slice(2).map(&:first) }

    rows = without_separators.call(expression[:e]).map do |row|
      without_separators.call(row[:e])
    end

    {:type => :matrix, :rows => rows, :lparen => expression[:lparen], :rparen => expression[:rparen]}
  else
    expression
  end
end
|
552
|
-
|
553
|
-
# Decides whether a bracketed group has the shape of a matrix literal.
#
# The group's :e list is read as alternating rows (even indices) and
# separators (odd indices). It is a matrix when all of the following hold:
# * there are at least two rows and more rows than separators;
# * every separator is the identifier ",";
# * every row is a :paren group, all delimited by "(" ")" or all by "[" "]";
# * every row has a content list, and all lists have the same length;
# * every row's content passes matrix_cols?.
#
# Empty groups are stored with :e => nil. Such a group, or a row that is one,
# is reported as "not a matrix" instead of raising NoMethodError.
#
# @param expression candidate node (any object)
# @return [Boolean]
def matrix?(expression)
  return false unless expression.is_a?(Hash) && expression[:type] == :paren

  content = expression[:e]
  return false unless content.is_a?(Array)

  rows, separators = content.partition.with_index { |_, i| i.even? }

  return false unless rows.length > 1 && rows.length > separators.length
  return false unless separators.all? { |item| item[:type] == :identifier && item[:c] == ',' }

  round = rows.all? { |item| item[:type] == :paren && item[:lparen] == '(' && item[:rparen] == ')' }
  square = rows.all? { |item| item[:type] == :paren && item[:lparen] == '[' && item[:rparen] == ']' }
  return false unless round || square

  # A row written as "()" or "[]" has no content list to measure.
  return false unless rows.all? { |item| item[:e].is_a?(Array) }

  width = rows.first[:e].length
  rows.all? { |item| item[:e].length == width } &&
    rows.all? { |item| matrix_cols?(item[:e]) }
end
|
566
|
-
|
567
|
-
# Checks that a row's content alternates cells and "," separators.
#
# Entries at even indices are cells and must not be the identifier ",";
# entries at odd indices are separators and must be exactly that identifier.
# Anything that is not an Array is rejected.
#
# @param expression candidate row content
# @return [Boolean]
def matrix_cols?(expression)
  return false unless expression.is_a?(Array)

  comma = lambda { |item| item[:type] == :identifier && item[:c] == ',' }

  cells = expression.select.with_index { |_, index| index.even? }
  gaps = expression.select.with_index { |_, index| index.odd? }

  cells.none? { |item| comma.call(item) } && gaps.all? { |item| comma.call(item) }
end
|
575
|
-
end
|
576
|
-
|
577
|
-
# Pairs a piece of AsciiMath source with the structure parsed from it.
class Expression
  # @param asciimath the original source; returned verbatim by #to_s
  # @param parsed_expression the parse result, kept in @parsed_expression
  def initialize(asciimath, parsed_expression)
    @asciimath, @parsed_expression = asciimath, parsed_expression
  end

  # @return the source given at construction, unchanged
  def to_s
    @asciimath
  end
end
|
587
|
-
|
588
|
-
# Convenience entry point: parses +asciimath+ with a fresh Parser.
#
# @param asciimath the source forwarded to Parser#parse
# @return whatever Parser#parse returns for +asciimath+
def self.parse(asciimath)
  parser = Parser.new
  parser.parse(asciimath)
end
|
591
|
-
end
|
data/lib/asciimath/version.rb
DELETED