rsyntaxtree 0.9.3 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.tags +203 -0
- data/Gemfile +2 -0
- data/README.md +0 -1
- data/Rakefile +7 -0
- data/bin/rsyntaxtree +38 -31
- data/fonts/OpenMoji-Black.ttf +0 -0
- data/fonts/OpenMoji-Color.ttf +0 -0
- data/lib/rsyntaxtree/base_graph.rb +262 -0
- data/lib/rsyntaxtree/element.rb +155 -25
- data/lib/rsyntaxtree/elementlist.rb +16 -13
- data/lib/rsyntaxtree/markup_parser.rb +208 -0
- data/lib/rsyntaxtree/string_parser.rb +190 -204
- data/lib/rsyntaxtree/svg_graph.rb +445 -299
- data/lib/rsyntaxtree/utils.rb +49 -6
- data/lib/rsyntaxtree/version.rb +1 -1
- data/lib/rsyntaxtree.rb +143 -161
- data/rsyntaxtree.gemspec +2 -0
- data/test/markup_parser_test.rb +207 -0
- metadata +37 -11
- data/fonts/latinmodern-math.otf +0 -0
- data/fonts/lmroman10-bold.otf +0 -0
- data/fonts/lmroman10-bolditalic.otf +0 -0
- data/fonts/lmroman10-italic.otf +0 -0
- data/fonts/lmroman10-regular.otf +0 -0
- data/lib/rsyntaxtree/error_message.rb +0 -68
- data/lib/rsyntaxtree/graph.rb +0 -312
- data/lib/rsyntaxtree/tree_graph.rb +0 -327
@@ -7,248 +7,234 @@
|
|
7
7
|
#
|
8
8
|
# Parses a phrase into leafs and nodes and store the result in an element list
|
9
9
|
# (see element_list.rb)
|
10
|
-
#
|
11
|
-
# This file is part of RSyntaxTree, which is a ruby port of Andre Eisenbach's
|
12
|
-
# excellent program phpSyntaxTree.
|
13
|
-
#
|
14
10
|
# Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
|
15
|
-
# Copyright (c) 2003-2004 Andre Eisenbach <andre@ironcreek.net>
|
16
11
|
|
17
12
|
require 'elementlist'
|
18
13
|
require 'element'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
#
|
27
|
-
#
|
28
|
-
|
29
|
-
#
|
30
|
-
|
31
|
-
#
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
@elist = ElementList.new # Initialize internal element list
|
45
|
-
@pos = 0 # Position in the sentence
|
46
|
-
@id = 1 # ID for the next element
|
47
|
-
@level = 0 # Level in the diagram
|
48
|
-
@tncnt = Hash.new # Node type counts
|
49
|
-
end
|
50
|
-
|
51
|
-
# caution: quick and dirty solution
|
52
|
-
def valid?
|
53
|
-
if(@data.length < 1)
|
54
|
-
return false
|
55
|
-
end
|
56
|
-
|
57
|
-
if /\[\s*\]/m =~ @data
|
58
|
-
return false
|
59
|
-
end
|
60
|
-
|
61
|
-
if /\[\_/ =~ @data
|
62
|
-
return false
|
63
|
-
end
|
64
|
-
|
65
|
-
text = @data.strip
|
66
|
-
text_r = text.split(//)
|
67
|
-
open_br, close_br = [], []
|
68
|
-
escape = false
|
69
|
-
text_r.each do |chr|
|
70
|
-
if chr == "\\"
|
71
|
-
escape = true
|
72
|
-
elsif chr == '[' && !escape
|
73
|
-
open_br.push(chr)
|
74
|
-
elsif chr == ']' && !escape
|
75
|
-
close_br.push(chr)
|
76
|
-
if open_br.length < close_br.length
|
77
|
-
break
|
14
|
+
require 'utils'
|
15
|
+
|
16
|
+
module RSyntaxTree
|
17
|
+
class StringParser
|
18
|
+
|
19
|
+
attr_accessor :data, :elist, :pos, :id, :level
|
20
|
+
def initialize(str, fontset, fontsize)
|
21
|
+
# Clean up the data a little to make processing easier
|
22
|
+
# repeated newlines => a newline
|
23
|
+
string = str.gsub(/[\n\r]+/m, "\n")
|
24
|
+
# a backslash followed by a newline => a backslash followed by an 'n'
|
25
|
+
string.gsub!(/\\\n\s*/m, "\\n")
|
26
|
+
# repeated whitespace characters => " "
|
27
|
+
string.gsub!(/\s+/, " ")
|
28
|
+
string.gsub!(/\]\s+\[/, "][")
|
29
|
+
string.gsub!(/\s+\[/, "[")
|
30
|
+
string.gsub!(/\[\s+/, "[")
|
31
|
+
string.gsub!(/\s+\]/, "]")
|
32
|
+
string.gsub!(/\]\s+/, "]")
|
33
|
+
string.gsub!(/<(\d*)>/) do
|
34
|
+
num_padding = $1.to_i
|
35
|
+
if num_padding > 0
|
36
|
+
result = WHITESPACE_BLOCK * num_padding
|
37
|
+
else
|
38
|
+
result = WHITESPACE_BLOCK
|
78
39
|
end
|
79
|
-
|
80
|
-
escape = false
|
40
|
+
result
|
81
41
|
end
|
82
|
-
end
|
83
|
-
|
84
|
-
return false unless open_br.length == close_br.length
|
85
|
-
# make_tree(0)
|
86
|
-
# return false if @tncnt.empty?
|
87
|
-
# @tncnt.each do |key, value|
|
88
|
-
# return false if key == ""
|
89
|
-
# end
|
90
|
-
return true
|
91
|
-
end
|
92
|
-
|
93
|
-
def parse
|
94
|
-
make_tree(0);
|
95
|
-
end
|
96
42
|
|
97
|
-
|
98
|
-
|
99
|
-
|
43
|
+
@data = string # Store it for later...
|
44
|
+
if @data.contains_cjk?
|
45
|
+
fontset[:normal] = fontset[:cjk]
|
46
|
+
end
|
47
|
+
@elist = ElementList.new # Initialize internal element list
|
48
|
+
@pos = 0 # Position in the sentence
|
49
|
+
@id = 1 # ID for the next element
|
50
|
+
@level = 0 # Level in the diagram
|
51
|
+
@fontset = fontset
|
52
|
+
@fontsize = fontsize
|
53
|
+
end
|
100
54
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
if(element.type == ETYPE_NODE)
|
106
|
-
count = 1
|
107
|
-
content = element.content
|
55
|
+
def self.valid?(data)
|
56
|
+
if(data.length < 1)
|
57
|
+
raise RSTError, "Error: input text is empty"
|
58
|
+
end
|
108
59
|
|
109
|
-
|
110
|
-
|
111
|
-
|
60
|
+
if /\[\s*\]/m =~ data
|
61
|
+
raise RSTError, "Error: inside the brackets is empty"
|
62
|
+
end
|
112
63
|
|
113
|
-
|
114
|
-
|
115
|
-
|
64
|
+
text = data.strip
|
65
|
+
text_r = text.split(//)
|
66
|
+
open_br, close_br = [], []
|
67
|
+
escape = false
|
68
|
+
text_r.each do |chr|
|
69
|
+
if chr == "\\"
|
70
|
+
if escape
|
71
|
+
escape = false
|
116
72
|
else
|
117
|
-
|
73
|
+
escape = true
|
118
74
|
end
|
75
|
+
next
|
76
|
+
end
|
119
77
|
|
120
|
-
|
78
|
+
if escape && /[\[\]]/ =~ chr
|
79
|
+
escape = false
|
80
|
+
next
|
81
|
+
elsif chr == '['
|
82
|
+
open_br.push(chr)
|
83
|
+
elsif chr == ']'
|
84
|
+
close_br.push(chr)
|
85
|
+
if open_br.length < close_br.length
|
86
|
+
break
|
87
|
+
end
|
121
88
|
end
|
89
|
+
escape = false
|
90
|
+
end
|
122
91
|
|
92
|
+
if open_br.empty? && close_br.empty?
|
93
|
+
raise RSTError, "Error: input text does not contain paired brackets"
|
94
|
+
elsif open_br.length == close_br.length
|
95
|
+
return true
|
96
|
+
else
|
97
|
+
raise RSTError, "Error: open and close brackets do not match"
|
123
98
|
end
|
124
|
-
end
|
125
|
-
@tncnt
|
126
|
-
end
|
127
|
-
|
128
|
-
def count_node(name)
|
129
|
-
name = name.strip
|
130
|
-
if @tncnt[name]
|
131
|
-
@tncnt[name] += 1
|
132
|
-
else
|
133
|
-
@tncnt[name] = 1
|
134
99
|
end
|
135
|
-
end
|
136
100
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
i = 0
|
101
|
+
def parse
|
102
|
+
make_tree(0);
|
103
|
+
@elist.set_hierarchy
|
104
|
+
end
|
142
105
|
|
143
|
-
|
144
|
-
|
106
|
+
def get_elementlist
|
107
|
+
@elist;
|
145
108
|
end
|
146
109
|
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
110
|
+
def get_next_token
|
111
|
+
data = @data.split(//)
|
112
|
+
gottoken = false
|
113
|
+
token = ""
|
114
|
+
i = 0
|
115
|
+
|
116
|
+
if((@pos + 1) >= data.length)
|
117
|
+
return ""
|
118
|
+
end
|
119
|
+
|
120
|
+
escape = false
|
121
|
+
while(((@pos + i) < data.length) && !gottoken)
|
122
|
+
ch = data[@pos + i];
|
123
|
+
case ch
|
124
|
+
when "["
|
125
|
+
if escape
|
126
|
+
token += ch
|
127
|
+
escape = false
|
128
|
+
else
|
129
|
+
if(i > 0)
|
130
|
+
gottoken = true
|
131
|
+
else
|
132
|
+
token += ch
|
133
|
+
end
|
134
|
+
end
|
135
|
+
when "]"
|
136
|
+
if escape
|
137
|
+
token += ch
|
138
|
+
escape = false
|
139
|
+
else
|
140
|
+
if(i == 0 )
|
141
|
+
token += ch
|
142
|
+
end
|
157
143
|
gottoken = true
|
144
|
+
end
|
145
|
+
when "\\"
|
146
|
+
if escape
|
147
|
+
token += '\\\\'
|
148
|
+
escape = false
|
149
|
+
else
|
150
|
+
escape = true
|
151
|
+
end
|
152
|
+
when " "
|
153
|
+
if escape
|
154
|
+
token += '\\n'
|
155
|
+
escape = false
|
158
156
|
else
|
159
157
|
token += ch
|
160
158
|
end
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
else
|
167
|
-
if(i == 0 )
|
159
|
+
when /[n{}<>^+*_=~\|\-]/
|
160
|
+
if escape
|
161
|
+
token += '\\' + ch
|
162
|
+
escape = false
|
163
|
+
else
|
168
164
|
token += ch
|
169
165
|
end
|
170
|
-
gottoken = true
|
171
|
-
end
|
172
|
-
when "\\"
|
173
|
-
escape = true
|
174
|
-
when "n", " ", "+", "-", "=", "~", "#", "*"
|
175
|
-
if escape
|
176
|
-
token += "\\#{ch}"
|
177
|
-
escape = false
|
178
166
|
else
|
179
|
-
|
167
|
+
if escape
|
168
|
+
token += ch
|
169
|
+
escape = false
|
170
|
+
else
|
171
|
+
token += ch
|
172
|
+
end
|
180
173
|
end
|
181
|
-
|
182
|
-
# gottoken = false # same as do nothing
|
183
|
-
else
|
184
|
-
token += ch
|
185
|
-
escape = false if escape
|
174
|
+
i += 1
|
186
175
|
end
|
187
|
-
i += 1
|
188
|
-
end
|
189
176
|
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
177
|
+
if(i > 1)
|
178
|
+
@pos += (i - 1)
|
179
|
+
else
|
180
|
+
@pos += 1
|
181
|
+
end
|
182
|
+
return token
|
194
183
|
end
|
195
|
-
return token
|
196
|
-
end
|
197
184
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
end
|
185
|
+
def make_tree(parent)
|
186
|
+
token = get_next_token.strip
|
187
|
+
parts = Array.new
|
188
|
+
|
189
|
+
while(token != "" && token != "]" )
|
190
|
+
token_r = token.split(//)
|
191
|
+
case token_r[0]
|
192
|
+
when "["
|
193
|
+
tl = token_r.length
|
194
|
+
token_r = token_r[1, tl - 1]
|
195
|
+
spaceat = token_r.index(" ")
|
196
|
+
newparent = -1
|
197
|
+
|
198
|
+
if spaceat
|
199
|
+
parts[0] = token_r[0, spaceat].join
|
200
|
+
# parts[0] = parts[0].gsub("\\>", "﹥").gsub("\\<", "﹤")
|
201
|
+
|
202
|
+
tl =token_r.length
|
203
|
+
parts[1] = token_r[spaceat, tl - spaceat].join
|
204
|
+
# parts[1] = parts[1].gsub("\\>", "﹥").gsub("\\<", "﹤")
|
205
|
+
|
206
|
+
element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
|
207
|
+
@id += 1
|
208
|
+
@elist.add(element)
|
209
|
+
newparent = element.id
|
210
|
+
|
211
|
+
element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
|
212
|
+
@id += 1
|
213
|
+
@elist.add(element)
|
214
|
+
else
|
215
|
+
# joined = token_r.join.gsub("\\>", "﹥").gsub("\\<", "﹤")
|
216
|
+
joined = token_r.join
|
217
|
+
element = Element.new(@id, parent, joined, @level, @fontset, @fontsize)
|
218
|
+
@id += 1
|
219
|
+
newparent = element.id
|
220
|
+
@elist.add(element)
|
221
|
+
end
|
236
222
|
|
237
|
-
|
238
|
-
|
223
|
+
@level += 1
|
224
|
+
make_tree(newparent)
|
239
225
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
226
|
+
else
|
227
|
+
if token.strip != ""
|
228
|
+
element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
|
229
|
+
@id += 1
|
230
|
+
@elist.add(element)
|
231
|
+
end
|
246
232
|
end
|
247
|
-
end
|
248
233
|
|
249
|
-
|
234
|
+
token = get_next_token
|
235
|
+
end
|
236
|
+
@level -= 1
|
250
237
|
end
|
251
|
-
@level -= 1
|
252
238
|
end
|
253
239
|
end
|
254
240
|
|