rsyntaxtree 1.0.8 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +77 -0
- data/.solargraph.yml +22 -0
- data/.tags +211 -10
- data/Gemfile +10 -5
- data/README.md +3 -2
- data/Rakefile +3 -1
- data/bin/rsyntaxtree +42 -50
- data/docs/Gemfile +3 -1
- data/docs/_layouts/default.html +1 -1
- data/lib/rsyntaxtree/base_graph.rb +260 -264
- data/lib/rsyntaxtree/element.rb +167 -179
- data/lib/rsyntaxtree/elementlist.rb +105 -124
- data/lib/rsyntaxtree/markup_parser.rb +82 -93
- data/lib/rsyntaxtree/string_parser.rb +221 -237
- data/lib/rsyntaxtree/svg_graph.rb +158 -197
- data/lib/rsyntaxtree/utils.rb +59 -63
- data/lib/rsyntaxtree/version.rb +3 -2
- data/lib/rsyntaxtree.rb +174 -177
- data/rsyntaxtree.gemspec +10 -10
- data/test/markup_parser_test.rb +3 -2
- metadata +23 -21
@@ -1,237 +1,221 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
# (
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# Clean up the data a little to make processing easier
|
22
|
-
# repeated newlines => a newline
|
23
|
-
string = str.gsub(/[\n\r]+/m, "\n")
|
24
|
-
# a backslash followed by a newline => a backslash followed by an 'n'
|
25
|
-
string.gsub!(/\\\n\s*/m, "\\n")
|
26
|
-
# repeated whitespace characters => " "
|
27
|
-
string.gsub!(/\s+/, " ")
|
28
|
-
string.gsub!(/\]\s+\[/, "][")
|
29
|
-
string.gsub!(/\s+\[/, "[")
|
30
|
-
string.gsub!(/\[\s+/, "[")
|
31
|
-
string.gsub!(/\s+\]/, "]")
|
32
|
-
string.gsub!(/\]\s+/, "]")
|
33
|
-
string.gsub!(/<(\d*)>/) do
|
34
|
-
num_padding = $1.to_i
|
35
|
-
if num_padding
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
result
|
41
|
-
end
|
42
|
-
|
43
|
-
@data = string # Store it for later...
|
44
|
-
if @data.contains_cjk?
|
45
|
-
|
46
|
-
|
47
|
-
@
|
48
|
-
@
|
49
|
-
@
|
50
|
-
@
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
if
|
57
|
-
raise RSTError, "Error:
|
58
|
-
end
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
text_r
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
open_br.
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
end
|
100
|
-
|
101
|
-
def
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
i
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
token += ch
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
else
|
224
|
-
if token.strip != ""
|
225
|
-
element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
|
226
|
-
@id += 1
|
227
|
-
@elist.add(element)
|
228
|
-
end
|
229
|
-
end
|
230
|
-
|
231
|
-
token = get_next_token
|
232
|
-
end
|
233
|
-
@level -= 1
|
234
|
-
end
|
235
|
-
end
|
236
|
-
end
|
237
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#==========================
|
4
|
+
# string_parser.rb
|
5
|
+
#==========================
|
6
|
+
#
|
7
|
+
# Parses a phrase into leafs and nodes and store the result in an element list
|
8
|
+
# (see element_list.rb)
|
9
|
+
# Copyright (c) 2007-2023 Yoichiro Hasebe <yohasebe@gmail.com>
|
10
|
+
|
11
|
+
require_relative 'elementlist'
|
12
|
+
require_relative 'element'
|
13
|
+
require_relative 'utils'
|
14
|
+
|
15
|
+
module RSyntaxTree
|
16
|
+
class StringParser
|
17
|
+
attr_accessor :data, :elist, :pos, :id, :level
|
18
|
+
|
19
|
+
def initialize(str, fontset, fontsize, global)
|
20
|
+
@global = global
|
21
|
+
# Clean up the data a little to make processing easier
|
22
|
+
# repeated newlines => a newline
|
23
|
+
string = str.gsub(/[\n\r]+/m, "\n")
|
24
|
+
# a backslash followed by a newline => a backslash followed by an 'n'
|
25
|
+
string.gsub!(/\\\n\s*/m, "\\n")
|
26
|
+
# repeated whitespace characters => " "
|
27
|
+
string.gsub!(/\s+/, " ")
|
28
|
+
string.gsub!(/\]\s+\[/, "][")
|
29
|
+
string.gsub!(/\s+\[/, "[")
|
30
|
+
string.gsub!(/\[\s+/, "[")
|
31
|
+
string.gsub!(/\s+\]/, "]")
|
32
|
+
string.gsub!(/\]\s+/, "]")
|
33
|
+
string.gsub!(/<(\d*)>/) do
|
34
|
+
num_padding = $1.to_i
|
35
|
+
result = if num_padding.positive?
|
36
|
+
WHITESPACE_BLOCK * num_padding
|
37
|
+
else
|
38
|
+
WHITESPACE_BLOCK
|
39
|
+
end
|
40
|
+
result
|
41
|
+
end
|
42
|
+
|
43
|
+
@data = string # Store it for later...
|
44
|
+
fontset[:normal] = fontset[:cjk] if @data.contains_cjk?
|
45
|
+
@elist = ElementList.new # Initialize internal element list
|
46
|
+
@pos = 0 # Position in the sentence
|
47
|
+
@id = 1 # ID for the next element
|
48
|
+
@level = 0 # Level in the diagram
|
49
|
+
@fontset = fontset
|
50
|
+
@fontsize = fontsize
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.valid?(data)
|
54
|
+
raise RSTError, "Error: input text is empty" if data.empty?
|
55
|
+
|
56
|
+
if /\[\s*\]/m =~ data
|
57
|
+
raise RSTError, "Error: inside the brackets is empty"
|
58
|
+
end
|
59
|
+
|
60
|
+
text = data.strip
|
61
|
+
text_r = text.split(//)
|
62
|
+
open_br = []
|
63
|
+
close_br = []
|
64
|
+
escape = false
|
65
|
+
text_r.each do |chr|
|
66
|
+
if chr == "\\"
|
67
|
+
escape = if escape
|
68
|
+
false
|
69
|
+
else
|
70
|
+
true
|
71
|
+
end
|
72
|
+
next
|
73
|
+
end
|
74
|
+
|
75
|
+
if escape && /[\[\]]/ =~ chr
|
76
|
+
escape = false
|
77
|
+
next
|
78
|
+
elsif chr == '['
|
79
|
+
open_br.push(chr)
|
80
|
+
elsif chr == ']'
|
81
|
+
close_br.push(chr)
|
82
|
+
break if open_br.length < close_br.length
|
83
|
+
end
|
84
|
+
escape = false
|
85
|
+
end
|
86
|
+
|
87
|
+
if open_br.empty? && close_br.empty?
|
88
|
+
raise RSTError, "Error: input text does not contain paired brackets"
|
89
|
+
elsif open_br.length == close_br.length
|
90
|
+
true
|
91
|
+
else
|
92
|
+
raise RSTError, "Error: open and close brackets do not match"
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def parse
|
97
|
+
make_tree(0);
|
98
|
+
@elist.set_hierarchy
|
99
|
+
end
|
100
|
+
|
101
|
+
def get_elementlist
|
102
|
+
@elist;
|
103
|
+
end
|
104
|
+
|
105
|
+
def get_next_token
|
106
|
+
data = @data.split(//)
|
107
|
+
gottoken = false
|
108
|
+
token = ""
|
109
|
+
i = 0
|
110
|
+
|
111
|
+
return "" if (@pos + 1) >= data.length
|
112
|
+
|
113
|
+
escape = false
|
114
|
+
while ((@pos + i) < data.length) && !gottoken
|
115
|
+
ch = data[@pos + i];
|
116
|
+
case ch
|
117
|
+
when "["
|
118
|
+
if escape
|
119
|
+
token += ch
|
120
|
+
escape = false
|
121
|
+
elsif i.positive?
|
122
|
+
gottoken = true
|
123
|
+
else
|
124
|
+
token += ch
|
125
|
+
end
|
126
|
+
when "]"
|
127
|
+
if escape
|
128
|
+
token += ch
|
129
|
+
escape = false
|
130
|
+
else
|
131
|
+
token += ch if i.zero?
|
132
|
+
gottoken = true
|
133
|
+
end
|
134
|
+
when "\\"
|
135
|
+
if escape
|
136
|
+
token += '\\\\'
|
137
|
+
escape = false
|
138
|
+
else
|
139
|
+
escape = true
|
140
|
+
end
|
141
|
+
when " "
|
142
|
+
if escape
|
143
|
+
token += '\\n'
|
144
|
+
escape = false
|
145
|
+
else
|
146
|
+
token += ch
|
147
|
+
end
|
148
|
+
when /[n{}<>^+*_=~|-]/
|
149
|
+
if escape
|
150
|
+
token += '\\' + ch
|
151
|
+
escape = false
|
152
|
+
else
|
153
|
+
token += ch
|
154
|
+
end
|
155
|
+
else
|
156
|
+
if escape
|
157
|
+
token += ch
|
158
|
+
escape = false
|
159
|
+
else
|
160
|
+
token += ch
|
161
|
+
end
|
162
|
+
end
|
163
|
+
i += 1
|
164
|
+
end
|
165
|
+
|
166
|
+
@pos += if i > 1
|
167
|
+
i - 1
|
168
|
+
else
|
169
|
+
1
|
170
|
+
end
|
171
|
+
token
|
172
|
+
end
|
173
|
+
|
174
|
+
def make_tree(parent)
|
175
|
+
token = get_next_token.strip
|
176
|
+
parts = []
|
177
|
+
|
178
|
+
while token != "" && token != "]"
|
179
|
+
token_r = token.split(//)
|
180
|
+
case token_r[0]
|
181
|
+
when "["
|
182
|
+
tl = token_r.length
|
183
|
+
token_r = token_r[1, tl - 1]
|
184
|
+
spaceat = token_r.index(" ")
|
185
|
+
newparent = -1
|
186
|
+
|
187
|
+
if spaceat
|
188
|
+
parts[0] = token_r[0, spaceat].join
|
189
|
+
|
190
|
+
tl = token_r.length
|
191
|
+
parts[1] = token_r[spaceat, tl - spaceat].join
|
192
|
+
|
193
|
+
element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize, @global)
|
194
|
+
@id += 1
|
195
|
+
@elist.add(element)
|
196
|
+
newparent = element.id
|
197
|
+
|
198
|
+
element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize, @global)
|
199
|
+
@id += 1
|
200
|
+
else
|
201
|
+
joined = token_r.join
|
202
|
+
element = Element.new(@id, parent, joined, @level, @fontset, @fontsize, @global)
|
203
|
+
@id += 1
|
204
|
+
newparent = element.id
|
205
|
+
end
|
206
|
+
@elist.add(element)
|
207
|
+
@level += 1
|
208
|
+
make_tree(newparent)
|
209
|
+
else
|
210
|
+
if token.strip != ""
|
211
|
+
element = Element.new(@id, parent, token, @level, @fontset, @fontsize, @global)
|
212
|
+
@id += 1
|
213
|
+
@elist.add(element)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
token = get_next_token
|
217
|
+
end
|
218
|
+
@level -= 1
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|