rsyntaxtree 0.9.2 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,248 +7,231 @@
7
7
  #
8
8
  # Parses a phrase into leaves and nodes and stores the result in an element list
9
9
  # (see element_list.rb)
10
- #
11
- # This file is part of RSyntaxTree, which is a ruby port of Andre Eisenbach's
12
- # excellent program phpSyntaxTree.
13
- #
14
10
  # Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
15
- # Copyright (c) 2003-2004 Andre Eisenbach <andre@ironcreek.net>
16
11
 
17
12
  require 'elementlist'
18
13
  require 'element'
19
-
20
- # def escape_high_ascii(string)
21
- # html = ""
22
- # string.length.times do |i|
23
- # ch = string[i]
24
- # if(ch < 127)
25
- # html += ch.chr
26
- # else
27
- # html += sprintf("&#%d;", ch)
28
- # end
29
- # end
30
- # html
31
- # end
32
-
33
- class StringParser
34
-
35
- attr_accessor :data, :elist, :pos, :id, :level, :tncnt
36
- def initialize(str)
37
- # Clean up the data a little to make processing easier
38
- string = str.gsub(/\t/, "") rescue ""
39
- string.gsub!(/\s+/, " ")
40
- string.gsub!(/\] \[/, "][")
41
- string.gsub!(/ \[/, "[")
42
-
43
- @data = string # Store it for later...
44
- @elist = ElementList.new # Initialize internal element list
45
- @pos = 0 # Position in the sentence
46
- @id = 1 # ID for the next element
47
- @level = 0 # Level in the diagram
48
- @tncnt = Hash.new # Node type counts
49
- end
50
-
51
- # caution: quick and dirty solution
52
- def valid?
53
- if(@data.length < 1)
54
- return false
55
- end
56
-
57
- if /\[\s*\]/m =~ @data
58
- return false
59
- end
60
-
61
- if /\[\_/ =~ @data
62
- return false
63
- end
64
-
65
- text = @data.strip
66
- text_r = text.split(//)
67
- open_br, close_br = [], []
68
- escape = false
69
- text_r.each do |chr|
70
- if chr == "\\"
71
- escape = true
72
- elsif chr == '[' && !escape
73
- open_br.push(chr)
74
- elsif chr == ']' && !escape
75
- close_br.push(chr)
76
- if open_br.length < close_br.length
77
- break
14
+ require 'utils'
15
+
16
+ module RSyntaxTree
17
+ class StringParser
18
+
19
+ attr_accessor :data, :elist, :pos, :id, :level
20
+ def initialize(str, fontset, fontsize)
21
+ # Clean up the data a little to make processing easier
22
+ # repeated newlines => a newline
23
+ string = str.gsub(/[\n\r]+/m, "\n")
24
+ # a backslash followed by a newline => a backslash followed by an 'n'
25
+ string.gsub!(/\\\n\s*/m, "\\n")
26
+ # repeated whitespace characters => " "
27
+ string.gsub!(/\s+/, " ")
28
+ string.gsub!(/\]\s+\[/, "][")
29
+ string.gsub!(/\s+\[/, "[")
30
+ string.gsub!(/\[\s+/, "[")
31
+ string.gsub!(/\s+\]/, "]")
32
+ string.gsub!(/\]\s+/, "]")
33
+ string.gsub!(/<(\d*)>/) do
34
+ num_padding = $1.to_i
35
+ if num_padding > 0
36
+ result = WHITESPACE_BLOCK * num_padding
37
+ else
38
+ result = WHITESPACE_BLOCK
78
39
  end
79
- elsif escape
80
- escape = false
40
+ result
81
41
  end
82
- end
83
-
84
- return false unless open_br.length == close_br.length
85
- # make_tree(0)
86
- # return false if @tncnt.empty?
87
- # @tncnt.each do |key, value|
88
- # return false if key == ""
89
- # end
90
- return true
91
- end
92
-
93
- def parse
94
- make_tree(0);
95
- end
96
42
 
97
- def get_elementlist
98
- @elist;
99
- end
43
+ @data = string # Store it for later...
44
+ if @data.contains_cjk?
45
+ fontset[:normal] = fontset[:cjk]
46
+ end
47
+ @elist = ElementList.new # Initialize internal element list
48
+ @pos = 0 # Position in the sentence
49
+ @id = 1 # ID for the next element
50
+ @level = 0 # Level in the diagram
51
+ @fontset = fontset
52
+ @fontsize = fontsize
53
+ end
100
54
 
101
- def auto_subscript
102
- elements = @elist.get_elements
103
- tmpcnt = Hash.new
104
- elements.each do |element|
105
- if(element.type == ETYPE_NODE)
106
- count = 1
107
- content = element.content
55
+ def self.valid?(data)
56
+ if(data.length < 1)
57
+ raise RSTError, "Error: input text is empty"
58
+ end
108
59
 
109
- if @tncnt[content]
110
- count = @tncnt[content]
111
- end
60
+ if /\[\s*\]/m =~ data
61
+ raise RSTError, "Error: inside the brackets is empty"
62
+ end
112
63
 
113
- if(count > 1)
114
- if tmpcnt[content]
115
- tmpcnt[content] += 1
64
+ text = data.strip
65
+ text_r = text.split(//)
66
+ open_br, close_br = [], []
67
+ escape = false
68
+ text_r.each do |chr|
69
+ if chr == "\\"
70
+ if escape
71
+ escape = false
116
72
  else
117
- tmpcnt[content] = 1
73
+ escape = true
118
74
  end
75
+ next
76
+ end
119
77
 
120
- element.content += ("_" + tmpcnt[content].to_s)
78
+ if escape && /[\[\]]/ =~ chr
79
+ escape = false
80
+ next
81
+ elsif chr == '['
82
+ open_br.push(chr)
83
+ elsif chr == ']'
84
+ close_br.push(chr)
85
+ if open_br.length < close_br.length
86
+ break
87
+ end
121
88
  end
89
+ escape = false
90
+ end
122
91
 
92
+ if open_br.empty? && close_br.empty?
93
+ raise RSTError, "Error: input text does not contain paired brackets"
94
+ elsif open_br.length == close_br.length
95
+ return true
96
+ else
97
+ raise RSTError, "Error: open and close brackets do not match"
123
98
  end
124
- end
125
- @tncnt
126
- end
127
-
128
- def count_node(name)
129
- name = name.strip
130
- if @tncnt[name]
131
- @tncnt[name] += 1
132
- else
133
- @tncnt[name] = 1
134
99
  end
135
- end
136
100
 
137
- def get_next_token
138
- data = @data.split(//)
139
- gottoken = false
140
- token = ""
141
- i = 0
101
+ def parse
102
+ make_tree(0);
103
+ @elist.set_hierarchy
104
+ end
142
105
 
143
- if((@pos + 1) >= data.length)
144
- return ""
106
+ def get_elementlist
107
+ @elist;
145
108
  end
146
109
 
147
- escape = false
148
- while(((@pos + i) < data.length) && !gottoken)
149
- ch = data[@pos + i];
150
- case ch
151
- when "["
152
- if escape
153
- token += ch
154
- escape = false
155
- else
156
- if(i > 0)
110
+ def get_next_token
111
+ data = @data.split(//)
112
+ gottoken = false
113
+ token = ""
114
+ i = 0
115
+
116
+ if((@pos + 1) >= data.length)
117
+ return ""
118
+ end
119
+
120
+ escape = false
121
+ while(((@pos + i) < data.length) && !gottoken)
122
+ ch = data[@pos + i];
123
+ case ch
124
+ when "["
125
+ if escape
126
+ token += ch
127
+ escape = false
128
+ else
129
+ if(i > 0)
130
+ gottoken = true
131
+ else
132
+ token += ch
133
+ end
134
+ end
135
+ when "]"
136
+ if escape
137
+ token += ch
138
+ escape = false
139
+ else
140
+ if(i == 0 )
141
+ token += ch
142
+ end
157
143
  gottoken = true
144
+ end
145
+ when "\\"
146
+ if escape
147
+ token += '\\\\'
148
+ escape = false
149
+ else
150
+ escape = true
151
+ end
152
+ when " "
153
+ if escape
154
+ token += '\\n'
155
+ escape = false
158
156
  else
159
157
  token += ch
160
158
  end
161
- end
162
- when "]"
163
- if escape
164
- token += ch
165
- escape = false
166
- else
167
- if(i == 0 )
159
+ when /[n{}<>^+*_=~\|\-]/
160
+ if escape
161
+ token += '\\' + ch
162
+ escape = false
163
+ else
168
164
  token += ch
169
165
  end
170
- gottoken = true
171
- end
172
- when "\\"
173
- escape = true
174
- when "n", " ", "+", "-", "=", "~", "#", "*"
175
- if escape
176
- token += "\\#{ch}"
177
- escape = false
178
166
  else
179
- token += ch
167
+ if escape
168
+ token += ch
169
+ escape = false
170
+ else
171
+ token += ch
172
+ end
180
173
  end
181
- # when /[\n\r]/
182
- # gottoken = false # same as do nothing
183
- else
184
- token += ch
185
- escape = false if escape
174
+ i += 1
186
175
  end
187
- i += 1
188
- end
189
176
 
190
- if(i > 1)
191
- @pos += (i - 1)
192
- else
193
- @pos += 1
177
+ if(i > 1)
178
+ @pos += (i - 1)
179
+ else
180
+ @pos += 1
181
+ end
182
+ return token
194
183
  end
195
- return token
196
- end
197
184
 
198
- def make_tree(parent)
199
- token = get_next_token.strip
200
- parts = Array.new
201
-
202
- while(token != "" && token != "]" )
203
- token_r = token.split(//)
204
- case token_r[0]
205
- when "["
206
- tl = token_r.length
207
- token_r = token_r[1, tl - 1]
208
- spaceat = token_r.index(" ")
209
- newparent = -1
210
-
211
- if spaceat
212
- parts[0] = token_r[0, spaceat].join
213
- parts[0] = parts[0].gsub("<>", " ")
214
-
215
- tl =token_r.length
216
- parts[1] = token_r[spaceat, tl - spaceat].join
217
- parts[1] = parts[1].gsub("<>", " ")
218
-
219
- element = Element.new(@id, parent, parts[0], @level)
220
- @id += 1
221
- @elist.add(element)
222
- newparent = element.id
223
- count_node(parts[0])
224
-
225
- element = Element.new(@id, @id - 1, parts[1], @level + 1 )
226
- @id += 1
227
- @elist.add(element)
228
- else
229
- joined = token_r.join.gsub("<>", " ")
230
- element = Element.new(@id, parent, joined, @level)
231
- @id += 1
232
- newparent = element.id
233
- @elist.add(element)
234
- count_node(joined)
235
- end
185
+ def make_tree(parent)
186
+ token = get_next_token.strip
187
+ parts = Array.new
188
+
189
+ while(token != "" && token != "]" )
190
+ token_r = token.split(//)
191
+ case token_r[0]
192
+ when "["
193
+ tl = token_r.length
194
+ token_r = token_r[1, tl - 1]
195
+ spaceat = token_r.index(" ")
196
+ newparent = -1
197
+
198
+ if spaceat
199
+ parts[0] = token_r[0, spaceat].join
200
+
201
+ tl =token_r.length
202
+ parts[1] = token_r[spaceat, tl - spaceat].join
203
+
204
+ element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
205
+ @id += 1
206
+ @elist.add(element)
207
+ newparent = element.id
208
+
209
+ element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
210
+ @id += 1
211
+ @elist.add(element)
212
+ else
213
+ joined = token_r.join
214
+ element = Element.new(@id, parent, joined, @level, @fontset, @fontsize)
215
+ @id += 1
216
+ newparent = element.id
217
+ @elist.add(element)
218
+ end
236
219
 
237
- @level += 1
238
- make_tree(newparent)
220
+ @level += 1
221
+ make_tree(newparent)
239
222
 
240
- else
241
- if token.strip != ""
242
- element = Element.new(@id, parent, token, @level)
243
- @id += 1
244
- @elist.add(element)
245
- count_node(token)
223
+ else
224
+ if token.strip != ""
225
+ element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
226
+ @id += 1
227
+ @elist.add(element)
228
+ end
246
229
  end
247
- end
248
230
 
249
- token = get_next_token
231
+ token = get_next_token
232
+ end
233
+ @level -= 1
250
234
  end
251
- @level -= 1
252
235
  end
253
236
  end
254
237