rsyntaxtree 0.8.8 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,241 +7,234 @@
7
7
  #
8
8
  # Parses a phrase into leaves and nodes and stores the result in an element list
9
9
  # (see element_list.rb)
10
- #
11
- # This file is part of RSyntaxTree, which is a ruby port of Andre Eisenbach's
12
- # excellent program phpSyntaxTree.
13
- #
14
10
  # Copyright (c) 2007-2021 Yoichiro Hasebe <yohasebe@gmail.com>
15
- # Copyright (c) 2003-2004 Andre Eisenbach <andre@ironcreek.net>
16
11
 
17
12
  require 'elementlist'
18
13
  require 'element'
19
-
20
- # def escape_high_ascii(string)
21
- # html = ""
22
- # string.length.times do |i|
23
- # ch = string[i]
24
- # if(ch < 127)
25
- # html += ch.chr
26
- # else
27
- # html += sprintf("&#%d;", ch)
28
- # end
29
- # end
30
- # html
31
- # end
32
-
33
- class StringParser
34
-
35
- attr_accessor :data, :elist, :pos, :id, :level, :tncnt
36
- def initialize(str)
37
- # Clean up the data a little to make processing easier
38
- string = str.gsub(/\t/, "") rescue ""
39
- string.gsub!(/\s+/, " ")
40
- string.gsub!(/\] \[/, "][")
41
- string.gsub!(/ \[/, "[")
42
-
43
- @data = string # Store it for later...
44
- @elist = ElementList.new # Initialize internal element list
45
- @pos = 0 # Position in the sentence
46
- @id = 1 # ID for the next element
47
- @level = 0 # Level in the diagram
48
- @tncnt = Hash.new # Node type counts
49
- end
50
-
51
- # caution: quick and dirty solution
52
- def valid?
53
- if(@data.length < 1)
54
- return false
55
- end
56
-
57
- if /\[\s*\]/m =~ @data
58
- return false
59
- end
60
-
61
- if /\[\_/ =~ @data
62
- return false
63
- end
64
-
65
- text = @data.strip
66
- text_r = text.split(//)
67
- open_br, close_br = [], []
68
- escape = false
69
- text_r.each do |chr|
70
- if chr == "\\"
71
- escape = true
72
- elsif chr == '[' && !escape
73
- open_br.push(chr)
74
- elsif chr == ']' && !escape
75
- close_br.push(chr)
76
- if open_br.length < close_br.length
77
- break
14
+ require 'utils'
15
+
16
+ module RSyntaxTree
17
+ class StringParser
18
+
19
+ attr_accessor :data, :elist, :pos, :id, :level
20
+ def initialize(str, fontset, fontsize)
21
+ # Clean up the data a little to make processing easier
22
+ # repeated newlines => a newline
23
+ string = str.gsub(/[\n\r]+/m, "\n")
24
+ # a backslash followed by a newline => a backslash followed by an 'n'
25
+ string.gsub!(/\\\n\s*/m, "\\n")
26
+ # repeated whitespace characters => " "
27
+ string.gsub!(/\s+/, " ")
28
+ string.gsub!(/\]\s+\[/, "][")
29
+ string.gsub!(/\s+\[/, "[")
30
+ string.gsub!(/\[\s+/, "[")
31
+ string.gsub!(/\s+\]/, "]")
32
+ string.gsub!(/\]\s+/, "]")
33
+ string.gsub!(/<(\d*)>/) do
34
+ num_padding = $1.to_i
35
+ if num_padding > 0
36
+ result = WHITESPACE_BLOCK * num_padding
37
+ else
38
+ result = WHITESPACE_BLOCK
78
39
  end
79
- elsif escape
80
- escape = false
40
+ result
81
41
  end
82
- end
83
-
84
- return false unless open_br.length == close_br.length
85
- # make_tree(0)
86
- # return false if @tncnt.empty?
87
- # @tncnt.each do |key, value|
88
- # return false if key == ""
89
- # end
90
- return true
91
- end
92
-
93
- def parse
94
- make_tree(0);
95
- end
96
42
 
97
- def get_elementlist
98
- @elist;
99
- end
43
+ @data = string # Store it for later...
44
+ if @data.contains_cjk?
45
+ fontset[:normal] = fontset[:cjk]
46
+ end
47
+ @elist = ElementList.new # Initialize internal element list
48
+ @pos = 0 # Position in the sentence
49
+ @id = 1 # ID for the next element
50
+ @level = 0 # Level in the diagram
51
+ @fontset = fontset
52
+ @fontsize = fontsize
53
+ end
100
54
 
101
- def auto_subscript
102
- elements = @elist.get_elements
103
- tmpcnt = Hash.new
104
- elements.each do |element|
105
- if(element.type == ETYPE_NODE)
106
- count = 1
107
- content = element.content
55
+ def self.valid?(data)
56
+ if(data.length < 1)
57
+ raise RSTError, "Error: input text is empty"
58
+ end
108
59
 
109
- if @tncnt[content]
110
- count = @tncnt[content]
111
- end
60
+ if /\[\s*\]/m =~ data
61
+ raise RSTError, "Error: inside the brackets is empty"
62
+ end
112
63
 
113
- if(count > 1)
114
- if tmpcnt[content]
115
- tmpcnt[content] += 1
64
+ text = data.strip
65
+ text_r = text.split(//)
66
+ open_br, close_br = [], []
67
+ escape = false
68
+ text_r.each do |chr|
69
+ if chr == "\\"
70
+ if escape
71
+ escape = false
116
72
  else
117
- tmpcnt[content] = 1
73
+ escape = true
118
74
  end
75
+ next
76
+ end
119
77
 
120
- element.content += ("_" + tmpcnt[content].to_s)
78
+ if escape && /[\[\]]/ =~ chr
79
+ escape = false
80
+ next
81
+ elsif chr == '['
82
+ open_br.push(chr)
83
+ elsif chr == ']'
84
+ close_br.push(chr)
85
+ if open_br.length < close_br.length
86
+ break
87
+ end
121
88
  end
89
+ escape = false
90
+ end
122
91
 
92
+ if open_br.empty? && close_br.empty?
93
+ raise RSTError, "Error: input text does not contain paired brackets"
94
+ elsif open_br.length == close_br.length
95
+ return true
96
+ else
97
+ raise RSTError, "Error: open and close brackets do not match"
123
98
  end
124
- end
125
- @tncnt
126
- end
127
-
128
- def count_node(name)
129
- name = name.strip
130
- if @tncnt[name]
131
- @tncnt[name] += 1
132
- else
133
- @tncnt[name] = 1
134
99
  end
135
- end
136
100
 
137
- def get_next_token
138
- data = @data.split(//)
139
- gottoken = false
140
- token = ""
141
- i = 0
101
+ def parse
102
+ make_tree(0);
103
+ @elist.set_hierarchy
104
+ end
142
105
 
143
- if((@pos + 1) >= data.length)
144
- return ""
106
+ def get_elementlist
107
+ @elist;
145
108
  end
146
109
 
147
- escape = false
148
- while(((@pos + i) < data.length) && !gottoken)
149
- ch = data[@pos + i];
150
- case ch
151
- when "["
152
- if escape
153
- token += ch
154
- escape = false
155
- else
156
- if(i > 0)
110
+ def get_next_token
111
+ data = @data.split(//)
112
+ gottoken = false
113
+ token = ""
114
+ i = 0
115
+
116
+ if((@pos + 1) >= data.length)
117
+ return ""
118
+ end
119
+
120
+ escape = false
121
+ while(((@pos + i) < data.length) && !gottoken)
122
+ ch = data[@pos + i];
123
+ case ch
124
+ when "["
125
+ if escape
126
+ token += ch
127
+ escape = false
128
+ else
129
+ if(i > 0)
130
+ gottoken = true
131
+ else
132
+ token += ch
133
+ end
134
+ end
135
+ when "]"
136
+ if escape
137
+ token += ch
138
+ escape = false
139
+ else
140
+ if(i == 0 )
141
+ token += ch
142
+ end
157
143
  gottoken = true
144
+ end
145
+ when "\\"
146
+ if escape
147
+ token += '\\\\'
148
+ escape = false
149
+ else
150
+ escape = true
151
+ end
152
+ when " "
153
+ if escape
154
+ token += '\\n'
155
+ escape = false
156
+ else
157
+ token += ch
158
+ end
159
+ when /[n{}<>^+*_=~\|\-]/
160
+ if escape
161
+ token += '\\' + ch
162
+ escape = false
158
163
  else
159
164
  token += ch
160
165
  end
161
- end
162
- when "]"
163
- if escape
164
- token += ch
165
- escape = false
166
166
  else
167
- if(i == 0 )
167
+ if escape
168
+ token += ch
169
+ escape = false
170
+ else
168
171
  token += ch
169
172
  end
170
- gottoken = true
171
173
  end
172
- when "\\"
173
- escape = true
174
- when /[\n\r]/
175
- gottoken = false # same as do nothing
176
- else
177
- token += ch
178
- escape = false if escape
174
+ i += 1
179
175
  end
180
- i += 1
181
- end
182
176
 
183
- if(i > 1)
184
- @pos += (i - 1)
185
- else
186
- @pos += 1
177
+ if(i > 1)
178
+ @pos += (i - 1)
179
+ else
180
+ @pos += 1
181
+ end
182
+ return token
187
183
  end
188
- return token
189
- end
190
184
 
191
- def make_tree(parent)
192
- token = get_next_token.strip
193
- parts = Array.new
194
-
195
- while(token != "" && token != "]" )
196
- token_r = token.split(//)
197
- case token_r[0]
198
- when "["
199
- tl = token_r.length
200
- token_r = token_r[1, tl - 1]
201
- spaceat = token_r.index(" ")
202
- newparent = -1
203
-
204
- if spaceat
205
- parts[0] = token_r[0, spaceat].join
206
- parts[0] = parts[0].gsub("<>", " ")
207
-
208
- tl =token_r.length
209
- parts[1] = token_r[spaceat, tl - spaceat].join
210
- parts[1] = parts[1].gsub("<>", " ")
211
-
212
- element = Element.new(@id, parent, parts[0], @level)
213
- @id += 1
214
- @elist.add(element)
215
- newparent = element.id
216
- count_node(parts[0])
217
-
218
- element = Element.new(@id, @id - 1, parts[1], @level + 1 )
219
- @id += 1
220
- @elist.add(element)
221
- else
222
- joined = token_r.join.gsub("<>", " ")
223
- element = Element.new(@id, parent, joined, @level)
224
- @id += 1
225
- newparent = element.id
226
- @elist.add(element)
227
- count_node(joined)
228
- end
185
+ def make_tree(parent)
186
+ token = get_next_token.strip
187
+ parts = Array.new
188
+
189
+ while(token != "" && token != "]" )
190
+ token_r = token.split(//)
191
+ case token_r[0]
192
+ when "["
193
+ tl = token_r.length
194
+ token_r = token_r[1, tl - 1]
195
+ spaceat = token_r.index(" ")
196
+ newparent = -1
197
+
198
+ if spaceat
199
+ parts[0] = token_r[0, spaceat].join
200
+ # parts[0] = parts[0].gsub("\\>", "﹥").gsub("\\<", "﹤")
201
+
202
+ tl =token_r.length
203
+ parts[1] = token_r[spaceat, tl - spaceat].join
204
+ # parts[1] = parts[1].gsub("\\>", "﹥").gsub("\\<", "﹤")
205
+
206
+ element = Element.new(@id, parent, parts[0], @level, @fontset, @fontsize)
207
+ @id += 1
208
+ @elist.add(element)
209
+ newparent = element.id
210
+
211
+ element = Element.new(@id, @id - 1, parts[1], @level + 1, @fontset, @fontsize)
212
+ @id += 1
213
+ @elist.add(element)
214
+ else
215
+ # joined = token_r.join.gsub("\\>", "﹥").gsub("\\<", "﹤")
216
+ joined = token_r.join
217
+ element = Element.new(@id, parent, joined, @level, @fontset, @fontsize)
218
+ @id += 1
219
+ newparent = element.id
220
+ @elist.add(element)
221
+ end
229
222
 
230
- @level += 1
231
- make_tree(newparent)
223
+ @level += 1
224
+ make_tree(newparent)
232
225
 
233
- else
234
- if token.strip != ""
235
- element = Element.new(@id, parent, token, @level)
236
- @id += 1
237
- @elist.add(element)
238
- count_node(token)
226
+ else
227
+ if token.strip != ""
228
+ element = Element.new(@id, parent, token, @level, @fontset, @fontsize)
229
+ @id += 1
230
+ @elist.add(element)
231
+ end
239
232
  end
240
- end
241
233
 
242
- token = get_next_token
234
+ token = get_next_token
235
+ end
236
+ @level -= 1
243
237
  end
244
- @level -= 1
245
238
  end
246
239
  end
247
240