rpdf2txt 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/LICENCE +515 -0
- data/Manifest.txt +126 -0
- data/README.txt +30 -0
- data/Rakefile +24 -0
- data/bin/rpdf2txt +58 -0
- data/config.save +12 -0
- data/install.rb +1098 -0
- data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
- data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
- data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
- data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
- data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
- data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
- data/lib/rpdf2txt-rockit/grammar.rb +644 -0
- data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
- data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
- data/lib/rpdf2txt-rockit/indexable.rb +53 -0
- data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
- data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
- data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
- data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
- data/lib/rpdf2txt-rockit/profiler.rb +168 -0
- data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
- data/lib/rpdf2txt-rockit/rockit.rb +76 -0
- data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
- data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
- data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
- data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
- data/lib/rpdf2txt-rockit/token.rb +364 -0
- data/lib/rpdf2txt-rockit/version.rb +3 -0
- data/lib/rpdf2txt/attributesparser.rb +42 -0
- data/lib/rpdf2txt/cmapparser.rb +65 -0
- data/lib/rpdf2txt/data/_cmap.grammar +11 -0
- data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/cmap.grammar +11 -0
- data/lib/rpdf2txt/data/cmap.rb +37 -0
- data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/cmap_range.rb +43 -0
- data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
- data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
- data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
- data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
- data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
- data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
- data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
- data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
- data/lib/rpdf2txt/data/pdftext.grammar +102 -0
- data/lib/rpdf2txt/data/pdftext.rb +146 -0
- data/lib/rpdf2txt/default_handler.rb +352 -0
- data/lib/rpdf2txt/lzw.rb +69 -0
- data/lib/rpdf2txt/object.rb +1114 -0
- data/lib/rpdf2txt/parser.rb +169 -0
- data/lib/rpdf2txt/symbol.rb +408 -0
- data/lib/rpdf2txt/text.rb +182 -0
- data/lib/rpdf2txt/text_state.rb +434 -0
- data/lib/rpdf2txt/textparser.rb +42 -0
- data/test/data/3392_obj +0 -0
- data/test/data/397_decrypted +15 -0
- data/test/data/450_decrypted +153 -0
- data/test/data/450_obj +0 -0
- data/test/data/452_decrypted +125 -0
- data/test/data/454_decrypted +108 -0
- data/test/data/456_decrypted +106 -0
- data/test/data/458_decrypted +111 -0
- data/test/data/458_obj +0 -0
- data/test/data/460_decrypted +118 -0
- data/test/data/460_obj +0 -0
- data/test/data/463_decrypted +117 -0
- data/test/data/465_decrypted +107 -0
- data/test/data/465_obj +0 -0
- data/test/data/90_obj +0 -0
- data/test/data/90_obj_comp +1 -0
- data/test/data/decrypted +0 -0
- data/test/data/encrypt_obj +0 -0
- data/test/data/encrypt_string +0 -0
- data/test/data/encrypt_string_128bit +0 -0
- data/test/data/encrypted_object_stream.pdf +0 -0
- data/test/data/firststream +1 -0
- data/test/data/index.pdfobj +0 -0
- data/test/data/index_2bit.pdfobj +0 -0
- data/test/data/index_masked.pdfobj +0 -0
- data/test/data/indexed.pdfobj +0 -0
- data/test/data/indexed_2bit.pdfobj +0 -0
- data/test/data/indexed_masked.pdfobj +0 -0
- data/test/data/inline.png +0 -0
- data/test/data/logo.png +0 -0
- data/test/data/lzw.pdfobj +0 -0
- data/test/data/lzw_index.pdfobj +0 -0
- data/test/data/page_tree.pdf +148 -0
- data/test/data/pdf_20.png +0 -0
- data/test/data/pdf_21.png +0 -0
- data/test/data/pdf_22.png +0 -0
- data/test/data/pdf_50.png +0 -0
- data/test/data/png.pdfobj +0 -0
- data/test/data/space_bug_stream.txt +119 -0
- data/test/data/stream.txt +292 -0
- data/test/data/stream_kerning_bug.txt +13 -0
- data/test/data/stream_kerning_bug2.txt +6 -0
- data/test/data/test.pdf +0 -0
- data/test/data/test.txt +8 -0
- data/test/data/test_text.txt +42 -0
- data/test/data/working_obj +0 -0
- data/test/data/working_obj2 +0 -0
- data/test/mock.rb +149 -0
- data/test/suite.rb +30 -0
- data/test/test_pdf_object.rb +1802 -0
- data/test/test_pdf_parser.rb +1340 -0
- data/test/test_pdf_text.rb +789 -0
- data/test/test_space_bug_05_2004.rb +87 -0
- data/test/test_stream.rb +194 -0
- data/test/test_text_state.rb +315 -0
- data/usage-en.txt +112 -0
- data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
- data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
- metadata +220 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
# Try to load the native strscan library and record in a global whether it
# is available; the pure-Ruby stand-in below is only defined when it is not.
$USING_STRSCAN =
  begin
    require 'strscan'
    true
  rescue LoadError
    false
  end

if !$USING_STRSCAN
  # Minimal pure-Ruby replacement for StringScanner, for installations that
  # lack strscan. Only the small subset of the API defined here is provided.
  class StringScanner
    # Current scan position, as an index into the scanned string.
    attr_accessor :pointer

    # Starts scanning +string+ from its beginning.
    def initialize(string)
      @orig = string
      @pointer = 0
    end

    # The complete string being scanned.
    def string
      @orig
    end

    # The part of the string from the scan position to the end.
    def rest
      @orig[pointer..-1]
    end

    # True while there is unscanned text left.
    def rest?
      !rest.empty?
    end

    # Returns the text matched by +regexp+ at the scan position (or nil)
    # without moving the scan position.
    def check(regexp)
      apply_regexp(regexp, false)
    end

    # Returns the text matched by +regexp+ at the scan position (or nil)
    # and moves the scan position past the match.
    def scan(regexp)
      apply_regexp(regexp, true)
    end

    private

    # Moves the scan position forward by the length of the matched text.
    def advance_pointer(match)
      @pointer += match[0].length
    end

    # Matches +regexp+ against the unscanned text. Only a match that starts
    # exactly at the scan position counts; anything else yields nil.
    def apply_regexp(regexp, advancePointer)
      match = regexp.match(rest)
      return nil unless match && match[0] && match.begin(0) == 0

      advance_pointer(match) if advancePointer
      match[0]
    end
  end
end
|
@@ -0,0 +1,452 @@
|
|
1
|
+
require 'rpdf2txt-rockit/sourcecode_dumpable'
|
2
|
+
require 'rpdf2txt-rockit/graphdrawing'
|
3
|
+
require 'rpdf2txt-rockit/base_extensions'
|
4
|
+
|
5
|
+
# Base class for objects that build the syntax tree. Each has the name of
# the node it builds, a list of names of the children and a list of the
# children that are inactive. No value will be specified for inactive
# children even though their names are still available for the built node.
# Children named "_" are semi-inactive; values for them need to be specified
# when creating trees but they will be deleted when the tree is compacted.
# If a tree is not compacted their value can be accessed via their index
# number.
#
class SyntaxTreeBuilder
  include SourceCodeDumpable
  attr_accessor :node_name
  attr_reader :children_names, :inactive_children_indices

  # nodeName is converted to a String; childrenNames are converted to
  # Strings and must be unique except for "_"; inactiveChildrenIndices are
  # positions in childrenNames (a sorted copy is stored).
  def initialize(nodeName, childrenNames, inactiveChildrenIndices = [])
    @node_name = nodeName.to_s
    @inactive_children_indices = inactiveChildrenIndices.sort
    init_children_names(childrenNames)
  end

  # Stores the children names as Strings. Raises ArgumentError when a name
  # other than "_" occurs more than once.
  def init_children_names(childrenNames)
    @children_names = Array.new
    childrenNames.each do |name|
      name = name.to_s
      if @children_names.include?(name) && name != "_"
        raise ArgumentError,
          "there are duplicates in the children names #{childrenNames.inspect}"
      end
      @children_names.push name
    end
  end
  protected :init_children_names

  # A new SyntaxTreeBuilder with the same node name, children names and
  # inactive indices.
  def copy
    SyntaxTreeBuilder.new(node_name, children_names, inactive_children_indices)
  end

  # Names of the children that are currently active, in order. Selected by
  # position, so several children named "_" are kept apart correctly.
  def active_childrens
    children_names.values_at(*active_children_indices)
  end

  # Names of the children that are currently inactive.
  def inactive_childrens
    children_names.values_at(*@inactive_children_indices)
  end

  # Marks a child as inactive. An Integer is taken as a position among the
  # currently active children, anything else as a child name.
  # NOTE(review): an unknown child yields a nil position which is stored
  # as-is, as before -- confirm whether callers rely on that.
  def inactivate_child(anIntegerOrString)
    position, = position_and_child(anIntegerOrString)
    @inactive_children_indices.push(position)
    @inactive_children_indices.uniq!
    @inactive_children_indices.sort!
  end

  # Positions (in children_names) of the children that are not inactive.
  def active_children_indices
    (0...children_names.length).to_a - @inactive_children_indices
  end
  protected :active_children_indices

  # Resolves an Integer (position among the active children) or a child
  # name to [position in children_names, child name].
  def position_and_child(anIntegerOrString)
    if anIntegerOrString.kind_of?(Integer)
      # Position is among currently active children. Map it through the
      # active positions instead of looking the name up again: a lookup by
      # name always finds the FIRST "_" when several children share it.
      position = active_children_indices[anIntegerOrString]
      child = position.nil? ? nil : children_names[position]
      return position, child
    else
      return children_names.index(anIntegerOrString), anIntegerOrString
    end
  end
  protected :position_and_child

  # Removes the resolved position from the inactive list.
  def activate_child(anIntegerOrString)
    position, = position_and_child(anIntegerOrString)
    @inactive_children_indices.delete position
  end

  # Builders are equal when class, node name, children names and inactive
  # indices all agree.
  def ==(other)
    other.class == self.class &&
      other.node_name == node_name &&
      other.children_names == children_names &&
      other.inactive_children_indices == inactive_children_indices
  end

  # Builds a SyntaxTree from the values of the active children; inactive
  # children get nil as their value.
  def create_tree(childrenValues)
    childrenValues = insert_nil_for_inactive_children(childrenValues)
    SyntaxTree.new(node_name, children_names, childrenValues)
  end

  # Expands the values of the active children to one entry per child name,
  # with nil at every inactive position.
  def insert_nil_for_inactive_children(children)
    count = 0
    (0...@children_names.length).map do |i|
      if @inactive_children_indices.include?(i)
        nil
      else
        value = children[count]
        count += 1
        value
      end
    end
  end
  protected :insert_nil_for_inactive_children

  # Source code that recreates this builder (helpers come from
  # SourceCodeDumpable).
  def to_src(name = nil, nameHash = {})
    assign_to(name,
      new_of_my_type(node_name,
        as_code(children_names.to_compact_src),
        as_code(inactive_children_indices.to_compact_src)))
  end
end
|
109
|
+
|
110
|
+
# Builder for "^" nodes: instead of creating a new node it returns ("lifts")
# one of the children values.
class LiftingSyntaxTreeBuilder < SyntaxTreeBuilder
  # Remembers the position of the first child whose name is not "_"; that
  # is the child to lift (nil when every child is named "_").
  def initialize(childrenNames, inactiveChildrenIndices = [])
    super("^", childrenNames, inactiveChildrenIndices)
    first_named = @children_names.detect { |child_name| child_name != "_" }
    @child_to_lift_index = @children_names.index(first_named)
  end

  # Returns the value at the lift position. Without a lift position it
  # returns the first value that has no "lexeme" child (i.e. one that
  # represents a non-terminal), falling back to the first value.
  def create_tree(childrenValues)
    return childrenValues[@child_to_lift_index] if @child_to_lift_index

    non_terminal = childrenValues.detect do |value|
      !value.children_names.include?("lexeme")
    end
    return non_terminal || childrenValues[0]
  end

  # A new LiftingSyntaxTreeBuilder with the same children names and
  # inactive indices.
  def copy
    LiftingSyntaxTreeBuilder.new(children_names, inactive_children_indices)
  end

  # Source code that recreates this builder.
  def to_src(name = nil, nameHash = {})
    names_src = as_code(children_names.to_compact_src)
    inactive_src = as_code(inactive_children_indices.to_compact_src)
    assign_to(name, new_of_my_type(names_src, inactive_src))
  end
end
|
139
|
+
|
140
|
+
# Shorthand SyntaxTreeBuilder creator: the node name "^" gives a
# LiftingSyntaxTreeBuilder, every other name a plain SyntaxTreeBuilder.
def stb(nodeName, childrenNames = [], inactiveChildren = [])
  node_name = nodeName.to_s
  return LiftingSyntaxTreeBuilder.new(childrenNames, inactiveChildren) if node_name == "^"

  SyntaxTreeBuilder.new(node_name, childrenNames, inactiveChildren)
end
|
149
|
+
|
150
|
+
# Builder for arrays. You specify the indices for the children values
# that should be collected into an Array element and (optionally)
# give the index to an Array to append the element to.
class ArrayNodeBuilder
  include SourceCodeDumpable
  attr_reader :indices, :array_index
  attr_accessor :append_element, :shifting_insert

  # indices::            positions of the children values forming one element
  # arrayIndex::         position of an existing ArrayNode to extend (or nil)
  # chainedTreeBuilder:: builder the result is handed to; defaults to a
  #                      SyntaxTreeBuilder for "ArrayNode" without children
  # insertAtIndex::      where the array is put among the children values
  #                      before chaining (nil: no chaining)
  # deleteIndices::      positions removed from the children values before
  #                      chaining
  # append_element::     passed on to ArrayNode#add_value
  def initialize(indices = [], arrayIndex = nil,
                 chainedTreeBuilder = nil, insertAtIndex = nil,
                 deleteIndices = [], append_element = true)
    @indices, @array_index = indices, arrayIndex
    @append_element, @shifting_insert = append_element, false
    chainedTreeBuilder = SyntaxTreeBuilder.new("ArrayNode", []) unless chainedTreeBuilder
    chain_treebuilder(chainedTreeBuilder, insertAtIndex, deleteIndices)
  end

  # A new builder with the same configuration and a copy of the chained
  # builder. append_element is passed along as well; leaving it out made
  # the copy fall back to the default (true), so a copy could differ from
  # (and compare unequal to) its original.
  def copy
    n = ArrayNodeBuilder.new(@indices, @array_index, @chained_treebuilder.copy,
                             @insert_at_index, @delete_indices, @append_element)
    n.shifting_insert = @shifting_insert
    n
  end

  # Builds the array node. When a chained builder and an insert index are
  # set, the array is placed into childrenValues (inserted when
  # shifting_insert is set, otherwise replacing the value at that index),
  # the delete indices are removed and the chained builder creates the tree.
  # Note that childrenValues is modified in place by the insertion.
  def create_tree(childrenValues)
    tree = create_tree_basic(childrenValues)
    if @chained_treebuilder && @insert_at_index
      if @shifting_insert
        childrenValues[@insert_at_index, 0] = tree
      else
        childrenValues[@insert_at_index, 1] = tree
      end
      childrenValues = childrenValues.delete_at_indices(@delete_indices)
      return @chained_treebuilder.create_tree(childrenValues)
    else
      return tree
    end
  end

  # Inactivates a child of the chained builder. Integer positions at or
  # after the insert index are shifted by one when shifting_insert is set.
  def inactivate_child(child)
    if child.kind_of?(Integer) &&
       child >= @insert_at_index && @shifting_insert
      @chained_treebuilder.inactivate_child(child + 1)
    else
      @chained_treebuilder.inactivate_child(child)
    end
  end

  # Activates a child of the chained builder. Integer positions at or after
  # the insert index are shifted by one.
  # NOTE(review): unlike inactivate_child this does not check
  # shifting_insert -- confirm whether the asymmetry is intended.
  def activate_child(child)
    if child.kind_of?(Integer) &&
       child >= @insert_at_index
      @chained_treebuilder.activate_child(child + 1)
    else
      @chained_treebuilder.activate_child(child)
    end
  end

  # Sets the chained builder together with the insert and delete positions.
  def chain_treebuilder(chainedTreeBuilder, insertAtIndex, deleteIndices = [])
    @chained_treebuilder = chainedTreeBuilder
    @insert_at_index, @delete_indices = insertAtIndex, deleteIndices
  end

  # Equal when class, indices, array index and append_element agree; the
  # chaining configuration is only compared when an insert index is set.
  def ==(other)
    other.class == self.class &&
      other.indices == indices && other.array_index == array_index &&
      (@insert_at_index ?
        (other.instance_variable_get(:@chained_treebuilder) == @chained_treebuilder &&
         other.instance_variable_get(:@insert_at_index) == @insert_at_index &&
         other.instance_variable_get(:@delete_indices) == @delete_indices) :
        true) &&
      other.append_element == @append_element
  end

  # Source code that recreates this builder. The chained builder is only
  # dumped when an insert index is set.
  def to_src(name = nil, nameHash = {})
    iai, di = @insert_at_index, @delete_indices
    ctb = @insert_at_index ? @chained_treebuilder : nil
    new_of_my_type(as_code(@indices.to_compact_src), @array_index,
                   ctb, iai, as_code(di.to_compact_src), @append_element)
  end

  # Forwards unknown messages to the chained builder; if that fails the
  # call is reported as a missing method on this object. Only StandardError
  # is intercepted so that Interrupt, SystemExit and similar still get
  # through to the caller.
  def method_missing(methodId, *args)
    begin
      @chained_treebuilder.send(methodId, *args)
    rescue StandardError
      super
    end
  end

  protected

  # Collects the values at +indices+ into one array element and either adds
  # it to a copy of the ArrayNode found at array_index or wraps it in a new
  # ArrayNode.
  def create_tree_basic(childrenValues)
    if @indices.length > 0
      array_element = childrenValues.values_at(*@indices)
    else
      array_element = []
    end
    if @array_index && childrenValues[@array_index].kind_of?(ArrayNode)
      # A single index contributes its bare value, not a one-element array.
      array_element = array_element[0] if indices.length == 1
      an = childrenValues[@array_index].deep_copy
      an.add_value(array_element, @append_element)
      return an
    else
      # Several indices form ONE element of the new array.
      array_element = [array_element] if indices.length > 1
      return ArrayNode.new(array_element)
    end
  end
end
|
257
|
+
|
258
|
+
# A GroupingSyntaxTreeBuilder is a transform that alters the children values
# and then passes them on to another STB that creates the tree. Needed for
# denormalization of grouped elements.
class GroupingSyntaxTreeBuilder < SyntaxTreeBuilder
  attr_reader :range, :chained_builder

  # A group of a single element needs no grouping: in that case the given
  # builder itself is returned instead of a new GroupingSyntaxTreeBuilder.
  def self.new(startIndex, endIndex, syntaxTreeBuilder)
    return syntaxTreeBuilder if startIndex == endIndex

    super(startIndex, endIndex, syntaxTreeBuilder)
  end

  # A new grouping builder over the same range with a copy of the chained
  # builder.
  def copy
    chained_copy = @chained_builder.copy
    GroupingSyntaxTreeBuilder.new(@range.first, @range.last, chained_copy)
  end

  # Stores the (inclusive) index range to group and the builder to chain to.
  def initialize(startIndex, endIndex, syntaxTreeBuilder)
    @range = (startIndex..endIndex)
    @chained_builder = syntaxTreeBuilder
  end

  # Replaces the values in the range by one array holding them (in place)
  # and lets the chained builder create the tree.
  def create_tree(childrenValues)
    grouped = childrenValues[@range]
    childrenValues[@range] = [grouped]
    @chained_builder.create_tree(childrenValues)
  end

  # Equal when class, range and chained builder agree.
  def ==(other)
    return false unless other.class == self.class

    other.range == range and other.chained_builder == chained_builder
  end

  # Source code that recreates this builder.
  def to_src(name = nil, nameHash = {})
    first_index, last_index = @range.first, @range.last
    new_of_my_type(first_index, last_index, @chained_builder)
  end
end
|
295
|
+
|
296
|
+
# Nodes in a syntax tree have a name and can have children. The children
# are also named. The children can be accessed as methods with their name
# or via [] with their name or order number. Syntax tree nodes can also have
# attributes in a hash.
class SyntaxTree
  attr_reader :attributes, :name, :childrens, :children_names
  attr_reader :raw_src # ywesee

  # Raises ArgumentError unless there is exactly one name per child value.
  # The given arrays are stored as they are (not copied).
  def initialize(name, childrenNames = [], childrens = [])
    @name = name
    unless childrenNames.length == childrens.length
      raise ArgumentError, "All childrens must be given a name"
    end
    @children_names, @childrens = childrenNames, childrens
    @attributes = Hash.new
  end

  # Trees are equal when class, name, children names and children agree.
  def ==(other)
    other.class == self.class &&
      other.name == name &&
      other.children_names == children_names &&
      other.childrens == childrens
  end

  # Delegates to syntaxtree_as_dot_digraph, which is not defined in this
  # class.
  def to_graph
    syntaxtree_as_dot_digraph(self)
  end

  # Child by order number (Integer) or by name (anything else). Raises
  # ArgumentError for an unknown name.
  def [](anIntegerOrChildrenName)
    if anIntegerOrChildrenName.kind_of?(Integer)
      @childrens[anIntegerOrChildrenName]
    else
      child_with_name(anIntegerOrChildrenName)
    end
  end

  # Makes children readable as methods (tree.some_child). Messages that do
  # not name a child are forwarded to the children array when it
  # understands them.
  def method_missing(methodId, *args, &block)
    begin
      child_with_name(methodId.id2name)
    rescue ArgumentError
      if @childrens.respond_to?(methodId)
        @childrens.send(methodId, *args, &block)
      else
        super
      end
    end
  end

  # ywesee
  # Stores the source text with every run of whitespace collapsed into a
  # single space; nil/false leaves the current value untouched.
  def raw_src=(str)
    @raw_src = str.gsub(/\s+/n, ' ') if str
  end

  # (recursively) delete children whose name is "_" or nil
  def compact!
    new_childrens, new_children_names = Array.new, Array.new
    @children_names.each_with_index do |childname, i|
      next if childname == "_" || childname.nil?
      new_childrens.push @childrens[i]
      new_children_names.push childname
    end
    @childrens, @children_names = new_childrens, new_children_names
    compact_childrens
    self
  end

  # Compacts every child that is itself a SyntaxTree.
  def compact_childrens
    @childrens.each {|c| c.compact! if c.kind_of?(SyntaxTree)}
  end

  # Depth first node visiting: all descendants are yielded before the node
  # itself. Children that are not SyntaxTrees are skipped.
  def each_node(&b)
    @childrens.each {|c| c.each_node(&b) if c.kind_of?(SyntaxTree)}
    b.call(self)
  end
  alias each_depth_first each_node

  # Breadth first node visiting: this node first, then all its children,
  # then all grandchildren, and so on. Children that are not SyntaxTrees
  # are skipped. (Recursing with each_node here would walk each subtree
  # depth first instead, so a queue of pending nodes is used.)
  def each_breadth_first(&b)
    pending = [self]
    until pending.empty?
      node = pending.shift
      b.call(node)
      node.childrens.each {|c| pending.push(c) if c.kind_of?(SyntaxTree)}
    end
  end

  # Compact inspect without newlines. A node whose children are exactly
  # "lexeme" and "value" is shown as the inspect of its first child.
  def inspect_compact
    if ["lexeme", "value"].sort == @children_names.sort
      return @childrens[0].inspect
    end
    str = "#{name}"
    if @childrens.length > 0
      str << ":[" << @childrens.map {|child| child.inspect}.join(",") << "]"
    end
    str
  end
  alias inspect inspect_compact

  # Print as multi-line string with children indented. Child names are
  # interpolated so names that are not Strings do not raise a TypeError.
  def inspect_multi(indentLevel = 0)
    str = "#{@name}"
    @childrens.each_with_index do |child, i|
      str << "\n" << (" " * (indentLevel+1)) << "#{@children_names[i]}: "
      str << (child.kind_of?(SyntaxTree) ? child.inspect_multi(indentLevel+1) :
                                           child.inspect)
    end
    str
  end

  protected

  # The child registered under the given name; ArgumentError if there is
  # none.
  def child_with_name(anObject)
    child_index = @children_names.index(anObject)
    if child_index
      @childrens[child_index]
    else
      raise ArgumentError, "There is no child named #{anObject.inspect}"
    end
  end
end
|
421
|
+
|
422
|
+
# SyntaxTree node named "_ArrayNode" that holds a list of values; the
# children are named "c1", "c2", ... when the node is created.
class ArrayNode < SyntaxTree
  attr_reader :attributes, :name, :childrens

  # values becomes the children array of the node (it is not copied).
  def initialize(values)
    super("_ArrayNode", value_names(values), values)
  end

  # A new ArrayNode built on a clone of the children array. The values
  # themselves are not copied.
  def deep_copy
    ArrayNode.new(@childrens.clone)
  end

  # The values as a plain Array (the children array itself).
  def as_a
    @childrens
  end

  # Adds a value at the end (append) or at the front (default). The new
  # child is named "c<N>" where N is the number of values after adding.
  def add_value(value, append = false)
    insertion = append ? :push : :unshift
    @childrens.send(insertion, value)
    @children_names.send(insertion, "c#{@childrens.length}")
  end

  protected

  # Names "c1" .. "cN" for N values.
  def value_names(values)
    (1..values.length).map { |position| "c#{position}" }
  end
end
|