rpdf2txt 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/LICENCE +515 -0
- data/Manifest.txt +126 -0
- data/README.txt +30 -0
- data/Rakefile +24 -0
- data/bin/rpdf2txt +58 -0
- data/config.save +12 -0
- data/install.rb +1098 -0
- data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
- data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
- data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
- data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
- data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
- data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
- data/lib/rpdf2txt-rockit/grammar.rb +644 -0
- data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
- data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
- data/lib/rpdf2txt-rockit/indexable.rb +53 -0
- data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
- data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
- data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
- data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
- data/lib/rpdf2txt-rockit/profiler.rb +168 -0
- data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
- data/lib/rpdf2txt-rockit/rockit.rb +76 -0
- data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
- data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
- data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
- data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
- data/lib/rpdf2txt-rockit/token.rb +364 -0
- data/lib/rpdf2txt-rockit/version.rb +3 -0
- data/lib/rpdf2txt/attributesparser.rb +42 -0
- data/lib/rpdf2txt/cmapparser.rb +65 -0
- data/lib/rpdf2txt/data/_cmap.grammar +11 -0
- data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/cmap.grammar +11 -0
- data/lib/rpdf2txt/data/cmap.rb +37 -0
- data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/cmap_range.rb +43 -0
- data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
- data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
- data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
- data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
- data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
- data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
- data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
- data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
- data/lib/rpdf2txt/data/pdftext.grammar +102 -0
- data/lib/rpdf2txt/data/pdftext.rb +146 -0
- data/lib/rpdf2txt/default_handler.rb +352 -0
- data/lib/rpdf2txt/lzw.rb +69 -0
- data/lib/rpdf2txt/object.rb +1114 -0
- data/lib/rpdf2txt/parser.rb +169 -0
- data/lib/rpdf2txt/symbol.rb +408 -0
- data/lib/rpdf2txt/text.rb +182 -0
- data/lib/rpdf2txt/text_state.rb +434 -0
- data/lib/rpdf2txt/textparser.rb +42 -0
- data/test/data/3392_obj +0 -0
- data/test/data/397_decrypted +15 -0
- data/test/data/450_decrypted +153 -0
- data/test/data/450_obj +0 -0
- data/test/data/452_decrypted +125 -0
- data/test/data/454_decrypted +108 -0
- data/test/data/456_decrypted +106 -0
- data/test/data/458_decrypted +111 -0
- data/test/data/458_obj +0 -0
- data/test/data/460_decrypted +118 -0
- data/test/data/460_obj +0 -0
- data/test/data/463_decrypted +117 -0
- data/test/data/465_decrypted +107 -0
- data/test/data/465_obj +0 -0
- data/test/data/90_obj +0 -0
- data/test/data/90_obj_comp +1 -0
- data/test/data/decrypted +0 -0
- data/test/data/encrypt_obj +0 -0
- data/test/data/encrypt_string +0 -0
- data/test/data/encrypt_string_128bit +0 -0
- data/test/data/encrypted_object_stream.pdf +0 -0
- data/test/data/firststream +1 -0
- data/test/data/index.pdfobj +0 -0
- data/test/data/index_2bit.pdfobj +0 -0
- data/test/data/index_masked.pdfobj +0 -0
- data/test/data/indexed.pdfobj +0 -0
- data/test/data/indexed_2bit.pdfobj +0 -0
- data/test/data/indexed_masked.pdfobj +0 -0
- data/test/data/inline.png +0 -0
- data/test/data/logo.png +0 -0
- data/test/data/lzw.pdfobj +0 -0
- data/test/data/lzw_index.pdfobj +0 -0
- data/test/data/page_tree.pdf +148 -0
- data/test/data/pdf_20.png +0 -0
- data/test/data/pdf_21.png +0 -0
- data/test/data/pdf_22.png +0 -0
- data/test/data/pdf_50.png +0 -0
- data/test/data/png.pdfobj +0 -0
- data/test/data/space_bug_stream.txt +119 -0
- data/test/data/stream.txt +292 -0
- data/test/data/stream_kerning_bug.txt +13 -0
- data/test/data/stream_kerning_bug2.txt +6 -0
- data/test/data/test.pdf +0 -0
- data/test/data/test.txt +8 -0
- data/test/data/test_text.txt +42 -0
- data/test/data/working_obj +0 -0
- data/test/data/working_obj2 +0 -0
- data/test/mock.rb +149 -0
- data/test/suite.rb +30 -0
- data/test/test_pdf_object.rb +1802 -0
- data/test/test_pdf_parser.rb +1340 -0
- data/test/test_pdf_text.rb +789 -0
- data/test/test_space_bug_05_2004.rb +87 -0
- data/test/test_stream.rb +194 -0
- data/test/test_text_state.rb +315 -0
- data/usage-en.txt +112 -0
- data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
- data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
- metadata +220 -0
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# Probe for the strscan standard library. $USING_STRSCAN ends up true when
# the require succeeds and false when it raises LoadError, so the code below
# can decide whether the pure-Ruby substitute has to be defined.
$USING_STRSCAN =
  begin
    require 'strscan'
    true
  rescue LoadError
    false
  end
|
|
7
|
+
|
|
8
|
+
unless $USING_STRSCAN
|
|
9
|
+
# Simple substitution for strscan in Ruby for users that lack strscan.
#
# Only the small part of the scanner interface defined below is provided.
# The scan position is kept in +pointer+ and is used as an index into the
# scanned string.
class StringScanner
  # Current scan position. It can be read and assigned directly.
  attr_accessor :pointer

  # Creates a scanner over +string+, positioned at its start.
  def initialize(string)
    @orig = string
    @pointer = 0
  end

  # Returns the string that is being scanned.
  def string
    @orig
  end

  # Returns the not yet scanned part of the string.
  #
  # String#[] yields nil when the start of the range lies beyond the end of
  # the string (possible since +pointer+ is writable), so an empty string is
  # returned in that case instead of leaking nil to the callers.
  def rest
    @orig[pointer..-1] || ""
  end

  # Returns true if there is anything left to scan.
  def rest?
    !rest.empty?
  end

  # Matches +regexp+ at the scan position without moving it.
  # Returns the matched text, or nil when there is no match at the position.
  def check(regexp)
    apply_regexp(regexp, false)
  end

  # Matches +regexp+ at the scan position and moves the position past the
  # match. Returns the matched text, or nil when there is no match at the
  # position (the position is left untouched then).
  def scan(regexp)
    apply_regexp(regexp, true)
  end

  private

  # Moves the scan position forward by the length of the whole match.
  def advance_pointer(matchData)
    @pointer += matchData[0].length
  end

  # Shared implementation of check and scan. The regexp is matched against
  # the rest of the string and only accepted when the match starts at its
  # very beginning, i.e. at the current scan position.
  def apply_regexp(regexp, advancePointer)
    md = regexp.match(rest)
    return nil unless md && md.begin(0) == 0
    advance_pointer(md) if advancePointer
    md[0]
  end
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
require 'rpdf2txt-rockit/sourcecode_dumpable'
|
|
2
|
+
require 'rpdf2txt-rockit/graphdrawing'
|
|
3
|
+
require 'rpdf2txt-rockit/base_extensions'
|
|
4
|
+
|
|
5
|
+
# Base class for objects that build the syntax tree. Each has a name of
# the node they build, a list of names of the children and a list of the
# indices of the childrens that are inactive. No value will be specified for
# inactive childrens even though their names are still available for the
# built node.
# Children named "_" are semi-inactive; values for them need to be specified
# when creating trees but they will be deleted when the tree is compacted.
# If a tree is not compacted their value can be accessed via their index
# number.
#
class SyntaxTreeBuilder
  include SourceCodeDumpable
  attr_accessor :node_name
  attr_reader :children_names, :inactive_children_indices

  # nodeName:: name of the nodes to build (stored as a String)
  # childrenNames:: names of the children; only "_" may occur more than once
  # inactiveChildrenIndices:: positions (in childrenNames) of inactive children
  #
  # Raises ArgumentError if a name other than "_" is duplicated.
  def initialize(nodeName, childrenNames, inactiveChildrenIndices = [])
    @node_name = nodeName.to_s
    @inactive_children_indices = inactiveChildrenIndices.sort
    init_children_names(childrenNames)
  end

  # Stores the children names as strings in a fresh array, rejecting
  # duplicates of any name except "_".
  def init_children_names(childrenNames)
    @children_names = []
    childrenNames.each do |name|
      name = name.to_s
      if name != "_" && @children_names.include?(name)
        raise ArgumentError,
          "there are duplicates in the children names #{childrenNames.inspect}"
      end
      @children_names.push name
    end
  end
  protected :init_children_names

  # Returns a new builder with the same node name, children names and
  # inactive indices.
  def copy
    SyntaxTreeBuilder.new(node_name, children_names, inactive_children_indices)
  end

  # Names of the children that are currently active, in order.
  #
  # Activity is decided per position, not per name: several children may be
  # called "_" and inactivating one of them must not hide the others.
  def active_childrens
    children_names.values_at(*active_children_indices)
  end

  # Names of the children that are currently inactive.
  def inactive_childrens
    children_names.values_at(*@inactive_children_indices)
  end

  # Marks a child as inactive. An Integer is taken as a position among the
  # currently active children, anything else as a child name.
  # A child that cannot be found is ignored; recording nil as a position
  # would break the later use of the index list.
  def inactivate_child(anIntegerOrString)
    position, = position_and_child(anIntegerOrString)
    return @inactive_children_indices if position.nil?
    @inactive_children_indices.push(position)
    @inactive_children_indices.uniq!
    @inactive_children_indices.sort!
  end

  # Positions (in children_names) of the children that are not inactive.
  def active_children_indices
    (0...children_names.length).reject do |i|
      @inactive_children_indices.include?(i)
    end
  end
  protected :active_children_indices

  # Resolves a child designator to [position in children_names, child name].
  # The position is nil when the child cannot be found.
  def position_and_child(anIntegerOrString)
    if anIntegerOrString.kind_of?(Integer)
      # Position is among currently active childrens
      position = active_children_indices[anIntegerOrString]
      child = position && children_names[position]
      return position, child
    else
      # Names are stored as strings, so look the designator up as a string.
      return children_names.index(anIntegerOrString.to_s), anIntegerOrString
    end
  end
  protected :position_and_child

  # Makes an inactive child active again. The designator is resolved exactly
  # as in inactivate_child.
  def activate_child(anIntegerOrString)
    position, = position_and_child(anIntegerOrString)
    @inactive_children_indices.delete position
  end

  # Builders are equal when they are of the same class and agree on node
  # name, children names and inactive indices.
  def ==(other)
    other.class == self.class and
      other.node_name == node_name and
      other.children_names == children_names and
      other.inactive_children_indices == inactive_children_indices
  end

  # Creates a SyntaxTree from the values of the active children; inactive
  # children get nil as their value.
  def create_tree(childrenValues)
    childrenValues = insert_nil_for_inactive_children(childrenValues)
    SyntaxTree.new(node_name, children_names, childrenValues)
  end

  # Expands the values of the active children to one entry per child name by
  # putting nil at every inactive position.
  def insert_nil_for_inactive_children(children)
    next_value = 0
    (0...@children_names.length).map do |i|
      if @inactive_children_indices.include?(i)
        nil
      else
        value = children[next_value]
        next_value += 1
        value
      end
    end
  end
  protected :insert_nil_for_inactive_children

  # Source code dump of this builder, built with the helpers that
  # SourceCodeDumpable provides (assign_to, new_of_my_type, as_code).
  def to_src(name = nil, nameHash = {})
    assign_to(name,
              new_of_my_type(node_name,
                             as_code(children_names.to_compact_src),
                             as_code(inactive_children_indices.to_compact_src)))
  end
end
|
|
109
|
+
|
|
110
|
+
# Builder with the node name "^". Instead of creating a new node its
# create_tree hands back one of the children values unchanged.
class LiftingSyntaxTreeBuilder < SyntaxTreeBuilder
  # Remembers the position of the first child whose name is not "_";
  # the position is nil when every child is named "_".
  def initialize(childrenNames, inactiveChildrenIndices = [])
    super("^", childrenNames, inactiveChildrenIndices)
    first_named = @children_names.detect { |child_name| child_name != "_" }
    @child_to_lift_index = @children_names.index(first_named)
  end

  # Returns the value at the remembered position. Without such a position
  # the first value that has no child named "lexeme" (ie. which represents
  # a non-terminal) is used, falling back to the very first value.
  def create_tree(childrenValues)
    return childrenValues[@child_to_lift_index] if @child_to_lift_index

    non_terminal = childrenValues.detect do |value|
      !value.children_names.include?("lexeme")
    end
    non_terminal || childrenValues[0]
  end

  # Returns a new lifting builder with the same children names and inactive
  # indices.
  def copy
    LiftingSyntaxTreeBuilder.new(children_names, inactive_children_indices)
  end

  # Source code dump of this builder (the node name is implied by the class).
  def to_src(name = nil, nameHash = {})
    names_src = as_code(children_names.to_compact_src)
    inactive_src = as_code(inactive_children_indices.to_compact_src)
    assign_to(name, new_of_my_type(names_src, inactive_src))
  end
end
|
|
139
|
+
|
|
140
|
+
# Short hand SyntaxTreeBuilder creator. The node name "^" gives a
# LiftingSyntaxTreeBuilder, every other name a plain SyntaxTreeBuilder.
def stb(nodeName, childrenNames = [], inactiveChildren = [])
  label = nodeName.to_s
  return LiftingSyntaxTreeBuilder.new(childrenNames, inactiveChildren) if label == "^"

  SyntaxTreeBuilder.new(label, childrenNames, inactiveChildren)
end
|
|
149
|
+
|
|
150
|
+
# Builder for arrays. You specify the indices for the children values
# that should be collected into an Array element and (optionally)
# give the index to an Array to append the element to.
#
# The built ArrayNode can be handed on to a chained tree builder: it is put
# into the children values at insertAtIndex (replacing the value there, or
# shifting the following values when shifting_insert is set), the values at
# deleteIndices are removed and the chained builder creates the final tree.
class ArrayNodeBuilder
  include SourceCodeDumpable
  attr_reader :indices, :array_index
  attr_accessor :append_element, :shifting_insert

  # indices:: positions of the children values forming the new element
  # arrayIndex:: position of an existing ArrayNode to extend (or nil)
  # chainedTreeBuilder:: builder that gets the result; defaults to a
  #                      SyntaxTreeBuilder for "ArrayNode" without children
  # insertAtIndex:: where the ArrayNode is put before chaining (nil = no chaining)
  # deleteIndices:: positions removed from the values before chaining
  # append_element:: true to append the element to an existing ArrayNode,
  #                  false to prepend it
  def initialize(indices = [], arrayIndex = nil,
                 chainedTreeBuilder = nil, insertAtIndex = nil,
                 deleteIndices = [], append_element = true)
    @indices, @array_index = indices, arrayIndex
    @append_element, @shifting_insert = append_element, false
    chainedTreeBuilder ||= SyntaxTreeBuilder.new("ArrayNode", [])
    chain_treebuilder(chainedTreeBuilder, insertAtIndex, deleteIndices)
  end

  # Returns a new builder with the same settings and a copy of the chained
  # builder. append_element is passed on explicitly; leaving it out would
  # silently reset it to the constructor default.
  def copy
    n = ArrayNodeBuilder.new(@indices, @array_index, @chained_treebuilder.copy,
                             @insert_at_index, @delete_indices, @append_element)
    n.shifting_insert = @shifting_insert
    n
  end

  # Builds the ArrayNode and, when an insert index is set, lets the chained
  # builder create the final tree from the adjusted children values.
  # The adjustments are made on a copy so the caller's array stays untouched.
  def create_tree(childrenValues)
    tree = create_tree_basic(childrenValues)
    return tree unless @chained_treebuilder and @insert_at_index

    values = childrenValues.dup
    # Length 0 inserts before the index, length 1 replaces the value there.
    values[@insert_at_index, @shifting_insert ? 0 : 1] = tree
    # delete_at_indices is not a core Array method; presumably it comes from
    # the project's base_extensions -- verify there.
    values = values.delete_at_indices(@delete_indices)
    @chained_treebuilder.create_tree(values)
  end

  # Inactivates a child of the chained builder; see chained_child_position
  # for how Integer positions are translated.
  def inactivate_child(child)
    @chained_treebuilder.inactivate_child(chained_child_position(child))
  end

  # Activates a child of the chained builder, using the same translation of
  # Integer positions as inactivate_child so that the two stay symmetric.
  def activate_child(child)
    @chained_treebuilder.activate_child(chained_child_position(child))
  end

  # Sets the builder that receives the result together with the insert
  # position and the positions to delete.
  def chain_treebuilder(chainedTreeBuilder, insertAtIndex, deleteIndices = [])
    @chained_treebuilder = chainedTreeBuilder
    @insert_at_index, @delete_indices = insertAtIndex, deleteIndices
  end

  # Equal when class, indices, array index and append_element agree and, if
  # this builder has an insert index, the chaining settings agree as well.
  def ==(other)
    return false unless other.class == self.class
    return false unless other.indices == indices && other.array_index == array_index
    return false unless other.append_element == @append_element
    return true unless @insert_at_index

    other.instance_variable_get(:@chained_treebuilder) == @chained_treebuilder &&
      other.instance_variable_get(:@insert_at_index) == @insert_at_index &&
      other.instance_variable_get(:@delete_indices) == @delete_indices
  end

  # Source code dump of this builder. The chained builder is only included
  # when an insert index is set.
  def to_src(name = nil, nameHash = {})
    iai, di = @insert_at_index, @delete_indices
    ctb = @insert_at_index ? @chained_treebuilder : nil
    new_of_my_type(as_code(@indices.to_compact_src), @array_index,
                   ctb, iai, as_code(di.to_compact_src), @append_element)
  end

  # Forwards unknown messages to the chained builder. Whether it can handle
  # the message is checked up front, so that an error raised inside the
  # forwarded method reaches the caller as it is instead of being replaced
  # by a NoMethodError.
  def method_missing(methodId, *args, &block)
    if @chained_treebuilder.respond_to?(methodId, true)
      @chained_treebuilder.send(methodId, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? in line with what method_missing forwards.
  def respond_to_missing?(methodId, include_private = false)
    @chained_treebuilder.respond_to?(methodId, include_private) || super
  end

  protected

  # Translates a child designator for the chained builder. Only with a
  # shifting insert do the children at or after the insert position move one
  # step to the right; names and all other positions are passed on unchanged.
  def chained_child_position(child)
    shifted = @shifting_insert && @insert_at_index &&
              child.kind_of?(Integer) && child >= @insert_at_index
    shifted ? child + 1 : child
  end

  # Collects the values at the configured indices into an element and
  # returns either a copy of the existing ArrayNode extended by that element
  # or a new ArrayNode.
  def create_tree_basic(childrenValues)
    array_element = childrenValues.values_at(*@indices)
    existing = @array_index && childrenValues[@array_index]
    if existing.kind_of?(ArrayNode)
      # A single collected value is added as it is, not wrapped in an array.
      array_element = array_element[0] if indices.length == 1
      an = existing.deep_copy
      an.add_value(array_element, @append_element)
      an
    else
      # Several collected values form one element of the new ArrayNode.
      array_element = [array_element] if indices.length > 1
      ArrayNode.new(array_element)
    end
  end
end
|
|
257
|
+
|
|
258
|
+
# A GroupingSyntaxTreeBuilder is a transform that alters the children values
# and then passes them on to another STB that creates the tree. Needed for
# denormalization of grouped elements.
class GroupingSyntaxTreeBuilder < SyntaxTreeBuilder
  attr_reader :range, :chained_builder

  # A group of a single element needs no transform: in that case the given
  # builder itself is returned instead of a GroupingSyntaxTreeBuilder.
  def GroupingSyntaxTreeBuilder.new(startIndex, endIndex, syntaxTreeBuilder)
    return syntaxTreeBuilder if startIndex == endIndex

    super(startIndex, endIndex, syntaxTreeBuilder)
  end

  # Returns a grouping builder for the same range with a copy of the chained
  # builder.
  def copy
    GroupingSyntaxTreeBuilder.new(@range.first, @range.last,
                                  @chained_builder.copy)
  end

  # startIndex, endIndex:: first and last position of the values to group
  # syntaxTreeBuilder:: builder that creates the tree from the grouped values
  def initialize(startIndex, endIndex, syntaxTreeBuilder)
    @range, @chained_builder = (startIndex..endIndex), syntaxTreeBuilder
  end

  # Replaces the values in the range by one array holding them and lets the
  # chained builder create the tree. The grouping is done on a copy so the
  # caller's array stays untouched.
  def create_tree(childrenValues)
    values = childrenValues.dup
    values[@range] = [values[@range]]
    @chained_builder.create_tree values
  end

  # Equal when class, range and chained builder agree.
  def ==(other)
    other.class == self.class and
      other.range == range and other.chained_builder == chained_builder
  end

  # Source code dump of this builder.
  def to_src(name = nil, nameHash = {})
    new_of_my_type(@range.first, @range.last, @chained_builder)
  end
end
|
|
295
|
+
|
|
296
|
+
# Nodes in a syntax tree have a name and can have childrens. The childrens
# are also named. The childrens can be accessed as methods with their name
# or via [] with their name or order number. Syntax tree nodes can also have
# attributes in a hash.
class SyntaxTree
  attr_reader :attributes, :name, :childrens, :children_names
  attr_reader :raw_src # ywesee

  # name:: name of the node
  # childrenNames:: names of the children, one per child
  # childrens:: the children values
  #
  # Raises ArgumentError unless there are as many names as children.
  def initialize(name, childrenNames = [], childrens = [])
    @name = name
    unless childrenNames.length == childrens.length
      raise ArgumentError, "All childrens must be given a name"
    end
    @children_names, @childrens = childrenNames, childrens
    @attributes = Hash.new
  end

  # Trees are equal when class, name, children names and children agree.
  def ==(other)
    other.class == self.class and
      other.name == name and
      other.children_names == children_names and
      other.childrens == childrens
  end

  # Graph of this tree, created by syntaxtree_as_dot_digraph (which is
  # defined outside of this class).
  def to_graph
    syntaxtree_as_dot_digraph(self)
  end

  # Returns the child at the given position (Integer) or with the given name.
  # Raises ArgumentError for an unknown name.
  def [](anIntegerOrChildrenName)
    if anIntegerOrChildrenName.kind_of?(Integer)
      @childrens[anIntegerOrChildrenName]
    else
      child_with_name(anIntegerOrChildrenName)
    end
  end

  # Unknown messages are first tried as child names; if there is no such
  # child they are forwarded to the array of children when it understands
  # them.
  def method_missing(methodId, *args, &block)
    begin
      child_with_name(methodId.id2name)
    rescue ArgumentError
      if @childrens.respond_to?(methodId)
        @childrens.send(methodId, *args, &block)
      else
        super
      end
    end
  end

  # ywesee
  # Stores the source text with every run of whitespace collapsed into a
  # single space. nil is ignored.
  def raw_src=(str)
    @raw_src = str.gsub(/\s+/n, ' ') if str
  end

  # (recursively) delete childrens whose name is "_" or nil
  def compact!
    new_childrens, new_children_names = Array.new, Array.new
    @children_names.each_with_index do |childname, i|
      if childname != "_" and childname != nil
        new_childrens.push @childrens[i]
        new_children_names.push @children_names[i]
      end
    end
    @childrens, @children_names = new_childrens, new_children_names
    compact_childrens
    self
  end

  # Compacts every child that is a SyntaxTree itself.
  def compact_childrens
    @childrens.each {|c| c.compact! if c.kind_of?(SyntaxTree)}
  end

  # Depth first node visiting: the block is called for all nodes below a
  # node before it is called for the node itself. Children that are not
  # SyntaxTrees are skipped.
  def each_node(&b)
    @childrens.each {|c| c.each_node(&b) if c.kind_of?(SyntaxTree)}
    b.call(self)
  end
  alias each_depth_first each_node

  # Breadth first node visiting: the block is called for this node, then for
  # all its children, then for all their children and so on. Children that
  # are not SyntaxTrees are skipped. Returns self.
  def each_breadth_first(&b)
    # Nodes waiting for their visit, in visiting order.
    queue = [self]
    until queue.empty?
      node = queue.shift
      b.call(node)
      node.childrens.each {|c| queue.push(c) if c.kind_of?(SyntaxTree)}
    end
    self
  end

  # Compact inspect without newlines
  def inspect_compact
    # A node with exactly the children "lexeme" and "value" is shown as its
    # first child only.
    if ["lexeme", "value"].sort == @children_names.sort
      return @childrens[0].inspect
    end
    return "#{name}" if @childrens.empty?

    "#{name}:[" + @childrens.map {|child| child.inspect}.join(",") + "]"
  end
  alias inspect inspect_compact

  # Print as multi-line string with children indented
  def inspect_multi(indentLevel = 0)
    str = "#{@name}"
    @childrens.each_with_index do |child, i|
      # Interpolation keeps this working for child names that are not Strings.
      str << "\n#{' ' * (indentLevel + 1)}#{@children_names[i]}: "
      str << (child.kind_of?(SyntaxTree) ? child.inspect_multi(indentLevel + 1) :
                                           child.inspect)
    end
    str
  end

  protected

  # Returns the child with the given name.
  # Raises ArgumentError if there is no such child.
  def child_with_name(anObject)
    child_index = @children_names.index(anObject)
    if child_index
      @childrens[child_index]
    else
      raise ArgumentError, "There is no child named #{anObject.inspect}"
    end
  end
end
|
|
421
|
+
|
|
422
|
+
# SyntaxTree node named "_ArrayNode" that holds a list of values. The
# children are named by their position: "c1", "c2", ...
class ArrayNode < SyntaxTree
  attr_reader :attributes, :name, :childrens

  # values:: the values of the node; the given array is used as it is, not
  #          copied
  def initialize(values)
    super("_ArrayNode", value_names(values), values)
  end

  # Returns a new ArrayNode over a clone of the array of values. The values
  # themselves are not copied.
  def deep_copy
    ArrayNode.new(@childrens.clone)
  end

  # Returns the values as an Array (the node's own array, not a copy).
  def as_a
    @childrens
  end

  # Adds a value at the end (append true) or at the front (append false).
  #
  # The names are regenerated afterwards so that they stay positional, as
  # they are for a newly created node. Otherwise a prepended value would get
  # the highest number as its name (e.g. "c3", "c1", "c2") and the node
  # would differ from a deep_copy of itself.
  def add_value(value, append = false)
    if append
      @childrens.push value
    else
      @childrens.unshift value
    end
    @children_names.replace(value_names(@childrens))
  end

  protected

  # Positional names "c1".."cN" for the given values.
  def value_names(values)
    (1..values.length).map {|position| "c#{position}"}
  end
end
|