rpdf2txt 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/LICENCE +515 -0
- data/Manifest.txt +126 -0
- data/README.txt +30 -0
- data/Rakefile +24 -0
- data/bin/rpdf2txt +58 -0
- data/config.save +12 -0
- data/install.rb +1098 -0
- data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
- data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
- data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
- data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
- data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
- data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
- data/lib/rpdf2txt-rockit/grammar.rb +644 -0
- data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
- data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
- data/lib/rpdf2txt-rockit/indexable.rb +53 -0
- data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
- data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
- data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
- data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
- data/lib/rpdf2txt-rockit/profiler.rb +168 -0
- data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
- data/lib/rpdf2txt-rockit/rockit.rb +76 -0
- data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
- data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
- data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
- data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
- data/lib/rpdf2txt-rockit/token.rb +364 -0
- data/lib/rpdf2txt-rockit/version.rb +3 -0
- data/lib/rpdf2txt/attributesparser.rb +42 -0
- data/lib/rpdf2txt/cmapparser.rb +65 -0
- data/lib/rpdf2txt/data/_cmap.grammar +11 -0
- data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/cmap.grammar +11 -0
- data/lib/rpdf2txt/data/cmap.rb +37 -0
- data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/cmap_range.rb +43 -0
- data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
- data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
- data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
- data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
- data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
- data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
- data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
- data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
- data/lib/rpdf2txt/data/pdftext.grammar +102 -0
- data/lib/rpdf2txt/data/pdftext.rb +146 -0
- data/lib/rpdf2txt/default_handler.rb +352 -0
- data/lib/rpdf2txt/lzw.rb +69 -0
- data/lib/rpdf2txt/object.rb +1114 -0
- data/lib/rpdf2txt/parser.rb +169 -0
- data/lib/rpdf2txt/symbol.rb +408 -0
- data/lib/rpdf2txt/text.rb +182 -0
- data/lib/rpdf2txt/text_state.rb +434 -0
- data/lib/rpdf2txt/textparser.rb +42 -0
- data/test/data/3392_obj +0 -0
- data/test/data/397_decrypted +15 -0
- data/test/data/450_decrypted +153 -0
- data/test/data/450_obj +0 -0
- data/test/data/452_decrypted +125 -0
- data/test/data/454_decrypted +108 -0
- data/test/data/456_decrypted +106 -0
- data/test/data/458_decrypted +111 -0
- data/test/data/458_obj +0 -0
- data/test/data/460_decrypted +118 -0
- data/test/data/460_obj +0 -0
- data/test/data/463_decrypted +117 -0
- data/test/data/465_decrypted +107 -0
- data/test/data/465_obj +0 -0
- data/test/data/90_obj +0 -0
- data/test/data/90_obj_comp +1 -0
- data/test/data/decrypted +0 -0
- data/test/data/encrypt_obj +0 -0
- data/test/data/encrypt_string +0 -0
- data/test/data/encrypt_string_128bit +0 -0
- data/test/data/encrypted_object_stream.pdf +0 -0
- data/test/data/firststream +1 -0
- data/test/data/index.pdfobj +0 -0
- data/test/data/index_2bit.pdfobj +0 -0
- data/test/data/index_masked.pdfobj +0 -0
- data/test/data/indexed.pdfobj +0 -0
- data/test/data/indexed_2bit.pdfobj +0 -0
- data/test/data/indexed_masked.pdfobj +0 -0
- data/test/data/inline.png +0 -0
- data/test/data/logo.png +0 -0
- data/test/data/lzw.pdfobj +0 -0
- data/test/data/lzw_index.pdfobj +0 -0
- data/test/data/page_tree.pdf +148 -0
- data/test/data/pdf_20.png +0 -0
- data/test/data/pdf_21.png +0 -0
- data/test/data/pdf_22.png +0 -0
- data/test/data/pdf_50.png +0 -0
- data/test/data/png.pdfobj +0 -0
- data/test/data/space_bug_stream.txt +119 -0
- data/test/data/stream.txt +292 -0
- data/test/data/stream_kerning_bug.txt +13 -0
- data/test/data/stream_kerning_bug2.txt +6 -0
- data/test/data/test.pdf +0 -0
- data/test/data/test.txt +8 -0
- data/test/data/test_text.txt +42 -0
- data/test/data/working_obj +0 -0
- data/test/data/working_obj2 +0 -0
- data/test/mock.rb +149 -0
- data/test/suite.rb +30 -0
- data/test/test_pdf_object.rb +1802 -0
- data/test/test_pdf_parser.rb +1340 -0
- data/test/test_pdf_text.rb +789 -0
- data/test/test_space_bug_05_2004.rb +87 -0
- data/test/test_stream.rb +194 -0
- data/test/test_text_state.rb +315 -0
- data/usage-en.txt +112 -0
- data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
- data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
- metadata +220 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Common classes used in many of the parse table generation classes.
|
|
2
|
+
#
|
|
3
|
+
require 'rpdf2txt-rockit/indexable'
|
|
4
|
+
require 'rpdf2txt-rockit/token'
|
|
5
|
+
require 'rpdf2txt-rockit/grammar'
|
|
6
|
+
|
|
7
|
+
# An item used during parse table generation: a production, a position
# inside that production's elements and an optional lookahead.
class Item
  include Indexable
  attr_reader :symbol, :production, :position, :lookahead

  # Shared result returned by calc_followers when nothing follows.
  @@empty_array = Array.new

  # production - object responding to #elements and #nonterminal.
  # position   - index into production.elements.
  # lookahead  - optional lookahead symbol.
  # nextItem   - accepted but not stored by this constructor.
  def initialize(production, position, lookahead = nil, nextItem = nil)
    @production = production
    @position = position
    @lookahead = lookahead
    @symbol = production.elements[position]
    @hash_value = [production, position, lookahead].hash
  end

  # Memoized result of calc_followers.
  def direct_following_symbols
    @direct_following_symbols ||= calc_followers
  end

  # Collects the elements after the current position, stopping after the
  # first one that does not derive epsilon.
  def calc_followers
    return @@empty_array if position >= production.elements.length-1
    followers = Array.new
    production.elements[position+1..-1].each do |element|
      followers << element
      break unless element.derives_epsilon?
    end
    followers
  end

  # The elements after the current position, or nil when there are none.
  def suffix
    return nil unless position < production.elements.length-1
    production.elements[position+1..-1]
  end

  # True when there is no next item.
  def final?
    next_item == nil
  end

  # Memoized result of calc_next_item (a nil result is recomputed each call).
  def next_item
    @next_item ||= calc_next_item
  end

  # Builds an item for the same production and position with the given
  # lookahead symbol.
  def lookahead_item(lookaheadSymbol)
    make_new_item(production, position, lookaheadSymbol, next_item)
  end

  # Renders the item as "Nonterminal->before .after".
  def inspect
    all_elements = production.elements
    at_edge = (@position >= all_elements.length) || (@position == 0)
    before = inspect_elements(all_elements[0...@position])
    after = inspect_elements(all_elements[@position..-1])
    production.nonterminal.inspect + "->" + before + (at_edge ? "." : " .") + after
  end

  protected

  # Space separated inspect strings of the elements ("" for nil).
  def inspect_elements(elements)
    return "" unless elements
    elements.map {|element| element.inspect}.join(" ")
  end

  # Creates an item through @factory when one is set, otherwise directly.
  def make_new_item(*args)
    if @factory
      @factory.make(*args)
    else
      Item.new(*args)
    end
  end

  # The item one position further on, or nil past the last element.
  def calc_next_item
    return nil unless position < production.elements.length
    make_new_item(production, position+1)
  end
end
|
|
81
|
+
|
|
82
|
+
# A parser state described by its kernel items; the closure over those
# items is computed at construction time.
class LrState
  include Indexable
  attr_reader :kernel_items, :closure, :final_items

  # kernelItems - collection of items responding to #symbol and #final?.
  def initialize(kernelItems)
    @kernel_items = kernelItems
    calc_closure(kernelItems) # Do it lazily instead?
    @final_items = kernelItems.select {|item| item.final?}
    @reduce_state = (@final_items.length > 0)
    @consistent = (kernelItems.length == 1)
  end

  # True when at least one kernel item is final.
  def reduce_state?
    @reduce_state
  end

  # True when the state has exactly one kernel item.
  def consistent?
    @consistent
  end

  # Number of kernel items.
  def length
    @kernel_items.length
  end

  def inspect
    "State(#{@kernel_items.inspect})"
  end

  protected

  @@empty_array = Array.new

  # Fills @closure with the kernel items plus the nonkernel items of every
  # symbol reachable from them.
  def calc_closure(itemset)
    @closure = itemset.clone
    checked = Hash.new
    itemset.each {|item| recursive_calc_closure(item.symbol, checked)}
  end

  # Adds the nonkernel items of symbol (and, recursively, of the symbols
  # those items point at) to @closure. checked records visited symbols and
  # is returned.
  def recursive_calc_closure(symbol, checked)
    return checked unless symbol
    return checked if checked[symbol]
    checked[symbol] = true
    added = symbol.nonkernel_items
    @closure.concat(added)
    added.each {|item| recursive_calc_closure(item.symbol, checked)}
    checked
  end
end
|
|
127
|
+
|
|
128
|
+
# Decorate the grammar symbols with some additional info we're gonna need
|
|
129
|
+
# Additions to NonTerminal needed by the parse table generators.
class NonTerminal
  include Indexable
  attr_reader :nonkernel_items
  attr_writer :derives_epsilon

  # Builds one item at position 0 for every alternative the grammar lists
  # for this nonterminal and stores them in @nonkernel_items.
  def calc_nonkernel_items(grammar, itemFactory)
    @nonkernel_items = Array.new
    grammar.alternatives(self).each do |alternative|
      @nonkernel_items << itemFactory.make(alternative, 0)
    end
  end

  def nonterminal?
    true
  end

  def terminal?
    false
  end

  # Value last assigned through derives_epsilon= (nil if never assigned).
  def derives_epsilon?
    @derives_epsilon
  end
end
|
|
146
|
+
|
|
147
|
+
# Additions to Token needed by the parse table generators.
class Token
  include Indexable

  # Shared, always-empty list of nonkernel items.
  @@empty_array = []

  def nonterminal?
    false
  end

  def terminal?
    true
  end

  # Tokens contribute no nonkernel items.
  def nonkernel_items
    @@empty_array
  end

  def derives_epsilon?
    false
  end
end
|
|
160
|
+
|
|
161
|
+
# Decorate Productions
|
|
162
|
+
# Productions only gain the Indexable mixin here.
class Production
  include Indexable
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
require 'rpdf2txt-rockit/syntax_tree'
|
|
2
|
+
|
|
3
|
+
# Syntax tree node named "_Ambiguity" whose single child is the list of
# alternative trees.
class AmbiguityNode < SyntaxTree
  def initialize(alt1, alt2)
    @ambigous_trees = [alt1, alt2]
    super("_Ambiguity", ["ambigous_trees"], [@ambigous_trees])
  end

  # Appends tree unless it is already among the alternatives.
  # Returns the list when the tree was added, nil otherwise.
  def add_ambigoustree(tree)
    return nil if @ambigous_trees.include?(tree)
    @ambigous_trees.push tree
  end
end
|
|
13
|
+
|
|
14
|
+
# Raised when a string can be parsed in more than one way. Carries the
# alternative trees, the full tree and the substring being parsed.
class AmbigousParseException < Exception
  attr_reader :alternatives, :substring

  # stringBeingParsed - the text the ambiguity was found in.
  # fullTree          - the complete parse result; must respond to #compact!.
  # alternativeTrees  - the competing trees.
  def initialize(stringBeingParsed, fullTree, *alternativeTrees)
    super("Ambigous parse")
    @alternatives, @full_tree = alternativeTrees, fullTree
    init_substring(stringBeingParsed)
  end

  # Human readable report listing every alternative (numbered from 1),
  # followed by a difference report for alternatives that print identically
  # and finally the full tree. Note that this calls compact! on the full
  # tree. prettyPrinter, when given, must respond to #print(tree).
  def inspect(prettyPrinter = nil)
    str = "Ambiguity: The substring '#{substring}' can be parsed as:\n"
    strings_to_show, same_strings = Array.new, Array.new
    alternatives.each_with_index do |alt, i|
      s = prettyPrinter ? prettyPrinter.print(alt) : alt.inspect
      # Remember pairs of alternatives whose printed form is identical.
      if (j = strings_to_show.index(s))
        same_strings.push [j, i]
      end
      strings_to_show.push s
    end
    alternatives.each_with_index do |alt, i|
      str << " Alternative #{i+1}: #{strings_to_show[i]}"
      str << ", or" if i < alternatives.length-1
      str << "\n"
    end
    same_strings.each do |i, j|
      str << report_on_tree_differences(alternatives[i], alternatives[j],
                                        i, j)
    end
    @full_tree.compact!
    str + "The full tree looks like:\n" + @full_tree.inspect
  end

  # One sentence describing that alternatives i and j (0-based indices,
  # printed 1-based) differ in str0, showing both values.
  def difference_description(i, j, str0, o1, o2, childPath = "")
    child_str =
      childPath.length > 0 ? "in the childrens '#{childPath[1..-1]}'" : ""
    " Alternatives #{i+1} and #{j+1} differ #{child_str} by not having" +
      " the same #{str0} (#{o1.inspect} and #{o2.inspect})"
  end

  # Describes the first difference found between t1 and t2, or returns ""
  # when none is found. i and j are the 0-based indices of the alternatives
  # and are always printed 1-based so they match the "Alternative N:" labels
  # produced by #inspect.
  def report_on_tree_differences(t1, t2, i, j, childPath = "")
    if t1.class != t2.class
      difference_description(i, j, "type", t1.class, t2.class, childPath)
    elsif not t1.kind_of?(SyntaxTree)
      if t1 != t2
        # Previously printed the raw 0-based indices here.
        " Alternatives #{i+1} and #{j+1} are not SyntaxTree's and differ"
      else
        ""
      end
    elsif t1.name != t2.name
      difference_description(i, j, "name", t1.name, t2.name, childPath)
    elsif t1.children_names != t2.children_names
      difference_description(i, j, "children_names", t1.children_names,
                             t2.children_names, childPath)
    else
      t1.childrens.each_with_index do |child, k|
        report = report_on_tree_differences(child, t2[k], i, j,
                                            childPath + "." +
                                            t1.children_names[k])
        return report if report.length > 0
      end
      return ""
    end
  end

  protected

  def init_substring(string)
    @substring = string
  end
end
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
# Object providing `times.utime`. `defined?` is required here: referencing an
# undefined constant in a plain `unless` raises NameError instead of
# evaluating to false.
unless defined?(TimesClass)
  TimesClass = ((RUBY_VERSION < "1.7") ? Time : Process)
end

# Manual profiler: instrumented methods call Profiler.__enter__ on entry and
# Profiler.__leave__ on exit; Profiler.profile_summary renders the totals.
# Times are user CPU seconds as reported by TimesClass.times.utime.
module Profiler
  @@start = TimesClass.times.utime

  # Method invocation stack with one entry for each invocation:
  # Time at entry, Total times in subfunction also being logged, MethodId
  @@invocation_stack = [[0, 0, "#toplevel".intern]]

  # One entry for each method: NumCalls, TotalTime, OnlyMyTime, Callers
  @@map = {"#toplevel".intern => [1, 0, 0]}

  # One entry for each method: hash mapping args.inspect to count
  @@arguments = Hash.new

  # Limit in seconds; nil disables the check in __enter__.
  @@time_limit = 5 * 60

  # Resets the start time. With an argument the time limit is set to that
  # many minutes; without one the time limit is switched off.
  def start(timeLimitInMinutes = nil)
    if timeLimitInMinutes
      @@time_limit = timeLimitInMinutes * 60
    else
      @@time_limit = nil
    end
    @@start = Float(TimesClass.times.utime)
  end
  module_function :start

  # Records entry into `method` with the given arguments. When a time limit
  # is active and exceeded, prints the summary to STDERR and exits with -1.
  # Returns how many times `method` has been entered with these arguments.
  def __enter__(method, *args)
    now = TimesClass.times.utime
    if @@time_limit
      if now - @@start > @@time_limit
        STDERR.puts "Profiling time limit violated. Run terminated."
        STDERR.puts Profiler.profile_summary(true, true)
        exit(-1)
      end
    end
    @@invocation_stack.push [now, 0.0, method]
    # Create the per-method counter on first use instead of rescuing the
    # failure and retrying.
    (@@arguments[method] ||= Hash.new(0))[args.inspect] += 1
  end
  module_function :__enter__

  # Records exit from `method`: pops the matching stack frame, updates the
  # call count, total time, self time and call-site count, and charges the
  # elapsed time to the enclosing frame. Returns retargs.
  def __leave__(method, *retargs)
    now = TimesClass.times.utime
    tick = @@invocation_stack.pop
    data = (@@map[method] ||= [0.0, 0.0, 0.0, Hash.new(0)])
    data[0] += 1
    total_time_this_invocation = now - tick[0]
    data[1] += total_time_this_invocation
    data[2] += total_time_this_invocation - tick[1]
    # caller[1] is nil when the instrumented code runs at the top level;
    # store a placeholder so profile_summary can still print the site.
    data[3][caller[1] || "<unknown>"] += 1
    @@invocation_stack[-1][1] += total_time_this_invocation
    return *retargs
  end
  module_function :__leave__

  # Go through the invocation stack and leave all methods.
  def unwind_invocation_stack
    while @@invocation_stack.length > 1
      # Only peek at the frame: __leave__ pops it itself.
      __leave__(@@invocation_stack.last[2])
    end
  end
  module_function :unwind_invocation_stack

  # Builds the textual report. Any invocations still on the stack are left
  # first. writeCallers adds the call sites per method, writeArguments adds
  # statistics on repeated argument lists for methods called more than once.
  def Profiler.profile_summary(writeCallers = false, writeArguments = false)
    total_elapsed = TimesClass.times.utime - @@start
    str = "Profiling summary\n"
    str << "*****************\n"
    str << "Total elapsed time: #{total_elapsed} seconds\n"
    unwind_invocation_stack if @@invocation_stack.length > 1
    total = @@invocation_stack.last[1]
    time_in_nonprofiled = total_elapsed - total
    str << "Time spent in non-profiled methods: #{time_in_nonprofiled} sec\n"
    str << "Time in profiled methods:\n"
    total = 0.01 if total == 0 # avoid dividing by zero below
    @@map["#toplevel".intern][1] = total
    # Most expensive (by self time) first.
    rows = @@map.to_a.sort {|a, b| b[1][2] <=> a[1][2]}
    sum = 0
    str << " %% cumulative self self total\n"
    str << " time seconds seconds calls ms/call ms/call name\n"
    str << " ---------------------------------------------------------\n"
    rows.each do |method, d|
      next if method == "#toplevel".intern
      sum += d[2]
      str << ("%6.2f %8.2f %8.2f %8d " % [d[2]/total*100, sum, d[2], d[0]])
      str << ("%8.2f %8.2f %s\n" % [d[2]*1000/d[0], d[1]*1000/d[0],
                                    method.id2name])
      if writeCallers
        str << " Call sites:\n"
        d[3].to_a.sort {|a, b| b[1] <=> a[1]}.each do |callersite, count|
          str << (" #{count}: " + callersite.split("/").last + "\n")
        end
      end
      if writeArguments and d[0] > 1
        str << " Arguments:\n"
        counts, num_prev_seen = Hash.new(0), 0
        @@arguments[method].to_a.sort {|a, b| b[1] <=> a[1]}.each do |args, cnt|
          counts[cnt] += cnt
          num_prev_seen += cnt if cnt > 1
        end
        proportion_prev_seen = num_prev_seen*100.0/d[0]
        proportion_unique = 100.0 - proportion_prev_seen
        str << (" %3.2f%% (#{d[0].to_i - num_prev_seen}) of calls with unique args" % proportion_unique)
        if proportion_unique != 100.0
          str << ", and\n"
          str << (" %3.2f%% (#{num_prev_seen}) of calls with args that were used several times\n" % proportion_prev_seen)
          str << " distr: #{counts.inspect}"
        end
        str << "\n"
      end
      str << "\n" if writeCallers or writeArguments
    end
    return str
  end
end
|
|
128
|
+
|
|
129
|
+
#############################################################################
|
|
130
|
+
# Simple test
|
|
131
|
+
#############################################################################
|
|
132
|
+
# Runs only when this file is executed directly: exercises the profiler
# with a small instrumented class and prints the summary.
if __FILE__ == $0
  class ComplexTest
    attr_reader :real, :imaginary

    def initialize(real, imaginary)
      Profiler.__enter__(:initialize, real, imaginary)
      @real = real
      @imaginary = imaginary
      Profiler.__leave__(:initialize, self)
    end

    # Instrumented wrapper around real_add.
    def add(other)
      Profiler.__enter__(:add, other)
      real_add(other)
      Profiler.__leave__(:add, self)
    end

    # Adds other's parts to this object's parts in place.
    def real_add(other)
      Profiler.__enter__(:real_add, other)
      @real += other.real
      @imaginary += other.imaginary
      Profiler.__leave__(:real_add, self)
    end

    def inspect
      "#{real} + i*#{imaginary}"
    end
  end

  Profiler.start

  10.times do
    accumulator = ComplexTest.new(rand, rand)
    100.times do
      accumulator.add(ComplexTest.new(rand, rand))
    end
    puts "It finished"
  end
  # Two constructions with identical arguments, from two separate call sites.
  c = ComplexTest.new(1,1)
  c = ComplexTest.new(1,1)
  puts Profiler.profile_summary(true, true)
end
|
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
# Find the reductions in a parse table for a StateGraph.
|
|
2
|
+
#
|
|
3
|
+
# There can be reductions in all states with kernel items that are final, ie.
|
|
4
|
+
# where the position cannot be advanced any further
|
|
5
|
+
# (example: State(3, [S -> R.])).
|
|
6
|
+
#
|
|
7
|
+
# For states with only one kernel item (which is final, as is always the
|
|
8
|
+
# case if its only one) we should reduce for
|
|
9
|
+
# all terminals in the follow set of the nonterminal (Follow(S) in the
|
|
10
|
+
# example above). These states are called consistent reduce states.
|
|
11
|
+
#
|
|
12
|
+
# When there are multiple kernel items and one (or several) of them is a final
|
|
13
|
+
# item we should reduce for terminals in Follow(state, nonterminal). These
|
|
14
|
+
# states are called inconsistent reduce states. An example is
|
|
15
|
+
# State(5, [S -> L .= R, R -> L.]) where we don't know whether to reduce
|
|
16
|
+
# or shift. Here, we should reduce by R -> L for all terminals in Follow(5, R).
|
|
17
|
+
#
|
|
18
|
+
# To reduce the amount of computation needed we use a hybrid, lazy approach.
|
|
19
|
+
# We calculate all first and follow sets for all symbols in the grammar.
|
|
20
|
+
# With this we can find the reductions for consistent reduce states. For the
|
|
21
|
+
# inconsistent ones we build the first to follow relations "from the back"
|
|
22
|
+
# (ie. from the actual follow sets we need) up to the first sets. This way
|
|
23
|
+
# we don't need to calculate relations that are not relevant.
|
|
24
|
+
#
|
|
25
|
+
require 'rpdf2txt-rockit/directed_graph'
|
|
26
|
+
require 'rpdf2txt-rockit/base_extensions'
|
|
27
|
+
require 'rpdf2txt-rockit/parsetable_generation'
|
|
28
|
+
|
|
29
|
+
require 'rpdf2txt-rockit/profiler'
|
|
30
|
+
|
|
31
|
+
#$PROFILE = true
|
|
32
|
+
|
|
33
|
+
class ReduceActionsGenerator
|
|
34
|
+
# Stores the collaborators, numbers the grammar symbols and productions,
# indexes the given items by nonterminal and prepares the pair factory and
# the traverse cache.
def initialize(stateGraph, grammar, parseTable, allItems)
  @state_graph = stateGraph
  @grammar = grammar
  @parsetable = parseTable
  set_index_numbers
  precalc_items_at_nonterminal(allItems)
  @lalr_pair_factory = IndexableFactory.new(LaLrPair, 0)
  init_traverse_cache
end
|
|
41
|
+
|
|
42
|
+
# Adds the reduce actions for consistent and then inconsistent reduce
# states, compacts the parse table and returns it. When $PROFILE is set the
# profiler is started first and its summary printed at the end.
def add_reduce_actions
  profiling = $PROFILE
  Profiler.start if profiling
  add_actions_for_consistent_states
  add_actions_for_inconsistent_states
  @parsetable.compact!
  puts Profiler.profile_summary(true, true) if $PROFILE
  @parsetable
end
|
|
50
|
+
|
|
51
|
+
protected
|
|
52
|
+
|
|
53
|
+
# Creates @traverse_cache with one empty Hash per node of the state graph.
# Returns the number of nodes.
def init_traverse_cache
  state_count = @state_graph.nodes.length
  @traverse_cache = Array.new(state_count) { Hash.new }
  state_count
end
|
|
58
|
+
|
|
59
|
+
# Numbers terminals from 0, continues the same sequence over the
# nonterminals, and numbers productions separately from 0.
def set_index_numbers
  next_index = 0
  @grammar.terminals.each do |terminal|
    terminal.index_number = next_index
    next_index += 1
  end
  @grammar.nonterminals.each do |nonterminal|
    nonterminal.index_number = next_index
    next_index += 1
  end
  @grammar.productions.each_with_index do |production, idx|
    production.index_number = idx
  end
end
|
|
65
|
+
|
|
66
|
+
# For every final item of every consistent reduce state, adds a reduce
# action on the follow set of the item's production's nonterminal.
def add_actions_for_consistent_states
  Profiler.__enter__(:add_actions_for_consistent_states) if $PROFILE
  reduce_states = build_follow_sets(@state_graph.consistent_reduce_states)
  reduce_states.each do |state|
    state.final_items.each do |item|
      production = item.production
      terminals = follow_set(production.nonterminal)
      add_reduce_action_for_terminalset(state, terminals, production)
    end
  end
  Profiler.__leave__(:add_actions_for_consistent_states) if $PROFILE
end
|
|
78
|
+
|
|
79
|
+
# Runs the first/follow set pipeline in its required order and hands the
# given states back unchanged.
def build_follow_sets(states)
  create_first_sets
  precalc_epsilon_derivation
  create_follow_sets
  # Currently we build the full relations graphs
  build_relations
  update_first_sets
  update_follow_sets
  return states
end
|
|
88
|
+
|
|
89
|
+
# Lazily created EpsilonToken whose index number is one above the largest
# index number among the grammar's terminals.
def epsilon
  unless @epsilon
    @epsilon = EpsilonToken.new
    highest = @grammar.terminals.map {|terminal| terminal.index_number}.max
    @epsilon.index_number = highest+1
  end
  @epsilon
end
|
|
95
|
+
|
|
96
|
+
# Memoized list of the grammar's terminals followed by the epsilon token.
def all_terminals
  @all_terminals ||= @grammar.terminals + [epsilon]
end
|
|
99
|
+
|
|
100
|
+
# Starts a fresh @i2i_relations graph, links @epsilon to the nonterminal of
# every production without a first element, propagates first sets from
# @epsilon and then sets derives_epsilon on each nonterminal according to
# whether its first set includes @epsilon.
#
# NOTE(review): reads @epsilon directly, so it relies on #epsilon having
# been called earlier -- confirm against callers.
def precalc_epsilon_derivation
  @i2i_relations = DirectedGraph.new
  @grammar.productions.each do |production|
    next if production.elements.first
    i2i_relation(@epsilon, production.nonterminal)
  end
  propagate_terminal_sets([@epsilon], @i2i_relations) {|node| first_set(node)}
  @grammar.nonterminals.each do |nonterminal|
    nonterminal.derives_epsilon = first_set(nonterminal).include?(@epsilon)
  end
end
|
|
113
|
+
|
|
114
|
+
# Propagates first sets through @i2i_relations, starting at its roots.
def update_first_sets
  relations = @i2i_relations
  propagate_terminal_sets(relations.roots, relations) {|node| first_set(node)}
end
|
|
117
|
+
|
|
118
|
+
# Merges the first set of every @i2o_relations source into the follow set
# of each of its (distinct) destinations, then propagates follow sets
# through @o2o_relations from its roots.
def update_follow_sets
  @i2o_relations.each do |source, destinations|
    next unless destinations
    source_set = first_set(source)
    destinations.uniq.each do |destination|
      follow_set(destination).update source_set
    end
  end
  relations = @o2o_relations
  propagate_terminal_sets(relations.roots, relations) {|node| follow_set(node)}
end
|
|
126
|
+
|
|
127
|
+
# Creates the first-set tables: an empty terminal set per nonterminal and a
# single-member terminal set per terminal, each stored at the symbol's
# index number.
#
# NOTE(review): the terminal table is pre-sized from the number of
# nonterminals; the size is only an initial allocation since entries are
# assigned by index.
def create_first_sets
  nonterminals = @grammar.nonterminals
  @nonterminal_first_sets = Array.new(nonterminals.length)
  nonterminals.each do |nonterminal|
    @nonterminal_first_sets[nonterminal.index_number] = new_terminal_set
  end
  terminals = @grammar.terminals
  @terminal_first_sets = Array.new(nonterminals.length)
  terminals.each do |terminal|
    @terminal_first_sets[terminal.index_number] = new_terminal_set(terminal)
  end
end
|
|
135
|
+
|
|
136
|
+
# Creates @follow_sets with an empty terminal set for every nonterminal,
# stored at the nonterminal's index number.
def create_follow_sets
  nonterminals = @grammar.nonterminals
  @follow_sets = Array.new(nonterminals.length)
  nonterminals.each do |nonterminal|
    @follow_sets[nonterminal.index_number] = new_terminal_set
  end
end
|
|
141
|
+
|
|
142
|
+
# Builds the relation structures used to compute first and follow sets:
#   @i2o_relations - Hash filled through i2o_relation
#   @o2o_relations - DirectedGraph filled through o2o_relation
# and links into the i2i relations through i2i_relation. Also resets
# @productions_ending_with and fills it via update_productions_ending_with.
def build_relations
  @i2o_relations = Hash.new
  @o2o_relations = DirectedGraph.new
  @productions_ending_with = Array.new
  @grammar.productions.each do |production|
    elements = production.elements
    nonterminal = production.nonterminal

    # Forward pass over the elements.
    updating = true
    last_nonterm = nil
    elements.each do |x|
      i2i_relation(x, nonterminal) if updating
      i2o_relation(x, last_nonterm) if last_nonterm
      last_nonterm = x.nonterminal? ? x : nil
      # NOTE(review): `=` binds tighter than `and`, so this statement is
      # `(updating = updating) and ...`; updating therefore stays true for
      # the whole pass. Kept verbatim because the generated tables depend on
      # it -- confirm the intent before changing.
      updating = updating and x.nonterminal? and x.derives_epsilon?
    end

    # Backward pass from the last element.
    (elements.length-1).downto(0) do |i|
      x = elements[i]
      if x.nonterminal?
        o2o_relation(nonterminal, x)
        update_productions_ending_with(x, production, elements[0...i])
      end
      # NOTE(review): same precedence effect as above; only
      # `x.nonterminal?` is assigned, the derives_epsilon? result is
      # discarded. Kept verbatim.
      updating = x.nonterminal? and x.derives_epsilon?
      break unless updating
    end
  end
  # Should be @grammar.original_start_symbol but this will always work
  # since Follow(S) includes Follow(S'). It will take care of the accept.
  i2o_relation(@grammar.eof_terminal, @grammar.start_symbol)
end
|
|
174
|
+
|
|
175
|
+
# Links src to dest in the @i2i_relations graph.
def i2i_relation(src, dest)
  graph = @i2i_relations
  graph.link_nodes(src, dest)
end
|
|
178
|
+
|
|
179
|
+
# Links src to dest in the @o2o_relations graph.
def o2o_relation(src, dest)
  graph = @o2o_relations
  graph.link_nodes(src, dest)
end
|
|
182
|
+
|
|
183
|
+
# Appends dest to the list kept for src in @i2o_relations, creating the
# list on first use. Returns that list.
def i2o_relation(src, dest)
  (@i2o_relations[src] ||= []) << dest
end
|
|
191
|
+
|
|
192
|
+
# New TerminalSet over all_terminals; when a terminal is given it is passed
# along as the single initial member.
def new_terminal_set(terminal = nil)
  return TerminalSet.new(all_terminals) unless terminal
  TerminalSet.new(all_terminals, [terminal])
end
|
|
199
|
+
|
|
200
|
+
# Looks up the first set of a symbol by its index number, in the
# nonterminal table for nonterminals and the terminal table otherwise.
def first_set(symbol)
  table = symbol.nonterminal? ? @nonterminal_first_sets : @terminal_first_sets
  table[symbol.index_number]
end
|
|
207
|
+
|
|
208
|
+
# For each [state, pair] entry produced by build_lalr_follow_sets, adds a
# reduce action on the pair's LALR follow set for every final item of the
# state.
def add_actions_for_inconsistent_states
  Profiler.__enter__(:add_actions_for_inconsistent_states) if $PROFILE
  reduce_states = @state_graph.inconsistent_reduce_states
  build_lalr_follow_sets(reduce_states).each do |state, pair|
    state.final_items.each do |item|
      terminals = lalr_follow_set(pair)
      add_reduce_action_for_terminalset(state, terminals, item.production)
    end
  end
  Profiler.__leave__(:add_actions_for_inconsistent_states) if $PROFILE
end
|
|
219
|
+
|
|
220
|
+
# Registers an action for the state on every terminal of terminalSet:
# [:ACCEPT, 0] when the production belongs to the grammar's start symbol,
# otherwise [:REDUCE, <production index>].
def add_reduce_action_for_terminalset(state, terminalSet, production)
  Profiler.__enter__(:add_reduce_action_for_terminalset, state, terminalSet, production) if $PROFILE
  action =
    if production.nonterminal == @grammar.start_symbol
      [:ACCEPT, 0]
    else
      [:REDUCE, production.index_number]
    end
  @parsetable.add_action_for_terminalset(state.index_number, action, terminalSet)
  Profiler.__leave__(:add_reduce_action_for_terminalset) if $PROFILE
end
|
|
230
|
+
|
|
231
|
+
# For every root, walks the nodes reachable from it (each once, breadth
# first) and merges each visited node's terminal set into the sets of its
# children. The block maps a node to its terminal set.
#
# Logothetis-Bermudez-style propagation. Does NOT exploit the fact that
# many propagations share a majority of paths. Exploit if speed needed!
# Note that this is not guaranteed to work if there is a strongly connected
# component with more than two nodes. The intermediate nodes (only linked
# within the component) will not be updated after the backlink.
# Solve by doing a real reachability graph instead.
def propagate_terminal_sets(roots, graph = @relations, &getTerminalSet)
  Profiler.__enter__(:propagate_terminal_sets, roots, graph) if $PROFILE
  roots.each do |root|
    graph.each_reachable_node_once_breadth_first(root) do |node|
      node_set = getTerminalSet.call(node)
      graph.children(node).each do |successor|
        getTerminalSet.call(successor).update node_set
      end
    end
  end
  Profiler.__leave__(:propagate_terminal_sets) if $PROFILE
end
|
|
249
|
+
|
|
250
|
+
# Collects, for every final item of every given state, the pair
# [state, lalr_pair(<state reached by back_traverse>, <nonterminal>)],
# then builds and propagates the LALR follow sets. Returns the collected
# pairs.
def build_lalr_follow_sets(states)
  Profiler.__enter__(:build_lalr_follow_sets) if $PROFILE
  follow_sets_needed = Array.new
  states.each do |state|
    state.final_items.each do |item|
      production = item.production
      origin = back_traverse(state, production.elements)
      follow_sets_needed << [state, lalr_pair(origin, production.nonterminal)]
    end
  end
  build_o2o_lalr_relations
  create_lalr_follow_sets
  update_lalr_follow_sets_with_direct_followers
  relations = @o2o_lalr_relations
  propagate_terminal_sets(relations.roots, relations) {|pair| lalr_follow_set(pair)}
  Profiler.__leave__(:build_lalr_follow_sets) if $PROFILE
  follow_sets_needed
end
|
|
268
|
+
|
|
269
|
+
def o2o_lalr_relation(src, dest)
  # Links +src+ to +dest+ in @o2o_lalr_relations and returns whatever
  # link_nodes returns.
  relations = @o2o_lalr_relations
  relations.link_nodes(src, dest)
end
|
|
272
|
+
|
|
273
|
+
# A (state, nonterminal) pair. It mixes in Indexable, and the surrounding
# code uses each pair's index_number as an array slot.
LaLrPair = Struct.new("LaLrPair", :state, :nonterminal)

class LaLrPair
  include Indexable

  # Short rendering: the state's index number and the nonterminal.
  def inspect
    "(#{state.index_number.inspect}, #{nonterminal.inspect})"
  end
end
|
|
280
|
+
|
|
281
|
+
def lalr_pair(state, nonterminal)
  # Asks @lalr_pair_factory for the instance built from (state, nonterminal).
  # NOTE(review): presumably the factory hands back one shared LaLrPair per
  # distinct argument list -- confirm in the factory implementation.
  factory = @lalr_pair_factory
  factory.instance_with_args(state, nonterminal)
end
|
|
284
|
+
|
|
285
|
+
def precalc_items_at_nonterminal(allItems) # O(I)
  # Rebuilds @items_at_nonterminal from scratch: every item whose symbol is
  # present and answers true to nonterminal? is registered under that symbol
  # via update_items_at_nonterminal. Returns the rebuilt table.
  @items_at_nonterminal = []
  allItems.each do |item|
    sym = item.symbol
    next unless sym && sym.nonterminal?
    update_items_at_nonterminal(sym, item)
  end
  @items_at_nonterminal
end
|
|
295
|
+
|
|
296
|
+
def update_items_at_nonterminal(nonterminal, item)
  # Appends +item+ to the list stored at the nonterminal's index_number in
  # @items_at_nonterminal, creating that list on first use. Returns the list.
  slot = nonterminal.index_number
  (@items_at_nonterminal[slot] ||= []) << item
end
|
|
304
|
+
|
|
305
|
+
def items_at_nonterminal(nonterminal)
  # Returns the entry of @items_at_nonterminal stored at the nonterminal's
  # index_number (nil when nothing was registered there).
  slot = nonterminal.index_number
  @items_at_nonterminal[slot]
end
|
|
308
|
+
|
|
309
|
+
def create_lalr_follow_sets
  # Allocates @lalr_follow_sets with one slot per pair currently known to
  # @lalr_pair_factory, then stores the result of new_terminal_set at each
  # pair's index_number.
  Profiler.__enter__(:create_lalr_follow_sets) if $PROFILE
  pairs = @lalr_pair_factory.instances
  @lalr_follow_sets = Array.new(pairs.length)
  pairs.each { |pair| @lalr_follow_sets[pair.index_number] = new_terminal_set }
  Profiler.__leave__(:create_lalr_follow_sets) if $PROFILE
end
|
|
318
|
+
|
|
319
|
+
def lalr_follow_set(lalrPair)
  # Returns the entry of @lalr_follow_sets stored at the pair's index_number.
  slot = lalrPair.index_number
  @lalr_follow_sets[slot]
end
|
|
322
|
+
|
|
323
|
+
def update_lalr_follow_sets_with_direct_followers
  # Seeds every pair's follow set before propagation:
  # * pairs whose nonterminal is the grammar's start symbol receive the
  #   first set of the grammar's eof terminal;
  # * every pair receives the first set of each symbol listed in
  #   direct_following_symbols of the items registered for its nonterminal.
  # Pairs whose nonterminal has no registered items are otherwise left alone.
  Profiler.__enter__(:update_lalr_follow_sets_with_direct_followers) if $PROFILE
  @lalr_pair_factory.instances.each do |pair|
    nonterminal = pair.nonterminal
    if nonterminal == @grammar.start_symbol
      lalr_follow_set(pair).update(first_set(@grammar.eof_terminal))
    end
    registered = items_at_nonterminal(nonterminal)
    next unless registered
    registered.each do |item|
      item.direct_following_symbols.each do |follower|
        lalr_follow_set(pair).update(first_set(follower))
      end
    end
  end
  Profiler.__leave__(:update_lalr_follow_sets_with_direct_followers) if $PROFILE
end
|
|
339
|
+
|
|
340
|
+
def productions_ending_with2(nonterminal, withLastPrefix = nil)
  # Looks up the entries recorded by update_productions_ending_with2.
  #
  # With +withLastPrefix+: returns the list stored under
  # @productions_ending_with[nonterminal index][withLastPrefix index], or nil
  # when nothing has been recorded for that nonterminal or that prefix symbol.
  # (It used to raise NoMethodError on nil for an unknown nonterminal; nil
  # matches what productions_ending_with returns in the same situation.)
  #
  # Without +withLastPrefix+: returns
  # @productions_ending_with_and_having_empty_prefix, ignoring +nonterminal+.
  #
  # NOTE(review): update_productions_ending_with stores a flat list per
  # nonterminal in the same @productions_ending_with table; the two layouts
  # look incompatible -- confirm only one method family is in use.
  return @productions_ending_with_and_having_empty_prefix unless withLastPrefix
  by_last_prefix = @productions_ending_with[nonterminal.index_number]
  by_last_prefix && by_last_prefix[withLastPrefix.index_number]
end
|
|
347
|
+
|
|
348
|
+
def update_productions_ending_with2(nonterminal, production, prefix)
  # Records [production, prefix] under
  # @productions_ending_with[nonterminal index][index of prefix.last],
  # creating the intermediate arrays on demand and skipping an entry that is
  # already present. +prefix+ must be non-empty, since prefix.last is
  # dereferenced.
  #
  # NOTE(review): update_productions_ending_with stores a flat list per
  # nonterminal in the same table; the two layouts look incompatible --
  # confirm only one method family is in use.
  nt_slot = nonterminal.index_number
  prefix_slot = prefix.last.index_number
  entry = [production, prefix]
  row = (@productions_ending_with[nt_slot] ||= [])
  bucket = row[prefix_slot]
  if bucket
    bucket.push(entry) unless bucket.include?(entry)
  else
    row[prefix_slot] = [entry]
  end
end
|
|
363
|
+
|
|
364
|
+
def productions_ending_with(nonterminal)
  # Returns the entry of @productions_ending_with stored at the nonterminal's
  # index_number (nil when nothing was recorded there).
  slot = nonterminal.index_number
  @productions_ending_with[slot]
end
|
|
367
|
+
|
|
368
|
+
def update_productions_ending_with(nonterminal, production, prefix)
  # Records [production, prefix] in the list kept for +nonterminal+ in
  # @productions_ending_with. The list is created on first use and an entry
  # that is already present is not added again.
  slot = nonterminal.index_number
  entry = [production, prefix]
  known = @productions_ending_with[slot]
  return (@productions_ending_with[slot] = [entry]) unless known
  known.include?(entry) ? nil : known.push(entry)
end
|
|
377
|
+
|
|
378
|
+
# Shared empty array that productions_with_valid_prefixes returns when no
# productions are recorded for a pair's nonterminal (presumably to avoid
# allocating a new array on every such call). Callers must not mutate it.
@@empty_array = Array.new
|
|
379
|
+
|
|
380
|
+
# We discard prefixes whose last symbol (the first one met when traversing
# backwards) is not directly valid for the state, to save time in back_traverse.
|
|
382
|
+
def productions_with_valid_prefixes(pair)
  # Returns the [production, prefix] entries recorded for the pair's
  # nonterminal whose prefix is either empty or ends in a symbol contained in
  # @state_graph.incoming_links_info(pair.state). When nothing is recorded for
  # the nonterminal, the shared @@empty_array is returned instead.
  Profiler.__enter__(:productions_with_valid_prefixes) if $PROFILE
  candidates = productions_ending_with(pair.nonterminal)
  if candidates
    incoming = @state_graph.incoming_links_info(pair.state)
    result = candidates.select do |_production, prefix|
      prefix.length.zero? || incoming.include?(prefix.last)
    end
  else
    result = @@empty_array
  end
  Profiler.__leave__(:productions_with_valid_prefixes) if $PROFILE
  result
end
|
|
396
|
+
|
|
397
|
+
def build_o2o_lalr_relations
  # Rebuilds @o2o_lalr_relations as a new DirectedGraph. For every pair known
  # to @lalr_pair_factory and every [production, prefix] entry accepted by
  # productions_with_valid_prefixes, the prefix is back-traversed from the
  # pair's state; when that yields a state, the pair made of that state and
  # the production's nonterminal is linked to the current pair.
  Profiler.__enter__(:build_o2o_lalr_relations) if $PROFILE
  @o2o_lalr_relations = DirectedGraph.new
  position = 0
  # The instance list is re-read on every pass instead of being iterated
  # with each -- presumably because lalr_pair can register further pairs
  # while we are looping, and those have to be visited as well.
  until position >= @lalr_pair_factory.instances.length
    target = @lalr_pair_factory.instances[position]
    productions_with_valid_prefixes(target).each do |rule, prefix|
      origin = back_traverse(target.state, prefix)
      next unless origin
      o2o_lalr_relation(lalr_pair(origin, rule.nonterminal), target)
    end
    position += 1
  end
  Profiler.__leave__(:build_o2o_lalr_relations) if $PROFILE
end
|
|
416
|
+
|
|
417
|
+
def back_traverse(state, elements)
  # Cached front end for @state_graph.back_traverse(state, elements).
  # Results are remembered per state index and element list in
  # @traverse_cache. Returns nil when the graph raises
  # GraphTraversalException; such failures (and any other falsy result) are
  # not served from the cache, so they are recomputed on the next call.
  Profiler.__enter__(:back_traverse, state, elements) if $PROFILE
  per_state = @traverse_cache[state.index_number]
  found = per_state[elements]
  unless found
    begin
      found = @state_graph.back_traverse(state, elements)
      per_state[elements] = found
    rescue GraphTraversalException
      found = nil
    end
  end
  Profiler.__leave__(:back_traverse) if $PROFILE
  found
end
|
|
432
|
+
|
|
433
|
+
def follow_set(symbol)
  # Returns the entry of @follow_sets stored at the symbol's index_number.
  slot = symbol.index_number
  @follow_sets[slot]
end
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
class TerminalSet
  # Set of terminals with fast union and add operations. It is actually a
  # general implementation and should probably be called IndexableObjectSet:
  # members only have to respond to index_number, and membership is kept as
  # a bit vector inside a single Integer (bit i set <=> the member whose
  # index_number is i belongs to the set).

  # Shared "no initial members" list used by new_from_integer; frozen so it
  # can never be modified by accident.
  EMPTY_MEMBERS = [].freeze

  # allPossibleMembers:: the universe; every member responds to index_number.
  # members::            initial members (each must be in the universe).
  # all::                precomputed mask with one bit per possible index.
  # max::                precomputed highest index_number in the universe.
  def initialize(allPossibleMembers, members = [], all = nil, max = nil)
    @all_possible_members = allPossibleMembers
    # An empty universe has no highest index; -1 yields an "all" mask of 0
    # instead of failing on nil + 1.
    @max_index = max || allPossibleMembers.map { |m| m.index_number }.max || -1
    @all = all || ((1 << (@max_index + 1)) - 1)
    @included = 0 # We represent the set as a bit vector in an Integer.
    members.each { |t| add(t) }
  end

  # Builds a set directly from an already computed bit vector +included+.
  def self.new_from_integer(allPossibleMembers, included, all = nil, max = nil)
    ts = new(allPossibleMembers, EMPTY_MEMBERS, all, max)
    ts.set_include_vector(included)
    ts
  end

  # Adds +terminal+ to the set. Raises ArgumentError when the terminal is not
  # part of the universe. A terminal whose index_number cannot be turned into
  # a bit mask is reported on stdout and otherwise ignored.
  def add(terminal)
    unless @all_possible_members.include?(terminal)
      raise ArgumentError, "#{terminal.inspect} is not a possible member"
    end
    begin
      @included |= mask(terminal.index_number)
    rescue StandardError # never Exception: that would also swallow signals and exit
      puts "TerminalSet: #{@all_possible_members.inspect} but was #{terminal.inspect}"
    end
  end

  # In-place union with +other+ (anything responding to to_i with a bit
  # vector). Returns the new bit vector.
  def update(other)
    @included |= other.to_i
  end

  # Set difference: a new TerminalSet over the same universe containing the
  # members of self that are not in +other+.
  def -(other)
    # Clear exactly the bits that are set in other. (The previous arithmetic
    # form, @all - other, was only a valid complement while both operands
    # stayed within @all.)
    TerminalSet.new_from_integer(@all_possible_members,
                                 @included & ~other.to_i, @all, @max_index)
  end

  def empty?
    @included == 0
  end

  # The members of the universe that are currently in the set, in universe
  # order.
  def terminals
    @all_possible_members.select { |t| index_included?(t.index_number) }
  end

  def index_included?(index)
    (@included & mask(index)) > 0
  end

  def include?(terminal)
    index_included?(terminal.index_number)
  end

  def inspect
    terminals.inspect
  end

  # Yields every member of the set, in universe order.
  def each
    Profiler.__enter__(:TerminalSet_each) if $PROFILE
    @all_possible_members.each do |t|
      yield(t) if index_included?(t.index_number)
    end
    Profiler.__leave__(:TerminalSet_each) if $PROFILE
  end

  # Replaces the whole bit vector; used by new_from_integer.
  def set_include_vector(newVector)
    @included = newVector
  end

  # The raw bit vector.
  def to_i
    @included
  end

  protected

  # Bit mask selecting position +index+ of the bit vector.
  def mask(index)
    1 << index
  end
end
|