rpdf2txt 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/LICENCE +515 -0
- data/Manifest.txt +126 -0
- data/README.txt +30 -0
- data/Rakefile +24 -0
- data/bin/rpdf2txt +58 -0
- data/config.save +12 -0
- data/install.rb +1098 -0
- data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
- data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
- data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
- data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
- data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
- data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
- data/lib/rpdf2txt-rockit/grammar.rb +644 -0
- data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
- data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
- data/lib/rpdf2txt-rockit/indexable.rb +53 -0
- data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
- data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
- data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
- data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
- data/lib/rpdf2txt-rockit/profiler.rb +168 -0
- data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
- data/lib/rpdf2txt-rockit/rockit.rb +76 -0
- data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
- data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
- data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
- data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
- data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
- data/lib/rpdf2txt-rockit/token.rb +364 -0
- data/lib/rpdf2txt-rockit/version.rb +3 -0
- data/lib/rpdf2txt/attributesparser.rb +42 -0
- data/lib/rpdf2txt/cmapparser.rb +65 -0
- data/lib/rpdf2txt/data/_cmap.grammar +11 -0
- data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/cmap.grammar +11 -0
- data/lib/rpdf2txt/data/cmap.rb +37 -0
- data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
- data/lib/rpdf2txt/data/cmap_range.rb +43 -0
- data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
- data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
- data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
- data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
- data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
- data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
- data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
- data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
- data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
- data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
- data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
- data/lib/rpdf2txt/data/pdftext.grammar +102 -0
- data/lib/rpdf2txt/data/pdftext.rb +146 -0
- data/lib/rpdf2txt/default_handler.rb +352 -0
- data/lib/rpdf2txt/lzw.rb +69 -0
- data/lib/rpdf2txt/object.rb +1114 -0
- data/lib/rpdf2txt/parser.rb +169 -0
- data/lib/rpdf2txt/symbol.rb +408 -0
- data/lib/rpdf2txt/text.rb +182 -0
- data/lib/rpdf2txt/text_state.rb +434 -0
- data/lib/rpdf2txt/textparser.rb +42 -0
- data/test/data/3392_obj +0 -0
- data/test/data/397_decrypted +15 -0
- data/test/data/450_decrypted +153 -0
- data/test/data/450_obj +0 -0
- data/test/data/452_decrypted +125 -0
- data/test/data/454_decrypted +108 -0
- data/test/data/456_decrypted +106 -0
- data/test/data/458_decrypted +111 -0
- data/test/data/458_obj +0 -0
- data/test/data/460_decrypted +118 -0
- data/test/data/460_obj +0 -0
- data/test/data/463_decrypted +117 -0
- data/test/data/465_decrypted +107 -0
- data/test/data/465_obj +0 -0
- data/test/data/90_obj +0 -0
- data/test/data/90_obj_comp +1 -0
- data/test/data/decrypted +0 -0
- data/test/data/encrypt_obj +0 -0
- data/test/data/encrypt_string +0 -0
- data/test/data/encrypt_string_128bit +0 -0
- data/test/data/encrypted_object_stream.pdf +0 -0
- data/test/data/firststream +1 -0
- data/test/data/index.pdfobj +0 -0
- data/test/data/index_2bit.pdfobj +0 -0
- data/test/data/index_masked.pdfobj +0 -0
- data/test/data/indexed.pdfobj +0 -0
- data/test/data/indexed_2bit.pdfobj +0 -0
- data/test/data/indexed_masked.pdfobj +0 -0
- data/test/data/inline.png +0 -0
- data/test/data/logo.png +0 -0
- data/test/data/lzw.pdfobj +0 -0
- data/test/data/lzw_index.pdfobj +0 -0
- data/test/data/page_tree.pdf +148 -0
- data/test/data/pdf_20.png +0 -0
- data/test/data/pdf_21.png +0 -0
- data/test/data/pdf_22.png +0 -0
- data/test/data/pdf_50.png +0 -0
- data/test/data/png.pdfobj +0 -0
- data/test/data/space_bug_stream.txt +119 -0
- data/test/data/stream.txt +292 -0
- data/test/data/stream_kerning_bug.txt +13 -0
- data/test/data/stream_kerning_bug2.txt +6 -0
- data/test/data/test.pdf +0 -0
- data/test/data/test.txt +8 -0
- data/test/data/test_text.txt +42 -0
- data/test/data/working_obj +0 -0
- data/test/data/working_obj2 +0 -0
- data/test/mock.rb +149 -0
- data/test/suite.rb +30 -0
- data/test/test_pdf_object.rb +1802 -0
- data/test/test_pdf_parser.rb +1340 -0
- data/test/test_pdf_text.rb +789 -0
- data/test/test_space_bug_05_2004.rb +87 -0
- data/test/test_stream.rb +194 -0
- data/test/test_text_state.rb +315 -0
- data/usage-en.txt +112 -0
- data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
- data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
- metadata +220 -0
@@ -0,0 +1,164 @@
|
|
1
|
+
# Common classes used in many of the parse table generation classes.
|
2
|
+
#
|
3
|
+
require 'rpdf2txt-rockit/indexable'
|
4
|
+
require 'rpdf2txt-rockit/token'
|
5
|
+
require 'rpdf2txt-rockit/grammar'
|
6
|
+
|
7
|
+
# An LR item: a production plus a dot position and an optional lookahead.
# +symbol+ is the grammar symbol found at the dot position, or nil when the
# dot is past the last element of the production.
class Item
  include Indexable
  attr_reader :symbol, :production, :position, :lookahead

  # Shared "no followers" result. Callers must treat it as read-only.
  @@empty_array = Array.new

  # production:: production this item is built from
  # position::   index of the dot inside production.elements
  # lookahead::  optional lookahead symbol
  # nextItem::   optional, already built item for position+1. It seeds the
  #              memo behind #next_item (lookahead_item passes it in so the
  #              successor does not have to be rebuilt).
  def initialize(production, position, lookahead = nil, nextItem = nil)
    @production, @position, @lookahead = production, position, lookahead
    @symbol = production.elements[position]
    # NOTE(review): presumably read by Indexable's hash support -- the mixin
    # is not visible from this file.
    @hash_value = [production, position, lookahead].hash
    @next_item = nextItem
  end

  # Memoized result of calc_followers.
  def direct_following_symbols
    @direct_following_symbols ||= calc_followers
  end

  # Collects the elements after the symbol at the dot, stopping after the
  # first element that does not derive epsilon (that element is included).
  def calc_followers
    return @@empty_array if position >= production.elements.length - 1
    followers = Array.new
    production.elements[position + 1..-1].each do |element|
      followers.push element
      return followers unless element.derives_epsilon?
    end
    followers
  end

  # Elements after the symbol at the dot, or nil when there are none.
  def suffix
    return nil unless position < production.elements.length - 1
    production.elements[position + 1..-1]
  end

  # True when the dot cannot be advanced any further.
  def final?
    next_item == nil
  end

  # The item with the dot advanced one step, or nil for a final item.
  # A nil result is not cached and is recomputed on every call.
  def next_item
    @next_item ||= calc_next_item
  end

  # Same production and position, but with the given lookahead symbol.
  def lookahead_item(lookaheadSymbol)
    make_new_item(production, position, lookaheadSymbol, next_item)
  end

  # Renders the item as "Nonterminal->elements .elements"; the space before
  # the dot is omitted when the dot is at either end of the production.
  def inspect
    at_edge = (@position >= production.elements.length) || (@position == 0)
    production.nonterminal.inspect + "->" +
      inspect_elements(production.elements[0...@position]) +
      (at_edge ? "." : " .") +
      inspect_elements(production.elements[@position..-1])
  end

  protected

  # Space separated inspect strings; tolerates a nil slice.
  def inspect_elements(elements)
    elements ? elements.map { |e| e.inspect }.join(" ") : ""
  end

  # Builds items through @factory when one has been set on this instance,
  # otherwise directly. (@factory is never assigned in this class.)
  def make_new_item(*args)
    @factory ? @factory.make(*args) : Item.new(*args)
  end

  # Successor item without lookahead, or nil when the dot is at the end.
  def calc_next_item
    return nil unless position < production.elements.length
    make_new_item(production, position + 1)
  end
end
|
81
|
+
|
82
|
+
# A state of the LR automaton, defined by its kernel items. The closure and
# the list of final kernel items are computed eagerly in the constructor.
class LrState
  include Indexable
  attr_reader :kernel_items, :closure, :final_items

  def initialize(kernelItems)
    @kernel_items = kernelItems
    calc_closure(kernelItems) # Do it lazily instead?
    @final_items = kernelItems.select { |item| item.final? }
    @reduce_state = @final_items.length > 0
    @consistent = (kernelItems.length == 1)
  end

  # True when at least one kernel item is final.
  def reduce_state?
    @reduce_state
  end

  # True when the state has exactly one kernel item.
  def consistent?
    @consistent
  end

  # Number of kernel items.
  def length
    @kernel_items.length
  end

  def inspect
    "State(" + @kernel_items.inspect + ")"
  end

  protected

  @@empty_array = Array.new

  # Starts the closure from a copy of the kernel items and expands it through
  # the symbol found at the dot of each kernel item.
  def calc_closure(itemset)
    @closure = itemset.clone
    seen = Hash.new
    itemset.each do |item|
      seen = recursive_calc_closure(item.symbol, seen)
    end
  end

  # Appends the nonkernel items of +symbol+ to the closure and recurses into
  # the symbols at their dots. +checked+ records symbols already expanded and
  # is returned so the caller can keep threading it through.
  def recursive_calc_closure(symbol, checked)
    return checked unless symbol
    return checked if checked[symbol]
    checked[symbol] = true
    added = symbol.nonkernel_items
    @closure.concat(added)
    added.each do |item|
      checked = recursive_calc_closure(item.symbol, checked)
    end
    checked
  end
end
|
127
|
+
|
128
|
+
# Decorate the grammar symbols with some additional info we're gonna need
|
129
|
+
# Reopens NonTerminal to add the bookkeeping the table generator needs:
# its nonkernel items and whether it derives epsilon.
class NonTerminal
  include Indexable
  attr_reader :nonkernel_items
  attr_writer :derives_epsilon

  # Builds one item with the dot at position 0 for every alternative the
  # grammar lists for this nonterminal.
  def calc_nonkernel_items(grammar, itemFactory)
    items = @nonkernel_items = Array.new
    grammar.alternatives(self).each do |production|
      items.push(itemFactory.make(production, 0))
    end
  end

  def nonterminal?
    true
  end

  def terminal?
    false
  end

  # Value last stored through derives_epsilon= (nil until assigned).
  def derives_epsilon?
    @derives_epsilon
  end
end
|
146
|
+
|
147
|
+
# Reopens Token so terminals answer the same questions as nonterminals.
class Token
  include Indexable

  # Terminals contribute no items to a closure; one shared array is returned.
  @@empty_array = []

  def nonterminal?
    false
  end

  def terminal?
    true
  end

  def nonkernel_items
    @@empty_array
  end

  # A terminal never derives epsilon.
  def derives_epsilon?
    false
  end
end
|
160
|
+
|
161
|
+
# Decorate Productions
|
162
|
+
# Reopens Production only to mix in Indexable.
class Production
  include(Indexable)
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'rpdf2txt-rockit/syntax_tree'
|
2
|
+
|
3
|
+
# Syntax tree node named "_Ambiguity" that holds the alternative trees for an
# ambiguous part of a parse in its single child "ambigous_trees".
class AmbiguityNode < SyntaxTree
  def initialize(alt1, alt2)
    @ambigous_trees = [alt1, alt2]
    super("_Ambiguity", ["ambigous_trees"], [@ambigous_trees])
  end

  # Adds +tree+ unless an equal tree is already recorded.
  def add_ambigoustree(tree)
    return if @ambigous_trees.include?(tree)
    @ambigous_trees.push tree
  end
end
|
13
|
+
|
14
|
+
# Raised for an ambiguous parse. Carries the alternative trees, the string
# that was being parsed and the full tree, and can describe where two
# alternatives differ.
#
# NOTE(review): inherits from Exception, so a bare +rescue+ will not catch it.
class AmbigousParseException < Exception
  attr_reader :alternatives, :substring

  # stringBeingParsed:: the text reported as ambiguous
  # fullTree::          the complete tree (compacted in place by #inspect)
  # alternativeTrees::  the competing trees
  def initialize(stringBeingParsed, fullTree, *alternativeTrees)
    super("Ambigous parse")
    @alternatives, @full_tree = alternativeTrees, fullTree
    init_substring(stringBeingParsed)
  end

  # Human readable report. Each alternative is rendered with
  # prettyPrinter.print when a printer is given, otherwise with #inspect.
  # Alternatives that render identically get an extra line describing their
  # first structural difference. Calls compact! on the full tree.
  def inspect(prettyPrinter = nil)
    str = "Ambiguity: The substring '#{substring}' can be parsed as:\n"
    shown, same_strings = Array.new, Array.new
    alternatives.each_with_index do |alt, i|
      rendered = prettyPrinter ? prettyPrinter.print(alt) : alt.inspect
      earlier = shown.index(rendered)
      same_strings.push [earlier, i] if earlier
      shown.push rendered
    end
    last = alternatives.length - 1
    shown.each_with_index do |rendered, i|
      str << " Alternative #{i+1}: #{rendered}"
      str << ", or" if i < last
      str << "\n"
    end
    same_strings.each do |i, j|
      str << report_on_tree_differences(alternatives[i], alternatives[j], i, j)
    end
    @full_tree.compact!
    str + "The full tree looks like:\n" + @full_tree.inspect
  end

  # One-line description of a difference between alternatives i and j
  # (0-based indices, printed 1-based). +childPath+ is a dotted path whose
  # leading "." is stripped before printing.
  def difference_description(i, j, str0, o1, o2, childPath = "")
    child_str =
      childPath.length > 0 ? "in the childrens '#{childPath[1..-1]}'" : ""
    " Alternatives #{i+1} and #{j+1} differ #{child_str} by not having" +
      " the same #{str0} (#{o1.inspect} and #{o2.inspect})"
  end

  # Walks two trees in parallel and returns a description of the first
  # difference found, or "" when none is found. i and j are the 0-based
  # indices of the alternatives and are always printed 1-based.
  def report_on_tree_differences(t1, t2, i, j, childPath = "")
    if t1.class != t2.class
      difference_description(i, j, "type", t1.class, t2.class, childPath)
    elsif not t1.kind_of?(SyntaxTree)
      if t1 != t2
        # Fixed: this message used the raw 0-based indices while every other
        # message prints i+1 / j+1.
        " Alternatives #{i+1} and #{j+1} are not SyntaxTree's and differ"
      else
        ""
      end
    elsif t1.name != t2.name
      difference_description(i, j, "name", t1.name, t2.name, childPath)
    elsif t1.children_names != t2.children_names
      difference_description(i, j, "children_names", t1.children_names,
                             t2.children_names, childPath)
    else
      t1.childrens.each_with_index do |child, k|
        report = report_on_tree_differences(child, t2[k], i, j,
                                            childPath + "." +
                                            t1.children_names[k])
        return report if report.length > 0
      end
      return ""
    end
  end

  protected

  def init_substring(string)
    @substring = string
  end
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# Object that answers +times+. The original test `unless TimesClass` raised
# NameError whenever the constant was not already defined, and compared
# RUBY_VERSION as a string; a defined?/respond_to? check avoids both.
unless defined?(TimesClass)
  TimesClass = Process.respond_to?(:times) ? Process : Time
end

# Manual, call-site instrumented profiler. Instrumented methods call
# Profiler.__enter__ first and Profiler.__leave__ last; profile_summary
# renders the collected numbers. All timings are user CPU time (utime).
module Profiler
  @@start = TimesClass.times.utime

  # Method invocation stack with one entry for each invocation:
  # Time at entry, Total times in subfunction also being logged, MethodId
  @@invocation_stack = [[0, 0, "#toplevel".intern]]

  # One entry for each method: NumCalls, TotalTime, OnlyMyTime, Callers
  @@map = {"#toplevel".intern => [1, 0, 0]}

  # One entry for each method: hash mapping args.inspect to count
  @@arguments = Hash.new

  # Seconds of CPU time after which __enter__ aborts the run (nil = no limit).
  @@time_limit = 5 * 60

  # Restarts the clock. With no argument the time limit is switched off.
  def start(timeLimitInMinutes = nil)
    @@time_limit = timeLimitInMinutes ? timeLimitInMinutes * 60 : nil
    @@start = Float(TimesClass.times.utime)
  end
  module_function :start

  # Records entry into +method+ with +args+. When a time limit is set and
  # exceeded, prints the summary to STDERR and exits with status -1.
  def __enter__(method, *args)
    now = TimesClass.times.utime
    if @@time_limit and now - @@start > @@time_limit
      STDERR.puts "Profiling time limit violated. Run terminated."
      STDERR.puts Profiler.profile_summary(true, true)
      exit(-1)
    end
    @@invocation_stack.push [now, 0.0, method]
    # Was a `rescue Exception` + `retry` pair, which looped forever when
    # args.inspect itself raised and also swallowed signals/exit.
    (@@arguments[method] ||= Hash.new(0))[args.inspect] += 1
  end
  module_function :__enter__

  # Records exit from +method+ and passes the return value through: nil for
  # no retargs, the value itself for one, an Array for several. (A bare
  # `return *retargs` yields a one-element Array on Ruby >= 1.9.)
  def __leave__(method, *retargs)
    now = TimesClass.times.utime
    tick = @@invocation_stack.pop
    data = (@@map[method] ||= [0.0, 0.0, 0.0, Hash.new(0)])
    data[0] += 1
    total_time_this_invocation = now - tick[0]
    data[1] += total_time_this_invocation
    data[2] += total_time_this_invocation - tick[1]
    # caller[1] is nil when the instrumented method has no caller frame.
    data[3][caller[1] || "(unknown call site)"] += 1
    parent = @@invocation_stack.last
    parent[1] += total_time_this_invocation if parent
    retargs.length > 1 ? retargs : retargs.first
  end
  module_function :__leave__

  # Go through the invocation stack and leave all methods.
  # __leave__ pops the frame itself, so the frame is only peeked at here;
  # popping it first (as before) discarded two frames per iteration.
  def unwind_invocation_stack
    while @@invocation_stack.length > 1
      __leave__(@@invocation_stack.last[2])
    end
  end
  module_function :unwind_invocation_stack

  # Returns the report as a String. Unwinds any frames still open first.
  # writeCallers::   also list call sites per method
  # writeArguments:: also report how often argument lists repeated
  def Profiler.profile_summary(writeCallers = false, writeArguments = false)
    total_elapsed = TimesClass.times.utime - @@start
    str = "Profiling summary\n".dup
    str << "*****************\n"
    str << "Total elapsed time: #{total_elapsed} seconds\n"
    unwind_invocation_stack if @@invocation_stack.length > 1
    total = @@invocation_stack.last[1]
    time_in_nonprofiled = total_elapsed - total
    str << "Time spent in non-profiled methods: #{time_in_nonprofiled} sec\n"
    str << "Time in profiled methods:\n"
    total = 0.01 if total == 0 # avoid dividing by zero below
    @@map["#toplevel".intern][1] = total
    sum = 0
    str << " %% cumulative self self total\n"
    str << " time seconds seconds calls ms/call ms/call name\n"
    str << " ---------------------------------------------------------\n"
    # Sorted by self time, largest first.
    @@map.to_a.sort { |a, b| b[1][2] <=> a[1][2] }.each do |method, d|
      next if method == "#toplevel".intern
      sum += d[2]
      str << "%6.2f %8.2f %8.2f %8d " % [d[2]/total*100, sum, d[2], d[0]]
      str << "%8.2f %8.2f %s\n" % [d[2]*1000/d[0], d[1]*1000/d[0],
                                   method.id2name]
      if writeCallers
        str << " Call sites:\n"
        d[3].to_a.sort { |a, b| b[1] <=> a[1] }.each do |callersite, count|
          str << " #{count}: " + callersite.split("/").last + "\n"
        end
      end
      if writeArguments and d[0] > 1
        str << " Arguments:\n"
        counts, num_prev_seen = Hash.new(0), 0
        @@arguments[method].to_a.sort { |a, b| b[1] <=> a[1] }.each do |args, cnt|
          counts[cnt] += cnt
          num_prev_seen += cnt if cnt > 1
        end
        proportion_prev_seen = num_prev_seen*100.0/d[0]
        proportion_unique = 100.0 - proportion_prev_seen
        str << " %3.2f%% (#{d[0].to_i - num_prev_seen}) of calls with unique args" % proportion_unique
        if proportion_unique != 100.0
          str << ", and\n"
          str << " %3.2f%% (#{num_prev_seen}) of calls with args that were used several times\n" % proportion_prev_seen
          str << " distr: #{counts.inspect}"
        end
        str << "\n"
      end
      str << "\n" if writeCallers or writeArguments
    end
    return str
  end
end
|
128
|
+
|
129
|
+
#############################################################################
|
130
|
+
# Simple test
|
131
|
+
#############################################################################
|
132
|
+
# Smoke test, run only when this file is executed directly: profiles a tiny
# complex-number class and prints the summary.
if $0 == __FILE__
  class ComplexTest
    attr_reader :real, :imaginary

    def initialize(real, imaginary)
      Profiler.__enter__(:initialize, real, imaginary)
      @real = real
      @imaginary = imaginary
      Profiler.__leave__(:initialize, self)
    end

    # Profiled wrapper; the actual work happens in real_add.
    def add(other)
      Profiler.__enter__(:add, other)
      real_add(other)
      Profiler.__leave__(:add, self)
    end

    def real_add(other)
      Profiler.__enter__(:real_add, other)
      @real += other.real
      @imaginary += other.imaginary
      Profiler.__leave__(:real_add, self)
    end

    def inspect
      "#{real} + i*#{imaginary}"
    end
  end

  Profiler.start

  1.upto(10) do
    accumulator = ComplexTest.new(rand, rand)
    1.upto(100) { accumulator.add(ComplexTest.new(rand, rand)) }
    puts "It finished"
  end
  # Two constructions with identical arguments.
  c = ComplexTest.new(1, 1)
  c = ComplexTest.new(1, 1)
  puts Profiler.profile_summary(true, true)
end
|
@@ -0,0 +1,523 @@
|
|
1
|
+
# Find the reductions in a parse table for a StateGraph.
|
2
|
+
#
|
3
|
+
# There can be reductions in all states with kernel items that are final, ie.
|
4
|
+
# where the position cannot be advanced any further
|
5
|
+
# (example: State(3, [S -> R.])).
|
6
|
+
#
|
7
|
+
# For states with only one kernel item (which is final which is always the
|
8
|
+
# case if its only one) we should reduce for
|
9
|
+
# all terminals in the follow set of the nonterminal (Follow(S) in the
|
10
|
+
# example above). These states are called consistent reduce states.
|
11
|
+
#
|
12
|
+
# When there are multiple kernel items and one (or several) of them is a final
|
13
|
+
# item we should reduce for terminals in Follow(state, nonterminal). These
|
14
|
+
# states are called inconsistent reduce states. An example is
|
15
|
+
# State(5, [S -> L .= R, R -> L.]) where we don't know whether to reduce
|
16
|
+
# or shift. Here, we should reduce by R -> L for all terminals in Follow(5, R).
|
17
|
+
#
|
18
|
+
# To reduce the amount of computation needed we use a hybrid, lazy approach.
|
19
|
+
# We calculate all first and follow sets for all symbols in the grammar.
|
20
|
+
# With this we can find the reductions for consistent reduce states. For the
|
21
|
+
# inconsistent ones we build the first to follow relations "from the back"
|
22
|
+
# (ie. from the actual follow sets we need) up to the first sets. This way
|
23
|
+
# we don't need to calculate relations that are not relevant.
|
24
|
+
#
|
25
|
+
require 'rpdf2txt-rockit/directed_graph'
|
26
|
+
require 'rpdf2txt-rockit/base_extensions'
|
27
|
+
require 'rpdf2txt-rockit/parsetable_generation'
|
28
|
+
|
29
|
+
require 'rpdf2txt-rockit/profiler'
|
30
|
+
|
31
|
+
#$PROFILE = true
|
32
|
+
|
33
|
+
class ReduceActionsGenerator
|
34
|
+
# Stores the collaborators, numbers the grammar symbols and productions,
# indexes +allItems+ by the nonterminal at their dot, and prepares the
# LaLrPair factory and the traverse cache.
def initialize(stateGraph, grammar, parseTable, allItems)
  @state_graph = stateGraph
  @grammar = grammar
  @parsetable = parseTable
  set_index_numbers
  precalc_items_at_nonterminal(allItems)
  @lalr_pair_factory = IndexableFactory.new(LaLrPair, 0)
  init_traverse_cache
end
|
41
|
+
|
42
|
+
# Adds the reduce actions for consistent and then inconsistent reduce states,
# compacts the parse table and returns it. When $PROFILE is set the run is
# profiled and the summary is printed.
def add_reduce_actions
  if $PROFILE
    Profiler.start
  end
  add_actions_for_consistent_states
  add_actions_for_inconsistent_states
  @parsetable.compact!
  if $PROFILE
    puts Profiler.profile_summary(true, true)
  end
  @parsetable
end
|
50
|
+
|
51
|
+
protected
|
52
|
+
|
53
|
+
# Creates one empty Hash per node of the state graph. Returns the node count.
def init_traverse_cache
  state_count = @state_graph.nodes.length
  @traverse_cache = Array.new(state_count) { Hash.new }
  state_count
end
|
58
|
+
|
59
|
+
# Numbers terminals first and nonterminals after them from one running
# counter starting at 0; productions are numbered separately from 0.
def set_index_numbers
  next_index = 0
  @grammar.terminals.each do |terminal|
    terminal.index_number = next_index
    next_index += 1
  end
  @grammar.nonterminals.each do |nonterminal|
    nonterminal.index_number = next_index
    next_index += 1
  end
  @grammar.productions.each_with_index do |production, position|
    production.index_number = position
  end
end
|
65
|
+
|
66
|
+
# For every final item of every consistent reduce state, adds a reduce action
# on the follow set of the item's nonterminal.
def add_actions_for_consistent_states
  Profiler.__enter__(:add_actions_for_consistent_states) if $PROFILE
  consistent = @state_graph.consistent_reduce_states
  build_follow_sets(consistent).each do |state|
    state.final_items.each do |item|
      production = item.production
      terminals = follow_set(production.nonterminal)
      add_reduce_action_for_terminalset(state, terminals, production)
    end
  end
  Profiler.__leave__(:add_actions_for_consistent_states) if $PROFILE
end
|
78
|
+
|
79
|
+
# Runs the whole first/follow computation, in this order, and hands +states+
# back unchanged so the call can be chained.
def build_follow_sets(states)
  create_first_sets
  precalc_epsilon_derivation
  create_follow_sets
  # Currently we build the full relations graphs
  build_relations
  update_first_sets
  update_follow_sets
  return states
end
|
88
|
+
|
89
|
+
# Lazily created epsilon token, numbered one past the highest terminal index.
def epsilon
  unless @epsilon
    @epsilon = EpsilonToken.new
    highest = @grammar.terminals.map { |terminal| terminal.index_number }.max
    @epsilon.index_number = highest + 1
  end
  @epsilon
end
|
95
|
+
|
96
|
+
# The grammar's terminals followed by the epsilon token (memoized).
def all_terminals
  @all_terminals ||= @grammar.terminals + [epsilon]
end
|
99
|
+
|
100
|
+
# Links epsilon to every nonterminal that has an empty production, propagates
# first sets from epsilon through those links, and marks each nonterminal
# whose first set then contains epsilon as deriving epsilon.
# NOTE(review): reads @epsilon directly, so it relies on #epsilon having been
# called earlier -- confirm against the call order in build_follow_sets.
def precalc_epsilon_derivation
  @i2i_relations = DirectedGraph.new
  @grammar.productions.each do |production|
    next if production.elements.first
    i2i_relation(@epsilon, production.nonterminal)
  end
  propagate_terminal_sets([@epsilon], @i2i_relations) { |node| first_set(node) }
  @grammar.nonterminals.each do |nonterminal|
    nonterminal.derives_epsilon = first_set(nonterminal).include?(@epsilon)
  end
end
|
113
|
+
|
114
|
+
# Propagates first sets from the roots of the i2i relation graph.
def update_first_sets
  graph = @i2i_relations
  propagate_terminal_sets(graph.roots, graph) { |node| first_set(node) }
end
|
117
|
+
|
118
|
+
# Feeds the first set of each i2o source into the follow sets of its
# destinations, then propagates follow sets along the o2o relation graph.
def update_follow_sets
  @i2o_relations.each_pair do |source, destinations|
    next unless destinations
    source_first = first_set(source)
    destinations.uniq.each do |destination|
      follow_set(destination).update source_first
    end
  end
  graph = @o2o_relations
  propagate_terminal_sets(graph.roots, graph) { |node| follow_set(node) }
end
|
126
|
+
|
127
|
+
# Allocates an empty first set per nonterminal and a singleton first set per
# terminal, each stored at the symbol's index_number.
# NOTE(review): both tables are pre-sized with the nonterminal count; Ruby
# arrays grow on assignment, so the size only acts as a hint.
def create_first_sets
  nonterminals = @grammar.nonterminals
  @nonterminal_first_sets = Array.new(nonterminals.length)
  nonterminals.each do |nonterminal|
    @nonterminal_first_sets[nonterminal.index_number] = new_terminal_set
  end
  terminals = @grammar.terminals
  @terminal_first_sets = Array.new(nonterminals.length)
  terminals.each do |terminal|
    @terminal_first_sets[terminal.index_number] = new_terminal_set(terminal)
  end
end
|
135
|
+
|
136
|
+
# Allocates an empty follow set for every nonterminal, stored at the
# nonterminal's index_number.
def create_follow_sets
  nonterminals = @grammar.nonterminals
  @follow_sets = Array.new(nonterminals.length)
  nonterminals.each do |nonterminal|
    @follow_sets[nonterminal.index_number] = new_terminal_set
  end
end
|
141
|
+
|
142
|
+
# Builds the relation structures used to compute first and follow sets:
#   i2i: element -> left-hand nonterminal of its production
#   i2o: element -> the nonterminal directly before it
#   o2o: left-hand nonterminal -> trailing nonterminals of the production
#
# NOTE(review): this reproduces what the previous code actually executed.
# It contained `updating = updating and x.nonterminal? and
# x.derives_epsilon?` and `updating = x.nonterminal? and
# x.derives_epsilon?`; because `and` binds looser than `=`, the
# derives_epsilon? part never reached `updating`. So the forward pass links
# EVERY element, and the backward pass continues over every trailing
# nonterminal, nullable or not. Tightening this would shrink the sets --
# confirm how derives_epsilon is computed before changing it.
def build_relations
  @i2o_relations = Hash.new
  @o2o_relations = DirectedGraph.new
  @productions_ending_with = Array.new
  @grammar.productions.each do |production|
    elements = production.elements
    nonterminal = production.nonterminal

    # Forward pass over all elements.
    preceding_nonterm = nil
    elements.each do |x|
      i2i_relation(x, nonterminal)
      i2o_relation(x, preceding_nonterm) if preceding_nonterm
      preceding_nonterm = x.nonterminal? ? x : nil
    end

    # Backward pass: stops at the first terminal met from the right.
    (elements.length - 1).downto(0) do |index|
      x = elements[index]
      break unless x.nonterminal?
      o2o_relation(nonterminal, x)
      update_productions_ending_with(x, production, elements[0...index])
    end
  end
  # Should be @grammar.original_start_symbol but this will always work
  # since Follow(S) includes Follow(S'). It will take care of the accept.
  i2o_relation(@grammar.eof_terminal, @grammar.start_symbol)
end
|
174
|
+
|
175
|
+
# Adds the edge src -> dest to the i2i relation graph.
def i2i_relation(src, dest)
  @i2i_relations.link_nodes src, dest
end
|
178
|
+
|
179
|
+
# Adds the edge src -> dest to the o2o relation graph.
def o2o_relation(src, dest)
  @o2o_relations.link_nodes src, dest
end
|
182
|
+
|
183
|
+
# Appends dest to the list of i2o destinations kept for src, creating the
# list on first use. Duplicates are kept.
def i2o_relation(src, dest)
  destinations = @i2o_relations[src]
  return destinations.push(dest) if destinations
  @i2o_relations[src] = [dest]
end
|
191
|
+
|
192
|
+
# New TerminalSet over all_terminals; holds +terminal+ when one is given.
def new_terminal_set(terminal = nil)
  return TerminalSet.new(all_terminals) unless terminal
  TerminalSet.new(all_terminals, [terminal])
end
|
199
|
+
|
200
|
+
# First set of +symbol+, read from the nonterminal or the terminal table at
# the symbol's index_number.
def first_set(symbol)
  table = symbol.nonterminal? ? @nonterminal_first_sets : @terminal_first_sets
  table[symbol.index_number]
end
|
207
|
+
|
208
|
+
# For each (state, pair) returned by build_lalr_follow_sets, adds a reduce
# action on the pair's LALR follow set for every final item of the state.
def add_actions_for_inconsistent_states
  Profiler.__enter__(:add_actions_for_inconsistent_states) if $PROFILE
  inconsistent = @state_graph.inconsistent_reduce_states
  build_lalr_follow_sets(inconsistent).each do |state, pair|
    state.final_items.each do |item|
      terminals = lalr_follow_set(pair)
      add_reduce_action_for_terminalset(state, terminals, item.production)
    end
  end
  Profiler.__leave__(:add_actions_for_inconsistent_states) if $PROFILE
end
|
219
|
+
|
220
|
+
# Registers [:ACCEPT, 0] when the production belongs to the start symbol,
# otherwise [:REDUCE, production index], for all terminals in +terminalSet+.
def add_reduce_action_for_terminalset(state, terminalSet, production)
  Profiler.__enter__(:add_reduce_action_for_terminalset, state, terminalSet, production) if $PROFILE
  accepting = (production.nonterminal == @grammar.start_symbol)
  action = accepting ? [:ACCEPT, 0] : [:REDUCE, production.index_number]
  @parsetable.add_action_for_terminalset(state.index_number, action, terminalSet)
  Profiler.__leave__(:add_reduce_action_for_terminalset) if $PROFILE
end
|
230
|
+
|
231
|
+
# Pushes terminal sets along the edges of +graph+: for each node reachable
# (breadth first, visited once) from each root, the node's set is merged into
# the set of every child. The block maps a node to its terminal set.
#
# Logothetis-Bermudez-style propagation. Does NOT exploit the fact that
# many propagations share a majority of paths. Exploit if speed needed!
# Note that this is not guaranteed to work if there is a strongly connected
# component with more than two nodes. The intermediate nodes (only linked
# within the component) will not be updated after the backlink.
# Solve by doing a real reachability graph instead.
def propagate_terminal_sets(roots, graph = @relations, &getTerminalSet)
  Profiler.__enter__(:propagate_terminal_sets, roots, graph) if $PROFILE
  roots.each do |root|
    graph.each_reachable_node_once_breadth_first(root) do |node|
      node_set = getTerminalSet.call(node)
      graph.children(node).each do |child|
        child_set = getTerminalSet.call(child)
        child_set.update node_set
      end
    end
  end
  Profiler.__leave__(:propagate_terminal_sets) if $PROFILE
end
|
249
|
+
|
250
|
+
# Collects, for every final item of every given state, the [state, LaLrPair]
# whose follow set is needed, then builds and propagates the LALR follow
# sets. Returns the collected list.
def build_lalr_follow_sets(states)
  Profiler.__enter__(:build_lalr_follow_sets) if $PROFILE
  needed = Array.new
  states.each do |state|
    state.final_items.each do |item|
      production = item.production
      origin = back_traverse(state, production.elements)
      needed.push [state, lalr_pair(origin, production.nonterminal)]
    end
  end
  build_o2o_lalr_relations
  create_lalr_follow_sets
  update_lalr_follow_sets_with_direct_followers
  graph = @o2o_lalr_relations
  propagate_terminal_sets(graph.roots, graph) { |node| lalr_follow_set(node) }
  Profiler.__leave__(:build_lalr_follow_sets) if $PROFILE
  needed
end
|
268
|
+
|
269
|
+
# Adds the edge src -> dest to the LALR o2o relation graph.
def o2o_lalr_relation(src, dest)
  @o2o_lalr_relations.link_nodes src, dest
end
|
272
|
+
|
273
|
+
# (state, nonterminal) pair identifying one LALR follow set. Instances are
# obtained through @lalr_pair_factory so they carry an index_number.
LaLrPair = Struct.new("LaLrPair", :state, :nonterminal)
class LaLrPair
  include Indexable

  def inspect
    "(" + state.index_number.inspect + ", " + nonterminal.inspect + ")"
  end
end
|
280
|
+
|
281
|
+
# Asks the pair factory for the LaLrPair of (state, nonterminal).
def lalr_pair(state, nonterminal)
  @lalr_pair_factory.instance_with_args state, nonterminal
end
|
284
|
+
|
285
|
+
# Indexes the items whose symbol at the dot is a nonterminal by that
# nonterminal's index_number. One pass over +allItems+; returns the table.
def precalc_items_at_nonterminal(allItems) # O(I)
  @items_at_nonterminal = Array.new
  allItems.each do |item|
    at_dot = item.symbol
    next unless at_dot and at_dot.nonterminal?
    update_items_at_nonterminal(at_dot, item)
  end
  @items_at_nonterminal
end
|
295
|
+
|
296
|
+
# Appends +item+ to the bucket kept for +nonterminal+, creating the bucket on
# first use.
def update_items_at_nonterminal(nonterminal, item)
  slot = nonterminal.index_number
  bucket = @items_at_nonterminal[slot]
  return bucket.push(item) if bucket
  @items_at_nonterminal[slot] = [item]
end
|
304
|
+
|
305
|
+
# Items recorded for +nonterminal+, or nil when none were recorded.
def items_at_nonterminal(nonterminal)
  slot = nonterminal.index_number
  @items_at_nonterminal[slot]
end
|
308
|
+
|
309
|
+
# Allocates @lalr_follow_sets with one slot per pair currently known to
# @lalr_pair_factory and stores a fresh set (from new_terminal_set) at
# each pair's index_number.
def create_lalr_follow_sets
  Profiler.__enter__(:create_lalr_follow_sets) if $PROFILE
  known_pairs = @lalr_pair_factory.instances
  @lalr_follow_sets = Array.new(known_pairs.length)
  known_pairs.each { |pair| @lalr_follow_sets[pair.index_number] = new_terminal_set }
  Profiler.__leave__(:create_lalr_follow_sets) if $PROFILE
end
|
318
|
+
|
319
|
+
# Returns the follow set stored in @lalr_follow_sets for +lalrPair+,
# addressed by the pair's index_number.
def lalr_follow_set(lalrPair)
  slot = lalrPair.index_number
  @lalr_follow_sets[slot]
end
|
322
|
+
|
323
|
+
# Seeds the follow set of every known LALR pair:
#
# * a pair whose nonterminal is the grammar's start symbol receives the
#   first set of the grammar's EOF terminal;
# * for every item filed under the pair's nonterminal (see
#   items_at_nonterminal), the first set of each of the item's direct
#   following symbols is merged in.
#
# Pairs whose nonterminal has no filed items get nothing beyond the
# start-symbol seeding.
def update_lalr_follow_sets_with_direct_followers
  Profiler.__enter__(:update_lalr_follow_sets_with_direct_followers) if $PROFILE
  @lalr_pair_factory.instances.each do |pair|
    if pair.nonterminal == @grammar.start_symbol
      lalr_follow_set(pair).update(first_set(@grammar.eof_terminal))
    end
    filed_items = items_at_nonterminal(pair.nonterminal)
    next unless filed_items
    filed_items.each do |item|
      item.direct_following_symbols.each do |follower|
        lalr_follow_set(pair).update(first_set(follower))
      end
    end
  end
  Profiler.__leave__(:update_lalr_follow_sets_with_direct_followers) if $PROFILE
end
|
339
|
+
|
340
|
+
# Looks up entries in the two-level @productions_ending_with table,
# which update_productions_ending_with2 fills as
# table[nonterminal index][index of the prefix's last symbol].
#
# nonterminal::    symbol whose index_number selects the first level.
# withLastPrefix:: symbol whose index_number selects the second level;
#                  when omitted (nil) the method returns
#                  @productions_ending_with_and_having_empty_prefix
#                  instead, regardless of +nonterminal+.
#
# Returns the stored list of [production, prefix] entries, or nil when
# nothing has been recorded for the nonterminal or for that last prefix
# symbol. (Previously an unrecorded nonterminal raised NoMethodError on
# nil; nil is what the sibling productions_ending_with returns in the
# same situation.)
def productions_ending_with2(nonterminal, withLastPrefix = nil)
  return @productions_ending_with_and_having_empty_prefix unless withLastPrefix

  by_last_prefix = @productions_ending_with[nonterminal.index_number]
  by_last_prefix && by_last_prefix[withLastPrefix.index_number]
end
|
347
|
+
|
348
|
+
# Records the entry [production, prefix] in the two-level
# @productions_ending_with table, addressed first by +nonterminal+'s
# index_number and then by the index_number of the last symbol of
# +prefix+. Missing levels are created on demand and an entry equal to
# one already stored is not added twice.
#
# +prefix+ must be non-empty: its last element is asked for an
# index_number.
#
# Returns the entry list when something was stored, nil when the entry
# was already present.
def update_productions_ending_with2(nonterminal, production, prefix)
  row_index = nonterminal.index_number
  column_index = prefix.last.index_number
  entry = [production, prefix]

  row = @productions_ending_with[row_index]
  row = @productions_ending_with[row_index] = Array.new unless row

  bucket = row[column_index]
  if bucket
    bucket.push(entry) unless bucket.include?(entry)
  else
    row[column_index] = [entry]
  end
end
|
363
|
+
|
364
|
+
# Returns what @productions_ending_with holds for +nonterminal+
# (addressed by its index_number), or nil when nothing is recorded.
def productions_ending_with(nonterminal)
  slot = nonterminal.index_number
  @productions_ending_with[slot]
end
|
367
|
+
|
368
|
+
# Records the entry [production, prefix] in the flat list kept for
# +nonterminal+ in @productions_ending_with (addressed by the
# nonterminal's index_number). The list is created on first use and an
# entry equal to one already stored is not added twice.
#
# Returns the entry list when something was stored, nil when the entry
# was already present.
def update_productions_ending_with(nonterminal, production, prefix)
  slot = nonterminal.index_number
  entry = [production, prefix]
  known = @productions_ending_with[slot]
  if known
    known.push(entry) unless known.include?(entry)
  else
    @productions_ending_with[slot] = [entry]
  end
end
|
377
|
+
|
378
|
+
# Shared empty list, returned by productions_with_valid_prefixes when no
# productions are recorded for a nonterminal.
@@empty_array = []
|
379
|
+
|
380
|
+
# Returns the [production, prefix] entries recorded for +pair+'s
# nonterminal that can possibly be traversed backwards from pair.state:
# an entry is kept when its prefix is empty or when the last symbol of
# its prefix appears in the state graph's incoming link info for the
# state. Discarding the rest here saves time in back_traverse.
#
# Returns the shared @@empty_array when nothing is recorded for the
# nonterminal.
def productions_with_valid_prefixes(pair)
  Profiler.__enter__(:productions_with_valid_prefixes) if $PROFILE
  candidates = productions_ending_with(pair.nonterminal)
  if candidates
    incoming = @state_graph.incoming_links_info(pair.state)
    result = candidates.select do |_production, prefix|
      prefix.length.zero? || incoming.include?(prefix.last)
    end
  else
    result = @@empty_array
  end
  Profiler.__leave__(:productions_with_valid_prefixes) if $PROFILE
  result
end
|
396
|
+
|
397
|
+
# Builds @o2o_lalr_relations, a fresh DirectedGraph linking LALR pairs.
#
# For every known pair and every [production, prefix] entry that
# productions_with_valid_prefixes yields for it, the prefix is traversed
# backwards from the pair's state. When a state is reached, the pair
# (reached state, production's nonterminal) is linked to the current
# pair.
#
# The factory's instance list is re-read on every iteration rather than
# iterated with +each+, so pairs that appear in it while the loop runs
# are processed as well.
def build_o2o_lalr_relations
  Profiler.__enter__(:build_o2o_lalr_relations) if $PROFILE
  @o2o_lalr_relations = DirectedGraph.new
  position = 0
  until position >= @lalr_pair_factory.instances.length
    dest_pair = @lalr_pair_factory.instances[position]
    productions_with_valid_prefixes(dest_pair).each do |production, prefix|
      origin = back_traverse(dest_pair.state, prefix)
      next unless origin
      o2o_lalr_relation(lalr_pair(origin, production.nonterminal), dest_pair)
    end
    position += 1
  end
  #puts "Num LaLrPairs = #{position.inspect}"
  Profiler.__leave__(:build_o2o_lalr_relations) if $PROFILE
end
|
416
|
+
|
417
|
+
# Walks +elements+ backwards from +state+ in @state_graph and returns
# the state reached, or nil when the graph raises
# GraphTraversalException.
#
# Successful results are remembered in @traverse_cache, addressed by the
# state's index_number and then by +elements+; failed traversals are not
# stored, so they are attempted again on the next call.
def back_traverse(state, elements)
  Profiler.__enter__(:back_traverse, state, elements) if $PROFILE
  slot = state.index_number
  dest = @traverse_cache[slot][elements]
  if !dest
    begin
      dest = @state_graph.back_traverse(state, elements)
      @traverse_cache[slot][elements] = dest
    rescue GraphTraversalException
      dest = nil
    end
  end
  Profiler.__leave__(:back_traverse) if $PROFILE
  dest
end
|
432
|
+
|
433
|
+
# Returns the entry of @follow_sets stored for +symbol+, addressed by
# the symbol's index_number.
def follow_set(symbol)
  slot = symbol.index_number
  @follow_sets[slot]
end
|
436
|
+
end
|
437
|
+
|
438
|
+
class TerminalSet
  # A set with fast union and add operations. The implementation is
  # actually general and should probably be called IndexableObjectSet:
  # members only need to respond to +index_number+.
  #
  # Membership is kept as a bit vector in a single Integer: bit i is set
  # when the member whose index_number is i belongs to the set.

  # allPossibleMembers:: every object that may ever be added.
  # members::            objects to add right away.
  # all::                precomputed bit vector with every possible bit
  #                      set (computed from +max+ when omitted).
  # max::                precomputed highest index_number (derived from
  #                      +allPossibleMembers+ when omitted).
  def initialize(allPossibleMembers, members = [], all = nil, max = nil)
    @all_possible_members = allPossibleMembers
    # An empty universe has no highest index; -1 makes the "all" vector
    # below come out as 0 instead of failing on nil + 1.
    @max_index = max || allPossibleMembers.map { |m| m.index_number }.max || -1
    @all = all || (2**(@max_index + 1)) - 1
    @included = 0 # We represent as bit vector in Integer.
    members.each { |t| add(t) }
  end

  # Shared "no initial members" argument for new_from_integer; it is
  # only ever iterated, never modified.
  @@empty_array = [].freeze

  # Builds a set directly from an existing bit vector +included+.
  def TerminalSet.new_from_integer(allPossibleMembers, included, all = nil,
                                   max = nil)
    ts = new(allPossibleMembers, @@empty_array, all, max)
    ts.set_include_vector(included)
    ts
  end

  # Adds +terminal+ to the set.
  #
  # Raises ArgumentError when +terminal+ is not one of the possible
  # members. A failure while setting the bit (for instance a member
  # without a usable index_number) is reported on standard output and
  # otherwise ignored, as before; only StandardError is caught so that
  # signals and exit requests are no longer swallowed.
  def add(terminal)
    unless @all_possible_members.include?(terminal)
      raise ArgumentError, "#{terminal.inspect} is not a possible member"
    end
    begin
      @included |= mask(terminal.index_number)
    rescue StandardError
      puts "TerminalSet: #{@all_possible_members.inspect} but was #{terminal.inspect}"
    end
  end

  # In-place union with +other+ (anything responding to +to_i+ with a
  # bit vector).
  def update(other)
    @included |= other.to_i
  end

  # Set difference: a new set holding the members of this set that are
  # not in +other+.
  def -(other)
    # Keep a bit only where it is 1 here and 0 in other. Masking with the
    # bitwise complement is exact whatever bits +other+ carries.
    TerminalSet.new_from_integer(@all_possible_members,
                                 @included & ~other.to_i, @all,
                                 @max_index)
  end

  def empty?
    @included == 0
  end

  # The included members, in the order of the possible-members list.
  def terminals
    @all_possible_members.select { |t| index_included?(t.index_number) }
  end

  # True when the bit for +index+ is set.
  def index_included?(index)
    (@included & mask(index)) > 0
  end

  def include?(terminal)
    index_included?(terminal.index_number)
  end

  def inspect
    terminals.inspect
  end

  # Yields every included member, in the order of the possible-members
  # list.
  def each
    Profiler.__enter__(:TerminalSet_each) if $PROFILE
    @all_possible_members.each do |t|
      yield(t) if index_included?(t.index_number)
    end
    Profiler.__leave__(:TerminalSet_each) if $PROFILE
  end

  # Replaces the whole bit vector; used by new_from_integer.
  def set_include_vector(newVector)
    @included = newVector
  end

  # The raw bit vector.
  def to_i
    @included
  end

  protected

  # Single-bit masks already computed, shared by all sets.
  @@masks = Array.new

  # The Integer with only bit +index+ set.
  def mask(index)
    @@masks[index] || (@@masks[index] = (1 << index))
  end
end
|