rpdf2txt 0.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (127) hide show
  1. data/History.txt +5 -0
  2. data/LICENCE +515 -0
  3. data/Manifest.txt +126 -0
  4. data/README.txt +30 -0
  5. data/Rakefile +24 -0
  6. data/bin/rpdf2txt +58 -0
  7. data/config.save +12 -0
  8. data/install.rb +1098 -0
  9. data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
  10. data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
  11. data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
  12. data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
  13. data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
  14. data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
  15. data/lib/rpdf2txt-rockit/grammar.rb +644 -0
  16. data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
  17. data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
  18. data/lib/rpdf2txt-rockit/indexable.rb +53 -0
  19. data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
  20. data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
  21. data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
  22. data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
  23. data/lib/rpdf2txt-rockit/profiler.rb +168 -0
  24. data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
  25. data/lib/rpdf2txt-rockit/rockit.rb +76 -0
  26. data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
  27. data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
  28. data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
  29. data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
  30. data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
  31. data/lib/rpdf2txt-rockit/token.rb +364 -0
  32. data/lib/rpdf2txt-rockit/version.rb +3 -0
  33. data/lib/rpdf2txt/attributesparser.rb +42 -0
  34. data/lib/rpdf2txt/cmapparser.rb +65 -0
  35. data/lib/rpdf2txt/data/_cmap.grammar +11 -0
  36. data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
  37. data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
  38. data/lib/rpdf2txt/data/cmap.grammar +11 -0
  39. data/lib/rpdf2txt/data/cmap.rb +37 -0
  40. data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
  41. data/lib/rpdf2txt/data/cmap_range.rb +43 -0
  42. data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
  43. data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
  44. data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
  45. data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
  46. data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
  47. data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
  48. data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
  49. data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
  50. data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
  51. data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
  52. data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
  53. data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
  54. data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
  55. data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
  56. data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
  57. data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
  58. data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
  59. data/lib/rpdf2txt/data/pdftext.grammar +102 -0
  60. data/lib/rpdf2txt/data/pdftext.rb +146 -0
  61. data/lib/rpdf2txt/default_handler.rb +352 -0
  62. data/lib/rpdf2txt/lzw.rb +69 -0
  63. data/lib/rpdf2txt/object.rb +1114 -0
  64. data/lib/rpdf2txt/parser.rb +169 -0
  65. data/lib/rpdf2txt/symbol.rb +408 -0
  66. data/lib/rpdf2txt/text.rb +182 -0
  67. data/lib/rpdf2txt/text_state.rb +434 -0
  68. data/lib/rpdf2txt/textparser.rb +42 -0
  69. data/test/data/3392_obj +0 -0
  70. data/test/data/397_decrypted +15 -0
  71. data/test/data/450_decrypted +153 -0
  72. data/test/data/450_obj +0 -0
  73. data/test/data/452_decrypted +125 -0
  74. data/test/data/454_decrypted +108 -0
  75. data/test/data/456_decrypted +106 -0
  76. data/test/data/458_decrypted +111 -0
  77. data/test/data/458_obj +0 -0
  78. data/test/data/460_decrypted +118 -0
  79. data/test/data/460_obj +0 -0
  80. data/test/data/463_decrypted +117 -0
  81. data/test/data/465_decrypted +107 -0
  82. data/test/data/465_obj +0 -0
  83. data/test/data/90_obj +0 -0
  84. data/test/data/90_obj_comp +1 -0
  85. data/test/data/decrypted +0 -0
  86. data/test/data/encrypt_obj +0 -0
  87. data/test/data/encrypt_string +0 -0
  88. data/test/data/encrypt_string_128bit +0 -0
  89. data/test/data/encrypted_object_stream.pdf +0 -0
  90. data/test/data/firststream +1 -0
  91. data/test/data/index.pdfobj +0 -0
  92. data/test/data/index_2bit.pdfobj +0 -0
  93. data/test/data/index_masked.pdfobj +0 -0
  94. data/test/data/indexed.pdfobj +0 -0
  95. data/test/data/indexed_2bit.pdfobj +0 -0
  96. data/test/data/indexed_masked.pdfobj +0 -0
  97. data/test/data/inline.png +0 -0
  98. data/test/data/logo.png +0 -0
  99. data/test/data/lzw.pdfobj +0 -0
  100. data/test/data/lzw_index.pdfobj +0 -0
  101. data/test/data/page_tree.pdf +148 -0
  102. data/test/data/pdf_20.png +0 -0
  103. data/test/data/pdf_21.png +0 -0
  104. data/test/data/pdf_22.png +0 -0
  105. data/test/data/pdf_50.png +0 -0
  106. data/test/data/png.pdfobj +0 -0
  107. data/test/data/space_bug_stream.txt +119 -0
  108. data/test/data/stream.txt +292 -0
  109. data/test/data/stream_kerning_bug.txt +13 -0
  110. data/test/data/stream_kerning_bug2.txt +6 -0
  111. data/test/data/test.pdf +0 -0
  112. data/test/data/test.txt +8 -0
  113. data/test/data/test_text.txt +42 -0
  114. data/test/data/working_obj +0 -0
  115. data/test/data/working_obj2 +0 -0
  116. data/test/mock.rb +149 -0
  117. data/test/suite.rb +30 -0
  118. data/test/test_pdf_object.rb +1802 -0
  119. data/test/test_pdf_parser.rb +1340 -0
  120. data/test/test_pdf_text.rb +789 -0
  121. data/test/test_space_bug_05_2004.rb +87 -0
  122. data/test/test_stream.rb +194 -0
  123. data/test/test_text_state.rb +315 -0
  124. data/usage-en.txt +112 -0
  125. data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
  126. data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
  127. metadata +220 -0
@@ -0,0 +1,107 @@
1
+
2
# Builds the DOT name of a parse stack: "<state> (<character position>)".
#
# stack        - object answering #state and #lexer.position.char_position.
# includeShape - when true, the name is quoted (via #inspect) and followed by
#                a box-shape attribute, giving a full DOT node statement.
#
# Returns a String.
def stack_to_node(stack, includeShape = false)
  state = stack.state
  char_position = stack.lexer.position.char_position
  name = "#{state} (#{char_position})"
  return name unless includeShape
  name.inspect + " [shape=box]"
end
9
+
10
# Collects the DOT node and edge statements reachable from a parse stack.
#
# stack - object answering #links; every link answers #stack (the stack the
#         link points to) and #tree (answering #inspect_compact).
# nodes - Hash of stack => DOT node statement, filled in place.
# links - Hash of link  => DOT edge statement, filled in place.
#
# Returns the two hashes as [nodes, links].
#
# A stack that is already present in +nodes+ has been walked before (only this
# method fills the hash), so it is not walked again. The resulting hashes are
# the same, but stacks shared by several links are no longer re-traversed once
# per path, and a cyclic structure cannot recurse forever.
def parsestack_as_dot_digraph(stack, nodes = Hash.new, links = Hash.new)
  return nodes, links if nodes.key?(stack)

  nodes[stack] = stack_to_node(stack, true)
  stack.links.each do |link|
    parsestack_as_dot_digraph(link.stack, nodes, links)
    links[link] = "#{stack_to_node(stack).inspect} -> #{stack_to_node(link.stack).inspect} [label=#{link.tree.inspect_compact.inspect}]"
  end
  return nodes, links
end
18
+
19
# Renders several parse stacks as one DOT digraph description.
#
# stacks - enumerable of parse stacks; each one is handed to
#          parsestack_as_dot_digraph, all sharing the same node/link hashes.
#
# Returns the digraph source as a String (page size fixed at "8,11"), with
# duplicate node and edge statements removed.
def parsestacks_as_dot_digraph(stacks)
  node_map = {}
  link_map = {}
  stacks.each do |stack|
    node_map, link_map = parsestack_as_dot_digraph(stack, node_map, link_map)
  end
  node_lines = node_map.values.uniq.join("\n")
  link_lines = link_map.values.uniq.join("\n")
  "digraph G {\nsize=\"8,11\"\n#{node_lines}\n#{link_lines}\n}"
end
29
+
30
# Base class for printers that turn some structure into a Graphviz DOT
# digraph description. Subclasses supply #eval_to_dot, which fills the
# @nodes and @links hashes with DOT statements.
class DotGraphPrinter
  # size        - value of the DOT "size" attribute (emitted quoted).
  # orientation - value of the DOT "orientation" attribute (emitted as is).
  def initialize(size = "11,9", orientation = "landscape")
    @size, @orientation = size, orientation
  end

  # Resets the node/link hashes, lets the subclass walk +ast+ through
  # #eval_to_dot and returns the resulting DOT source as a String.
  def to_graph(ast)
    @nodes, @links = Hash.new, Hash.new
    eval_to_dot(ast, nil)
    to_graph_from_nodes_and_links(@nodes, @links)
  end

  protected

  # Assembles the DOT source from the given hashes of statements.
  #
  # nodes - Hash whose values are DOT node statements.
  # links - Hash whose values are DOT edge statements.
  #
  # The arguments are what gets rendered (previously they were ignored in
  # favour of @nodes/@links, so the method only worked after #to_graph had
  # set those). Duplicate statements are emitted once.
  def to_graph_from_nodes_and_links(nodes, links)
    "digraph G {\n" +
      "size = #{@size.inspect}\n" +
      "orientation = #{@orientation}\n" +
      nodes.values.uniq.join("\n") + "\n" +
      links.values.uniq.join("\n") + "\n" +
      "}"
  end
end
52
+
53
# Prints a syntax tree as a Graphviz DOT digraph. Handles SyntaxTree nodes
# (including the special "_ArrayNode" and token-like nodes) and plain Arrays.
class SyntaxTreeAsDotGraph < DotGraphPrinter
  # Records the edge parent -> child in @links under the key [parent, child].
  #
  # label  - optional edge label (emitted through #inspect).
  # weight - optional edge weight (emitted through #inspect).
  #
  # Does nothing when +parent+ is nil (the root has no incoming edge).
  def add_parent_link(parent, child, label = nil, weight = nil)
    return unless parent

    attributes = []
    attributes << "label=#{label.inspect}" if label
    attributes << "weight=#{weight.inspect}" if weight
    edge = "#{dot_id(parent)} -> #{dot_id(child)}"
    edge += " [" + attributes.join(",") + "]" unless attributes.empty?
    @links[[parent, child]] = edge
  end

  # Recursively adds +ast+ and everything below it to @nodes/@links.
  #
  # ast      - SyntaxTree, Array or nil (nil and any other object are ignored).
  # parent   - node this one hangs under, or nil for the root.
  # linkname - label for the edge from +parent+.
  # weight   - weight for the edge from +parent+.
  def eval_to_dot(ast, parent = nil, linkname = nil, weight = nil)
    return unless ast

    if ast.kind_of?(SyntaxTree)
      add_parent_link(parent, ast, linkname, weight)
      case ast.name
      when "_ArrayNode"
        @nodes[ast] = "#{dot_id(ast)} [label=\"[]\"]"
        ast.each_with_index { |child, i| eval_to_dot(child, ast, i.inspect) }
      else
        if ast.children_names.sort == ["lexeme", "value"]
          # Token nodes: only the lexeme is worth printing, children are not
          # expanded.
          @nodes[ast] = "#{dot_id(ast)} [shape=box,label=#{ast.lexeme.inspect}]"
        else
          @nodes[ast] = "#{dot_id(ast)} [label=#{ast.name.inspect}]"
          ast.childrens.each_with_index do |child, i|
            eval_to_dot(child, ast, ast.children_names[i])
          end
        end
      end
    elsif ast.class == Array
      # Or nodes return a plain Array where they should return an ArrayNode.
      add_parent_link(parent, ast, linkname, weight)
      @nodes[ast] = "#{dot_id(ast)} [label=\"[]\"]"
      ast.each_with_index { |child, i| eval_to_dot(child, ast, i) }
    end
  end

  private

  # Identifier used for +node+ in the DOT output. Uses the node's own #id when
  # it has one and falls back to #object_id otherwise; Object#id no longer
  # exists on Ruby >= 1.9, so plain Arrays need the fallback.
  def dot_id(node)
    node.respond_to?(:id) ? node.id : node.object_id
  end
end
101
+
102
# Convenience wrapper: renders +syntaxtree+ with a SyntaxTreeAsDotGraph
# printer built with its default arguments and returns what #to_graph returns.
def syntaxtree_as_dot_digraph(syntaxtree)
  printer = SyntaxTreeAsDotGraph.new
  printer.to_graph(syntaxtree)
end
105
+
106
+
107
+
@@ -0,0 +1,63 @@
1
# Formats a list of nodes and links as a Graphviz DOT digraph, wrapped in a
# DotGraph. How nodes are shaped/labelled and how links are labelled is
# decided by three callables given to the constructor.
class DotGraphFormatter
  @@default_node_shaper = proc { |n| "box" }
  @@default_node_labeler = proc { |n| n.inspect }
  @@default_link_labeler = proc { |info| info ? info.inspect : nil }

  # nodeShaper  - callable(node) -> shape name; default: always "box".
  # nodeLabeler - callable(node) -> label; default: node.inspect.
  # linkLabeler - callable(info) -> label or nil (no label); default:
  #               info.inspect, or nil when there is no info.
  # size, orientation - values of the DOT attributes of the same name.
  def initialize(nodeShaper = nil, nodeLabeler = nil, linkLabeler = nil,
                 size = "11,9", orientation = "landscape")
    @node_shaper = nodeShaper || @@default_node_shaper
    @node_labeler = nodeLabeler || @@default_node_labeler
    @link_labeler = linkLabeler || @@default_link_labeler
    @size, @orientation = size, orientation
  end

  # nodes is array of node objects
  # links is either array of
  #   arrays [fromNode, toNode [, infoOnLink]], or
  #   objects with attributes :from, :to, :info
  #
  # Duplicate nodes and links are emitted once. Returns a DotGraph whose
  # description is the DOT source.
  def format(nodes, links)
    DotGraph.new("digraph G {\n" +
                 "size = #{@size.inspect}\n" +
                 "orientation = #{@orientation}\n" +
                 nodes.uniq.map { |n| format_node(n) }.join("\n") + "\n" +
                 links.uniq.map { |l| format_link(l) }.join("\n") + "\n" +
                 "}")
  end

  protected

  # DOT node statement for +node+: id, shape and label, all through #inspect.
  def format_node(node)
    dot_id(node).inspect + " [" +
      "shape=" + @node_shaper.call(node).inspect + ", " +
      "label=" + @node_labeler.call(node).inspect + "]"
  end

  # Splits a link into [from, to, info].
  #
  # Arrays are read by position. Objects answering #from and #to are read
  # through their accessors (#info is optional). Anything else is read by
  # position as a last resort. The link kind is detected up front rather than
  # by rescuing every Exception, so errors raised inside an accessor reach the
  # caller unchanged and signals such as Interrupt are not swallowed.
  def get_link_data(link)
    return link[0], link[1], link[2] if link.kind_of?(Array)

    if link.respond_to?(:from) && link.respond_to?(:to)
      info = link.respond_to?(:info) ? link.info : nil
      return link.from, link.to, info
    end
    return link[0], link[1], link[2]
  end

  # DOT edge statement for +link+; the label attribute is left out when the
  # link labeler returns nil/false.
  def format_link(link)
    from, to, info = get_link_data(link)
    label = @link_labeler.call(info)
    dot_id(from).inspect + " -> " + dot_id(to).inspect +
      (label ? " [label=" + label.inspect + "]" : "")
  end

  # Identifier used for +node+ in the DOT output: its own #id when it has
  # one, else #object_id (Object#id is gone on Ruby >= 1.9).
  def dot_id(node)
    node.respond_to?(:id) ? node.id : node.object_id
  end
end
51
+
52
# A DOT graph description (the DOT source text in #description) that can be
# rendered to PostScript with the external Graphviz `dot` program.
DotGraph = Struct.new("DotGraph", :description)
class DotGraph
  # Renders the description to +filename+ as PostScript.
  #
  # The description is first written to a scratch file next to +filename+
  # (filename plus a random number that is not yet taken), `dot -Tps` is run
  # on it, and the scratch file is removed again, even when writing or
  # running `dot` raises.
  #
  # `dot` is started with an argument list rather than a shell command line,
  # so file names containing spaces or shell metacharacters are passed
  # through literally.
  #
  # Returns what Kernel#system returns: true when `dot` succeeded, false when
  # it exited with an error, nil when it could not be started.
  def write_to_file(filename)
    tmpfile = filename + rand(100000).inspect
    tmpfile = filename + rand(100000).inspect while File.file?(tmpfile)
    begin
      File.open(tmpfile, "w") { |f| f.write description }
      system("dot", "-Tps", "-o", filename, tmpfile)
    ensure
      File.delete(tmpfile) if File.exist?(tmpfile)
    end
  end
end
@@ -0,0 +1,53 @@
1
# Mixin for objects created through an IndexableFactory: gives them a
# readable/writable index number and a reference to the factory that made
# them.
module Indexable
  # index_number: attr_reader_once_write instead?
  attr_accessor :index_number, :factory
end
5
+
6
# Creates instances of an Indexable class, at most one per distinct argument
# list, and numbers them consecutively in creation order.
class IndexableFactory
  attr_reader :instances, :start_index, :next_index

  # klass      - class to instantiate; must have Indexable among its ancestors.
  # startIndex - index number given to the first instance.
  #
  # Raises ArgumentError when klass is not Indexable.
  def initialize(klass, startIndex = 0)
    indexable = klass.ancestors.include?(Indexable)
    raise ArgumentError, "#{klass.inspect} is not Indexable" unless indexable

    @klass = klass
    @start_index = startIndex
    @next_index = startIndex
    @instance_map = {}
    @instances = []
  end

  # Returns the instance registered for +args+, creating and registering it
  # (with the next index number) when there is none yet.
  def make(*args)
    existing = @instance_map[args]
    return existing if existing

    created = make_new_obj(args)
    @instance_map[args] = created
    @instances.push(created)
    created
  end

  # Like #make, but returns [instance, created] where +created+ tells whether
  # no instance was registered for +args+ before the call.
  def make_unless_exists(*args)
    was_missing = @instance_map[args].nil?
    [make(*args), was_missing]
  end

  # Returns the instance registered for +args+, or nil. Never creates one.
  def get_instance(*args)
    @instance_map[args]
  end

  # Returns the instance registered for +args+, creating it through #make
  # when needed.
  def instance_with_args(*args)
    @instance_map[args] ||= make(*args)
  end

  protected

  # Instantiates the class with +arguments+ and stamps the new object with
  # its index number and this factory.
  def make_new_obj(arguments)
    fresh = @klass.new(*arguments)
    fresh.index_number = advance_index_number
    fresh.factory = self
    fresh
  end

  # Returns the index number to hand out now and moves the counter on by one.
  def advance_index_number
    current = @next_index
    @next_index = current + 1
    current
  end
end
@@ -0,0 +1,144 @@
1
+ require 'rpdf2txt-rockit/grammar'
2
+ require 'rpdf2txt-rockit/parse_table'
3
+ require 'rpdf2txt-rockit/parsetable_generation'
4
+ require 'rpdf2txt-rockit/reduce_actions_generator'
5
+
6
+ require 'rpdf2txt-rockit/profiler'
7
+
8
module Parse
  # Graph of parser states. On top of what BackLinkedDirectedGraph provides it
  # remembers the start state and sorts every added reduce state into a
  # "consistent" or "inconsistent" list.
  class StateGraph < BackLinkedDirectedGraph
    attr_reader :start_state
    attr_reader :consistent_reduce_states, :inconsistent_reduce_states

    # startState - state the graph starts from.
    # args       - passed on unchanged to the superclass constructor.
    def initialize(startState, *args)
      super(*args)
      @start_state = startState
      @consistent_reduce_states = []
      @inconsistent_reduce_states = []
    end

    # Adds +state+ to the graph and, when it is a reduce state, files it once
    # under the consistent or the inconsistent reduce states.
    def add_node(state)
      super(state)
      return unless state.reduce_state?

      bucket = state.consistent? ? @consistent_reduce_states : @inconsistent_reduce_states
      bucket.push(state) unless bucket.include?(state)
    end

    # Node label: state number followed by the state's kernel items.
    @@node_labeler_with_kernels = proc { |state|
      "S#{state.index_number.inspect}: #{state.kernel_items.inspect}"
    }

    # Node label: state number only.
    @@node_labeler = proc { |state| "S#{state.index_number.inspect}" }

    # Delegates to the superclass with one of the two labelers above; kernel
    # items are included in the labels unless withKernelItems is false.
    def to_postscript_file(filename, withKernelItems = true)
      labeler = withKernelItems ? @@node_labeler_with_kernels : @@node_labeler
      super(filename, nil, labeler)
    end
  end

  # Generates a parse table for a grammar: builds the graph of LR(0) item
  # sets (adding gotos and shift actions on the way) and then lets a
  # lookahead calculator add the reduce actions.
  class LaLr1ParseTableGenerator
    # grammar                  - grammar to generate the table for.
    # lookaheadCalculatorKlass - class whose instances answer
    #                            #add_reduce_actions.
    def initialize(grammar,
                   lookaheadCalculatorKlass = ReduceActionsGenerator)
      @grammar = grammar
      @lookahead_calculator_klass = lookaheadCalculatorKlass
    end

    # Augments and normalizes the grammar, computes the state graph and the
    # reduce actions, and returns the filled parse table (an instance made by
    # parseTableKlass.new_from_grammar). Each phase runs inside
    # time_and_puts.
    def generate_parse_table(parseTableKlass = ParseTable)
      @grammar.augment
      time_and_puts("\n Normalizing grammar") do
        @grammar.normalize
      end

      state_graph = nil
      time_and_puts(" Calculating states") do
        @item_factory = IndexableFactory.new(Item, 0)
        @state_factory = IndexableFactory.new(LrState, 0)
        precalc_nonkernel_items_for_nonterminals
        @parse_table = parseTableKlass.new_from_grammar(@grammar)
        # Also adds gotos and shift actions to @parse_table.
        state_graph = calculate_state_graph
      end

      time_and_puts(" Calculating lalr1_lookaheads") do
        calculator = @lookahead_calculator_klass.new(state_graph, @grammar,
                                                     @parse_table,
                                                     @item_factory.instances)
        calculator.add_reduce_actions
      end

      @parse_table
    end

    # Debugging aid: prints, per nonterminal name, the distinct ids of the
    # nonterminal objects carrying that name in production heads and bodies.
    def test_nonterminal_uniqueness
      ids_by_name = DefaultInitHash.new { |k| Array.new }
      record = lambda do |nonterminal_name, nonterminal_id|
        ids_by_name[nonterminal_name].push(nonterminal_id)
        ids_by_name[nonterminal_name].uniq!
      end
      @grammar.productions.each do |production|
        record.call(production.nonterminal.name, production.nonterminal.id)
        production.elements.each do |element|
          next unless element.nonterminal?
          record.call(element.name, element.id)
        end
      end
      puts ids_by_name.inspect
    end

    protected

    # Lets every nonterminal compute its non-kernel items up front.
    def precalc_nonkernel_items_for_nonterminals
      @grammar.nonterminals.each do |nonterminal|
        nonterminal.calc_nonkernel_items(@grammar, @item_factory)
      end
    end

    # Returns [state, created] for the given kernel items (see
    # IndexableFactory#make_unless_exists).
    def add_state_unless_exists(kernel_items)
      @state_factory.make_unless_exists(kernel_items)
    end

    # Calculate the state graph by constructing the sets-of-lr0-items
    # collection.
    # See page 224 (basic algorithm) and 240 (representing the states by
    # their kernel items) in the Dragon book
    def calculate_state_graph
      Profiler.__enter__(:calculate_state_graph) if $PROFILE

      start_item = @item_factory.make(@grammar.productions[0], 0)
      start_state = add_state_unless_exists([start_item]).first
      state_graph = StateGraph.new(start_state)

      # Work list: states are appended while it is being walked.
      states = [state_graph.start_state]
      transitions = DefaultInitHash.new { |k| Array.new }
      position = 0
      until position >= states.length
        state = states[position]

        # Group the successor items of the closure by the symbol they move
        # over.
        transitions.clear
        state.closure.each do |item|
          symbol = item.symbol
          next unless symbol
          successor = item.next_item
          transitions[symbol].push(successor) if successor
        end

        transitions.each do |symbol, kernel_item_set|
          kernel_item_set.uniq! # Needed?
          dest_state, created = add_state_unless_exists(kernel_item_set)
          states.push(dest_state) if created
          state_graph.add_link(state, dest_state, symbol)
          if symbol.nonterminal?
            @parse_table.add_goto(state.index_number, symbol,
                                  dest_state.index_number)
          else
            @parse_table.add_action(state.index_number, symbol,
                                    [:SHIFT, dest_state.index_number])
          end
        end

        position += 1
      end

      Profiler.__leave__(:calculate_state_graph, state_graph) if $PROFILE
      state_graph
    end
  end
end
@@ -0,0 +1,273 @@
1
+ require 'rpdf2txt-rockit/sourcecode_dumpable'
2
+ require 'rpdf2txt-rockit/grammar'
3
+ require 'rpdf2txt-rockit/base_extensions'
4
+
5
+ require 'rpdf2txt-rockit/profiler'
6
+
7
# Action/goto table of a generated parser, stored in a compact numeric form
# and dumpable as Ruby source through SourceCodeDumpable.
class ParseTable
  include SourceCodeDumpable
  attr_reader :start_state, :tokens, :priorities
  attr_accessor :language

  # Builds an empty table for the productions, tokens and priorities of
  # +aGrammar+ and names it after the grammar ("UNNAMED_LANGUAGE" when the
  # grammar has no name).
  def ParseTable.new_from_grammar(aGrammar)
    pt = self.new(aGrammar.productions, aGrammar.tokens, aGrammar.priorities)
    pt.language = aGrammar.name || "UNNAMED_LANGUAGE"
    pt
  end

  # We save the actions in a compact numerical way to save space and time:
  # The action table is an array of arrays. Each state number is an index
  # into the array and its array contains an even number of integers.
  # Each pair of integers represent one unique action. The first of the
  # integers is the action and the second is the number representing the
  # terminals for which it apply. The least significant 'action_bits' bits
  # of the action number determines the type of action by giving an index
  # into the 'action_map'. Its default value is:
  #   [:REDUCE, :SHIFT, :ACCEPT]
  # so that, with the default of 2 action bits, [:SHIFT, 3] is stored as
  # 1 + (3 << 2) = 13 (see action_to_actionnum / actionnum_to_action).
  @@default_action_map = [:REDUCE, :SHIFT, :ACCEPT]

  # productions - array of productions (each answering #nonterminal).
  # tokens      - array of token types; a token's position is its bit in a
  #               terminal set.
  # priorities  - stored as is and dumped by #to_src.
  # actionTable - per-state arrays of (action number, terminal set) pairs.
  # gotoHash    - state => { nonterminal number => next state }.
  # actionBits  - number of low bits of an action number holding the action
  #               type.
  # actionMap   - action types, indexed by those low bits.
  #
  # NOTE(review): ArrayOfArrays / ArrayOfHashes come from base_extensions and
  # presumably create missing rows on access; confirm there.
  def initialize(productions, tokens, priorities = nil,
                 actionTable = ArrayOfArrays.new, gotoHash = Hash.new,
                 actionBits = 2, actionMap = @@default_action_map)
    @productions, @start_state, @language = productions, 0, "UNNAMED_LANGUAGE"
    @priorities = priorities
    @tokens, @nonterminals = tokens, nonterminals(productions)
    @action_table, @goto_hash = actionTable, gotoHash
    @action_cache = ArrayOfHashes.new
    @mask = Array.new
    @action_map, @action_bits, @action_mask = actionMap, actionBits, 0
    # One set bit per action bit: actionBits == 2 gives the mask 0b11.
    actionBits.times { @action_mask = (@action_mask << 1) | 1 }
    init_productionnum_to_nonterminal_number_hash
    init_tokentype_to_token_number_hash
  end

  # Number of states that have a row in the action table.
  def num_states
    @action_table.length
  end

  # Two tables are equal when they are of the same class and agree on
  # productions, tokens, action table, goto hash and start state.
  def ==(other)
    other.class == self.class &&
      other.productions == @productions &&
      other.tokens == @tokens &&
      other.action_table == @action_table &&
      other.goto_hash == @goto_hash &&
      other.start_state == @start_state
  end

  # Adds +action+ (e.g. [:SHIFT, 4]) for +state+ on the single token type
  # +aTokenType+ and drops the cached lookups of that state.
  def add_action(state, aTokenType, action)
    Profiler.__enter__(:ParseTable_add_action, state, aTokenType, action) if $PROFILE
    @action_cache[state].clear
    actionnum = action_to_actionnum(action)
    @action_table[state] << actionnum << token_to_terminalset(aTokenType)
    Profiler.__leave__(:ParseTable_add_action) if $PROFILE
  end

  # Adds +action+ for +state+ on every terminal in +terminalSet+ (anything
  # answering #to_i with the bit set). Like #add_action it drops the cached
  # lookups of the state, so a later #actions call cannot return a result
  # computed before this action existed.
  def add_action_for_terminalset(state, action, terminalSet)
    Profiler.__enter__(:ParseTable_add_action_for_terminalset, state, action, terminalSet) if $PROFILE
    @action_cache[state].clear
    @action_table[state] << action_to_actionnum(action) << terminalSet.to_i
    Profiler.__leave__(:ParseTable_add_action_for_terminalset) if $PROFILE
  end

  # Unify terminal sets for identical actions
  #
  # Within every state, pairs with the same action number are merged into one
  # pair whose terminal set is the union of the originals; the order of first
  # occurrence is kept.
  def compact!
    Profiler.__enter__(:ParseTable_compact!) if $PROFILE
    first_index = Hash.new # action number => index of its pair in the new row
    @action_table.map! do |actionnums|
      first_index.clear
      merged = Array.new
      i = 0
      while i < actionnums.length
        actionnum, terminalset = actionnums[i], actionnums[i + 1]
        if (index = first_index[actionnum])
          merged[index + 1] |= terminalset
        else
          first_index[actionnum] = merged.length
          merged << actionnum << terminalset
        end
        i += 2
      end
      merged
    end
    Profiler.__leave__(:ParseTable_compact!) if $PROFILE
  end

  # Terminal set containing only +aTokenType+.
  def token_to_terminalset(aTokenType)
    mask(@token_to_number[aTokenType])
  end

  # Integer with only bit +index+ set (memoized per index).
  def mask(index)
    @mask[index] ||= (1 << index)
  end

  # Records that +state+ goes to +newState+ on +aNonTerminal+, creating the
  # state's goto row on first use.
  def add_goto(state, aNonTerminal, newState)
    row = (@goto_hash[state] ||= Hash.new)
    row[@nonterminals.index(aNonTerminal)] = newState
  end

  # Returns the array of actions ([type, argument] pairs) valid in +state+
  # for +tokenType+; empty when there are none. Results are cached per state
  # and token type until an action is added to the state.
  def actions(state, tokenType)
    actions = @action_cache[state][tokenType]
    unless actions
      actions = Array.new
      actionnums = @action_table[state]
      token_mask = mask(@token_to_number[tokenType])
      0.step(actionnums.length - 1, 2) do |i|
        if actionnums[i + 1] & token_mask > 0
          actions.push actionnum_to_action(actionnums[i])
        end
      end
      @action_cache[state][tokenType] = actions
    end
    actions
  end

  # Token types for which +state+ has at least one action.
  def valid_tokens(state)
    terminal_set = 0
    each_terminalset(state) { |ts| terminal_set |= ts }
    terminalset_to_terminals(terminal_set)
  end

  # Yields the terminal set of every action pair of +state+ (the elements at
  # odd positions of its row).
  def each_terminalset(state)
    @action_table[state].each_with_index { |e, i| yield(e) if i % 2 == 1 }
  end

  # Token types whose bit is set in +terminalSet+.
  def terminalset_to_terminals(terminalSet)
    @tokens.select { |t| terminalSet & mask(@token_to_number[t]) > 0 }
  end

  # Decodes an action number into [type, argument].
  def actionnum_to_action(actionNumber)
    [@action_map[actionNumber & @action_mask], actionNumber >> @action_bits]
  end

  # Encodes [type, argument] as an action number: the type's index in the
  # action map in the low bits, the argument above them.
  def action_to_actionnum(action)
    Profiler.__enter__(:ParseTable_action_to_actionnum, action) if $PROFILE
    res = @action_map.index(action[0]) + (action[1] << @action_bits)
    Profiler.__leave__(:ParseTable_action_to_actionnum) if $PROFILE
    res
  end

  # State to go to from +state+ after reducing by production
  # +productionNumber+, or nil when the table has no such entry. Missing
  # entries are looked up explicitly; unrelated errors are no longer
  # swallowed.
  def goto(state, productionNumber)
    row = @goto_hash[state]
    row && row[@productionnum_to_nonterminal_num[productionNumber]]
  end

  # Production with the given number.
  def production(number)
    @productions[number]
  end

  # Ruby source that rebuilds this table and assigns it to +name+. nameHash
  # is accepted for interface compatibility and not used here.
  def to_src(name = "parse_table", nameHash = {})
    names = name_hash(@tokens) { |t| "t" }
    str = @tokens.to_src("tokens", names) + "\n"
    names.update(name_hash(@productions) { |p| "p" })
    str << @productions.to_src("productions", names) + "\n"
    str << @priorities.to_src("priorities", names) + "\n"
    str << @action_table.to_compact_src("action_table") + "\n"
    str << @goto_hash.to_compact_src("goto_hash") + "\n"
    str << assign_to(name,
                     new_of_my_type(as_code("productions"),
                                    as_code("tokens"),
                                    as_code("priorities"),
                                    as_code("action_table"),
                                    as_code("goto_hash"),
                                    @action_bits,
                                    @action_map))
    str
  end

  # Human-readable dump: tokens, nonterminals, productions, then one line per
  # state with its actions per token and its gotos per nonterminal.
  def inspect
    parts = [
      "ParseTable\n",
      "Tokens: #{@tokens.inspect}\n",
      "NonTerminals: #{@nonterminals.inspect}\n",
      "Productions:\n#{productions_inspect}\n",
      "Actions: \n"
    ]
    num_states.times do |state|
      action_cells = @tokens.map { |t| inspect_actions(actions(state, t)) + "," }.join
      goto_cells = @nonterminals.map { |nt|
        # goto is keyed by production number, so use the first production of
        # the nonterminal.
        i = @productions.index(@productions.detect { |p| p.nonterminal == nt })
        ns = goto(state, i)
        (ns ? "#{ns}" : " ") + ","
      }.join
      parts << "#{state}:\t#{action_cells}| #{goto_cells}\n"
    end
    parts.join
  end

  protected

  # One " <number>: <production>" line per production.
  def productions_inspect
    lines = Array.new
    @productions.each_with_index do |production, i|
      lines << " #{i}: #{production.inspect}\n"
    end
    lines.join
  end

  # Short text for a list of actions: blank for none, "a"/"sN"/"rN" for a
  # single accept/shift/reduce, a bracketed list for several.
  def inspect_actions(actions)
    return " " if !actions || actions.length == 0
    if actions.length > 1
      return "[" + actions.map { |a| inspect_actions([a]) }.join(',') + "]"
    end

    case actions[0][0]
    when :ACCEPT
      " a "
    when :SHIFT
      "s#{actions[0][1]} "
    when :REDUCE
      "r#{actions[0][1]} "
    end
  end

  attr_reader :productions, :action_table, :goto_hash

  # Distinct nonterminals heading the given productions, in order of first
  # appearance (equality_uniq comes from the project's base extensions).
  def nonterminals(anArrayOfProductions)
    anArrayOfProductions.map { |p| p.nonterminal }.equality_uniq
  end

  # Builds and returns @nonterminal_index: production number => position of
  # the production's nonterminal in +nonterminals+.
  def init_nonterminal_index(nonterminals, productions)
    @nonterminal_index = Hash.new
    productions.each_with_index do |prod, i|
      @nonterminal_index[i] = nonterminals.index(prod.nonterminal)
    end
    @nonterminal_index
  end

  # Builds and returns @token_index: token => position in +tokens+.
  def init_token_index(tokens)
    @token_index = Hash.new
    tokens.each_with_index { |t, i| @token_index[t] = i }
    @token_index
  end

  # Fills @productionnum_to_nonterminal_num: production number => number of
  # the production's nonterminal (its position in @nonterminals).
  def init_productionnum_to_nonterminal_number_hash
    @productionnum_to_nonterminal_num = Hash.new
    @productions.each_with_index do |p, n|
      @productionnum_to_nonterminal_num[n] = @nonterminals.index(p.nonterminal)
    end
  end

  # Fills @token_to_number: token type => its bit position in terminal sets.
  def init_tokentype_to_token_number_hash
    @token_to_number = Hash.new
    @tokens.each_with_index { |t, i| @token_to_number[t] = i }
  end
end