rpdf2txt 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. data/History.txt +5 -0
  2. data/LICENCE +515 -0
  3. data/Manifest.txt +126 -0
  4. data/README.txt +30 -0
  5. data/Rakefile +24 -0
  6. data/bin/rpdf2txt +58 -0
  7. data/config.save +12 -0
  8. data/install.rb +1098 -0
  9. data/lib/rpdf2txt-rockit/base_extensions.rb +73 -0
  10. data/lib/rpdf2txt-rockit/bootstrap.rb +120 -0
  11. data/lib/rpdf2txt-rockit/bounded_lru_cache.rb +43 -0
  12. data/lib/rpdf2txt-rockit/conflict_resolution.rb +302 -0
  13. data/lib/rpdf2txt-rockit/directed_graph.rb +401 -0
  14. data/lib/rpdf2txt-rockit/glr_parser.rb +393 -0
  15. data/lib/rpdf2txt-rockit/grammar.rb +644 -0
  16. data/lib/rpdf2txt-rockit/graphdrawing.rb +107 -0
  17. data/lib/rpdf2txt-rockit/graphviz_dot.rb +63 -0
  18. data/lib/rpdf2txt-rockit/indexable.rb +53 -0
  19. data/lib/rpdf2txt-rockit/lalr_parsetable_generator.rb +144 -0
  20. data/lib/rpdf2txt-rockit/parse_table.rb +273 -0
  21. data/lib/rpdf2txt-rockit/parsetable_generation.rb +164 -0
  22. data/lib/rpdf2txt-rockit/parsing_ambiguities.rb +84 -0
  23. data/lib/rpdf2txt-rockit/profiler.rb +168 -0
  24. data/lib/rpdf2txt-rockit/reduce_actions_generator.rb +523 -0
  25. data/lib/rpdf2txt-rockit/rockit.rb +76 -0
  26. data/lib/rpdf2txt-rockit/rockit_grammar_ast_eval.rb +187 -0
  27. data/lib/rpdf2txt-rockit/rockit_grammars_parser.rb +126 -0
  28. data/lib/rpdf2txt-rockit/sourcecode_dumpable.rb +181 -0
  29. data/lib/rpdf2txt-rockit/stringscanner.rb +54 -0
  30. data/lib/rpdf2txt-rockit/syntax_tree.rb +452 -0
  31. data/lib/rpdf2txt-rockit/token.rb +364 -0
  32. data/lib/rpdf2txt-rockit/version.rb +3 -0
  33. data/lib/rpdf2txt/attributesparser.rb +42 -0
  34. data/lib/rpdf2txt/cmapparser.rb +65 -0
  35. data/lib/rpdf2txt/data/_cmap.grammar +11 -0
  36. data/lib/rpdf2txt/data/_cmap_range.grammar +15 -0
  37. data/lib/rpdf2txt/data/_pdfattributes.grammar +32 -0
  38. data/lib/rpdf2txt/data/cmap.grammar +11 -0
  39. data/lib/rpdf2txt/data/cmap.rb +37 -0
  40. data/lib/rpdf2txt/data/cmap_range.grammar +15 -0
  41. data/lib/rpdf2txt/data/cmap_range.rb +43 -0
  42. data/lib/rpdf2txt/data/fonts/Courier-Bold.afm +342 -0
  43. data/lib/rpdf2txt/data/fonts/Courier-BoldOblique.afm +342 -0
  44. data/lib/rpdf2txt/data/fonts/Courier-Oblique.afm +342 -0
  45. data/lib/rpdf2txt/data/fonts/Courier.afm +342 -0
  46. data/lib/rpdf2txt/data/fonts/Helvetica-Bold.afm +2827 -0
  47. data/lib/rpdf2txt/data/fonts/Helvetica-BoldOblique.afm +2827 -0
  48. data/lib/rpdf2txt/data/fonts/Helvetica-Oblique.afm +3051 -0
  49. data/lib/rpdf2txt/data/fonts/Helvetica.afm +3051 -0
  50. data/lib/rpdf2txt/data/fonts/License-Adobe.txt +65 -0
  51. data/lib/rpdf2txt/data/fonts/Symbol.afm +213 -0
  52. data/lib/rpdf2txt/data/fonts/Times-Bold.afm +2588 -0
  53. data/lib/rpdf2txt/data/fonts/Times-BoldItalic.afm +2384 -0
  54. data/lib/rpdf2txt/data/fonts/Times-Italic.afm +2667 -0
  55. data/lib/rpdf2txt/data/fonts/Times-Roman.afm +2419 -0
  56. data/lib/rpdf2txt/data/fonts/ZapfDingbats.afm +225 -0
  57. data/lib/rpdf2txt/data/pdfattributes.grammar +32 -0
  58. data/lib/rpdf2txt/data/pdfattributes.rb +71 -0
  59. data/lib/rpdf2txt/data/pdftext.grammar +102 -0
  60. data/lib/rpdf2txt/data/pdftext.rb +146 -0
  61. data/lib/rpdf2txt/default_handler.rb +352 -0
  62. data/lib/rpdf2txt/lzw.rb +69 -0
  63. data/lib/rpdf2txt/object.rb +1114 -0
  64. data/lib/rpdf2txt/parser.rb +169 -0
  65. data/lib/rpdf2txt/symbol.rb +408 -0
  66. data/lib/rpdf2txt/text.rb +182 -0
  67. data/lib/rpdf2txt/text_state.rb +434 -0
  68. data/lib/rpdf2txt/textparser.rb +42 -0
  69. data/test/data/3392_obj +0 -0
  70. data/test/data/397_decrypted +15 -0
  71. data/test/data/450_decrypted +153 -0
  72. data/test/data/450_obj +0 -0
  73. data/test/data/452_decrypted +125 -0
  74. data/test/data/454_decrypted +108 -0
  75. data/test/data/456_decrypted +106 -0
  76. data/test/data/458_decrypted +111 -0
  77. data/test/data/458_obj +0 -0
  78. data/test/data/460_decrypted +118 -0
  79. data/test/data/460_obj +0 -0
  80. data/test/data/463_decrypted +117 -0
  81. data/test/data/465_decrypted +107 -0
  82. data/test/data/465_obj +0 -0
  83. data/test/data/90_obj +0 -0
  84. data/test/data/90_obj_comp +1 -0
  85. data/test/data/decrypted +0 -0
  86. data/test/data/encrypt_obj +0 -0
  87. data/test/data/encrypt_string +0 -0
  88. data/test/data/encrypt_string_128bit +0 -0
  89. data/test/data/encrypted_object_stream.pdf +0 -0
  90. data/test/data/firststream +1 -0
  91. data/test/data/index.pdfobj +0 -0
  92. data/test/data/index_2bit.pdfobj +0 -0
  93. data/test/data/index_masked.pdfobj +0 -0
  94. data/test/data/indexed.pdfobj +0 -0
  95. data/test/data/indexed_2bit.pdfobj +0 -0
  96. data/test/data/indexed_masked.pdfobj +0 -0
  97. data/test/data/inline.png +0 -0
  98. data/test/data/logo.png +0 -0
  99. data/test/data/lzw.pdfobj +0 -0
  100. data/test/data/lzw_index.pdfobj +0 -0
  101. data/test/data/page_tree.pdf +148 -0
  102. data/test/data/pdf_20.png +0 -0
  103. data/test/data/pdf_21.png +0 -0
  104. data/test/data/pdf_22.png +0 -0
  105. data/test/data/pdf_50.png +0 -0
  106. data/test/data/png.pdfobj +0 -0
  107. data/test/data/space_bug_stream.txt +119 -0
  108. data/test/data/stream.txt +292 -0
  109. data/test/data/stream_kerning_bug.txt +13 -0
  110. data/test/data/stream_kerning_bug2.txt +6 -0
  111. data/test/data/test.pdf +0 -0
  112. data/test/data/test.txt +8 -0
  113. data/test/data/test_text.txt +42 -0
  114. data/test/data/working_obj +0 -0
  115. data/test/data/working_obj2 +0 -0
  116. data/test/mock.rb +149 -0
  117. data/test/suite.rb +30 -0
  118. data/test/test_pdf_object.rb +1802 -0
  119. data/test/test_pdf_parser.rb +1340 -0
  120. data/test/test_pdf_text.rb +789 -0
  121. data/test/test_space_bug_05_2004.rb +87 -0
  122. data/test/test_stream.rb +194 -0
  123. data/test/test_text_state.rb +315 -0
  124. data/usage-en.txt +112 -0
  125. data/user-stories/UserStories_Rpdf2Txt.txt +34 -0
  126. data/user-stories/documents/swissmedicjournal/04_2004.pdf +0 -0
  127. metadata +220 -0
@@ -0,0 +1,107 @@
1
+
2
# Builds the text used to identify a parse stack in Graphviz output:
# "<state> (<character position of the stack's lexer>)".
#
# With includeShape set, the text is quoted (String#inspect) and followed by
# a "[shape=box]" attribute list, i.e. it becomes a node declaration.
def stack_to_node(stack, includeShape = false)
  label = "#{stack.state} (#{stack.lexer.position.char_position})"
  return label unless includeShape
  "#{label.inspect} [shape=box]"
end
9
+
10
# Collects the Graphviz node and edge lines reachable from +stack+.
#
# stack - object responding to #links; each link responds to #stack (the
#         stack it points to) and #tree (responding to #inspect_compact).
# nodes - Hash of stack => node declaration, filled in place.
# links - Hash of link => edge line, filled in place.
#
# Returns the pair [nodes, links].
#
# A stack that is already present in +nodes+ is not walked again. Without
# that check every shared sub-stack would be re-traversed once per path
# leading to it, and a cyclic link structure would recurse forever. The
# produced hashes are the same as before for acyclic input.
def parsestack_as_dot_digraph(stack, nodes = Hash.new, links = Hash.new)
  return nodes, links if nodes.key?(stack)
  nodes[stack] = stack_to_node(stack, true)
  stack.links.each do |link|
    nodes, links = parsestack_as_dot_digraph(link.stack, nodes, links)
    links[link] = "#{stack_to_node(stack).inspect} -> #{stack_to_node(link.stack).inspect} [label=#{link.tree.inspect_compact.inspect}]"
  end
  return nodes, links
end
18
+
19
# Renders several parse stacks as one Graphviz "digraph G" description.
#
# Every stack is walked with parsestack_as_dot_digraph, accumulating into
# shared node and link hashes; duplicate node/edge lines are emitted once.
# Returns the description as a String.
def parsestacks_as_dot_digraph(stacks)
  nodes = {}
  links = {}
  stacks.each do |stack|
    nodes, links = parsestack_as_dot_digraph(stack, nodes, links)
  end
  [
    "digraph G {",
    "size=\"8,11\"",
    nodes.values.uniq.join("\n"),
    links.values.uniq.join("\n"),
    "}"
  ].join("\n")
end
29
+
30
# Base class for printers that turn a tree into a Graphviz "digraph G"
# description. Subclasses supply eval_to_dot(ast, parent), which is expected
# to fill @nodes and @links (hashes whose values are dot lines).
class DotGraphPrinter
  # size        - value written (quoted) after "size = ".
  # orientation - value written verbatim after "orientation = ".
  def initialize(size = "11,9", orientation = "landscape")
    @size, @orientation = size, orientation
  end

  # Resets the node/link hashes, lets the subclass populate them from +ast+
  # and returns the resulting dot description as a String.
  def to_graph(ast)
    @nodes, @links = Hash.new, Hash.new
    eval_to_dot(ast, nil)
    to_graph_from_nodes_and_links(@nodes, @links)
  end

  protected

  # Assembles the dot text from the given hashes; duplicate values are
  # written once.
  #
  # The hashes passed in are the ones rendered. Previously the parameters
  # were ignored in favour of @nodes/@links, which made the method unusable
  # with any other input; the only caller in this class passes
  # @nodes/@links, so its output is unchanged.
  def to_graph_from_nodes_and_links(nodes, links)
    "digraph G {\n" +
      "size = #{@size.inspect}\n" +
      "orientation = #{@orientation}\n" +
      nodes.values.uniq.join("\n") + "\n" +
      links.values.uniq.join("\n") + "\n" +
      "}"
  end
end
52
+
53
# Printer that renders a syntax tree (SyntaxTree nodes and plain Arrays) as
# a Graphviz digraph. Nodes are recorded in @nodes and parent/child edges in
# @links; both hashes are created by DotGraphPrinter#to_graph.
class SyntaxTreeAsDotGraph < DotGraphPrinter
  # Records the edge parent -> child, optionally with label and/or weight
  # attributes. Does nothing when +parent+ is nil (the root has no edge).
  def add_parent_link(parent, child, label = nil, weight = nil)
    if parent
      edge = "#{node_id(parent)} -> #{node_id(child)}"
      if label or weight
        edge += " [" +
          (label ? "label=#{label.inspect}" : "") +
          ((label and weight) ? "," : "") +
          (weight ? "weight=#{weight.inspect}" : "") +
          "]"
      end
      @links[[parent, child]] = edge
    end
  end

  # Recursively adds +ast+ and its children to @nodes/@links.
  #
  # ast      - a SyntaxTree, an Array, or nil (nil and other objects are
  #            ignored).
  # parent   - the node +ast+ hangs under, or nil for the root.
  # linkname - label for the edge from parent, if any.
  # weight   - weight for the edge from parent, if any.
  def eval_to_dot(ast, parent = nil, linkname = nil, weight = nil)
    if ast
      if ast.kind_of?(SyntaxTree)
        case ast.name
        when "_ArrayNode"
          add_parent_link(parent, ast, linkname, weight)
          @nodes[ast] = "#{node_id(ast)} [label=" + '"[]"]'
          ast.each_with_index {|c,i| eval_to_dot(c, ast, i.inspect)}
        else
          # Special handling of Token nodes since we only want to print
          # the lexeme
          if ast.children_names.sort == ["lexeme", "value"].sort
            @nodes[ast] = "#{node_id(ast)} [shape=box,label=#{ast.lexeme.inspect}]"
            add_parent_link(parent, ast, linkname, weight)
          else
            add_parent_link(parent, ast, linkname, weight)
            @nodes[ast] = "#{node_id(ast)} [label=#{ast.name.inspect}]"
            ast.childrens.each_with_index {|c,i|
              eval_to_dot(c, ast, ast.children_names[i])
            }
          end
        end
      elsif ast.class == Array
        # Or nodes return array but they should return ArrayNodes...
        add_parent_link(parent, ast, linkname, weight)
        @nodes[ast] = "#{node_id(ast)} [label=\"[]\"]"
        ast.each_with_index {|c,i| eval_to_dot(c, ast, i)}
      end
    end
  end

  protected

  # Identifier used for +node+ in the dot output. Uses the node's own #id
  # when it has one (this keeps the old output on Ruby 1.8, where every
  # object answers #id) and falls back to #object_id otherwise, because
  # Object#id no longer exists on Ruby 1.9+ and plain Arrays would raise
  # NoMethodError there.
  def node_id(node)
    node.respond_to?(:id) ? node.id : node.object_id
  end
end
101
+
102
# Convenience wrapper: renders +syntaxtree+ as a Graphviz digraph description
# using a SyntaxTreeAsDotGraph printer with its default settings.
def syntaxtree_as_dot_digraph(syntaxtree)
  printer = SyntaxTreeAsDotGraph.new
  printer.to_graph(syntaxtree)
end
105
+
106
+
107
+
@@ -0,0 +1,63 @@
1
# Formats a list of nodes and a list of links as a Graphviz digraph and
# wraps the text in a DotGraph.
#
# Shapes and labels are produced by callables so callers can customise the
# rendering; the defaults draw every node as a box labelled with its
# #inspect and label every link with the #inspect of its info (if any).
class DotGraphFormatter
  @@default_node_shaper = proc{|n| "box"}
  @@default_node_labeler = proc{|n| n.inspect}
  @@default_link_labeler = proc{|info| info ? info.inspect : nil}

  # nodeShaper  - callable(node) returning the shape name, or nil for default.
  # nodeLabeler - callable(node) returning the node label, or nil for default.
  # linkLabeler - callable(info) returning the link label or nil (no label),
  #               or nil for default.
  # size        - value written (quoted) after "size = ".
  # orientation - value written verbatim after "orientation = ".
  def initialize(nodeShaper = nil, nodeLabeler = nil, linkLabeler = nil,
                 size = "11,9", orientation = "landscape")
    @node_shaper = nodeShaper || @@default_node_shaper
    @node_labeler = nodeLabeler || @@default_node_labeler
    @link_labeler = linkLabeler || @@default_link_labeler
    @size, @orientation = size, orientation
  end

  # nodes is array of node objects
  # links is either array of
  #   arrays [fromNode, toNode [, infoOnLink]], or
  #   objects with attributes :from, :to, :info
  #
  # Duplicate nodes and links (per Array#uniq) are written once.
  # Returns a DotGraph holding the description.
  def format(nodes, links)
    DotGraph.new("digraph G {\n" +
      "size = #{@size.inspect}\n" +
      "orientation = #{@orientation}\n" +
      nodes.uniq.map {|n| format_node(n)}.join("\n") + "\n" +
      links.uniq.map {|l| format_link(l)}.join("\n") + "\n" +
      "}"
    )
  end

  protected

  # Identifier used for +node+ in the dot output: the node's own #id when it
  # has one, otherwise its #object_id (Object#id does not exist on
  # Ruby 1.9+, so plain objects would otherwise raise NoMethodError).
  def node_id(node)
    node.respond_to?(:id) ? node.id : node.object_id
  end

  # Returns the dot declaration line for +node+.
  def format_node(node)
    node_id(node).inspect + " [" +
      "shape=" + @node_shaper.call(node).inspect + ", " +
      "label=" + @node_labeler.call(node).inspect + "]"
  end

  # Returns [from, to, info] for +link+. Objects offering #from/#to/#info
  # are read through those accessors; anything lacking one of them is
  # indexed like an array.
  #
  # Only NoMethodError triggers the array fallback. The previous
  # `rescue Exception` also swallowed Interrupt, SystemExit and genuine
  # errors raised inside the accessors.
  def get_link_data(link)
    begin
      return link.from, link.to, link.info
    rescue NoMethodError
      return link[0], link[1], link[2]
    end
  end

  # Returns the dot edge line for +link+, with a label attribute when the
  # link labeler yields one.
  def format_link(link)
    from, to, info = get_link_data(link)
    label = @link_labeler.call(info)
    node_id(from).inspect + " -> " + node_id(to).inspect +
      (label ? " [label=" + label.inspect + "]" : "")
  end
end
51
+
52
# A Graphviz graph description (dot source text) that can render itself
# through the external `dot` program.
DotGraph = Struct.new("DotGraph", :description)
class DotGraph
  # Renders the description as PostScript into +filename+.
  #
  # The description is first written to a scratch file next to +filename+
  # (the file name plus a random number not already in use), then
  # `dot -Tps -o filename scratch` is run.
  #
  # `dot` is started with an argument list instead of an interpolated
  # command string, so file names containing spaces or shell metacharacters
  # are passed through intact and are never interpreted by a shell. The
  # scratch file is removed in an ensure clause, so it no longer leaks when
  # writing or launching `dot` raises.
  #
  # Returns the result of Kernel#system: true on success, false on a
  # non-zero exit status, nil if `dot` could not be executed.
  def write_to_file(filename)
    tmpfile = filename + rand(100000).inspect
    while File.file?(tmpfile)
      tmpfile = filename + rand(100000).inspect
    end
    begin
      File.open(tmpfile, "w") {|f| f.write description}
      system("dot", "-Tps", "-o", filename, tmpfile)
    ensure
      File.delete(tmpfile) if File.file?(tmpfile)
    end
  end
end
@@ -0,0 +1,53 @@
1
# Mixin for objects created through an IndexableFactory.
#
# index_number - the number the factory assigned to the object.
# factory      - the IndexableFactory that created the object.
module Indexable
  attr_accessor :index_number, :factory
end
5
+
6
# Creates instances of an Indexable class, at most one per distinct
# argument list, and numbers them consecutively in creation order.
#
# instances   - all created objects, in creation order.
# start_index - the number given to the first created object.
# next_index  - the number the next created object will receive.
class IndexableFactory
  attr_reader :instances, :start_index, :next_index

  # klass      - class to instantiate; must have Indexable among its
  #              ancestors, otherwise ArgumentError is raised.
  # startIndex - number assigned to the first instance.
  def initialize(klass, startIndex = 0)
    unless klass.ancestors.include?(Indexable)
      raise ArgumentError, "#{klass.inspect} is not Indexable"
    end
    @klass = klass
    @start_index = startIndex
    @next_index = startIndex
    @instance_map = {}
    @instances = []
  end

  # Returns the instance registered for +args+, creating, numbering and
  # registering it on first request.
  def make(*args)
    @instance_map[args] ||= begin
      created = make_new_obj(args)
      @instances.push created
      created
    end
  end

  # Like #make, but returns the pair [instance, created?] where created? is
  # true when no instance existed for +args+ before the call.
  def make_unless_exists(*args)
    is_new = @instance_map[args].nil?
    [make(*args), is_new]
  end

  # Returns the instance registered for +args+, or nil; never creates one.
  def get_instance(*args)
    @instance_map[args]
  end

  # Returns the instance registered for +args+, creating it when missing
  # (#make already registers what it creates).
  def instance_with_args(*args)
    make(*args)
  end

  protected

  # Instantiates the class with +arguments+ and stamps the new object with
  # its index number and a back-reference to this factory.
  def make_new_obj(arguments)
    instance = @klass.new(*arguments)
    instance.index_number = advance_index_number
    instance.factory = self
    instance
  end

  # Hands out the current index and moves the counter on by one.
  def advance_index_number
    current = @next_index
    @next_index = current + 1
    current
  end
end
@@ -0,0 +1,144 @@
1
+ require 'rpdf2txt-rockit/grammar'
2
+ require 'rpdf2txt-rockit/parse_table'
3
+ require 'rpdf2txt-rockit/parsetable_generation'
4
+ require 'rpdf2txt-rockit/reduce_actions_generator'
5
+
6
+ require 'rpdf2txt-rockit/profiler'
7
+
8
module Parse
  # Directed graph whose nodes are parser states. Besides the plain graph it
  # remembers the start state and keeps the reduce states sorted into
  # consistent and inconsistent ones as they are added.
  class StateGraph < BackLinkedDirectedGraph
    attr_reader :start_state
    attr_reader :consistent_reduce_states, :inconsistent_reduce_states

    # startState - the state parsing starts in.
    # args       - forwarded unchanged to the superclass constructor.
    def initialize(startState, *args)
      super(*args)
      @start_state = startState
      @consistent_reduce_states, @inconsistent_reduce_states = [], []
    end

    # Adds +state+ to the graph and, when it is a reduce state, files it
    # (once) under the consistent or inconsistent reduce states.
    def add_node(state)
      super(state)
      return nil unless state.reduce_state?
      bucket = state.consistent? ? @consistent_reduce_states : @inconsistent_reduce_states
      bucket.include?(state) ? nil : bucket.push(state)
    end

    # Label "S<index>: <kernel items>".
    @@node_labeler_with_kernels = proc{|state|
      "S#{state.index_number.inspect}: #{state.kernel_items.inspect}"
    }

    # Label "S<index>".
    @@node_labeler = proc{|state|
      "S#{state.index_number.inspect}"
    }

    # Delegates to the superclass with the state labeler matching
    # +withKernelItems+ (the second superclass argument is left nil).
    def to_postscript_file(filename, withKernelItems = true)
      labeler = withKernelItems ? @@node_labeler_with_kernels : @@node_labeler
      super(filename, nil, labeler)
    end
  end

  # Builds a parse table for a grammar: computes the state graph (adding
  # gotos and shift actions on the way) and then lets a lookahead calculator
  # add the reduce actions.
  class LaLr1ParseTableGenerator
    # grammar                  - the grammar to generate a table for.
    # lookaheadCalculatorKlass - class instantiated with (state graph,
    #                            grammar, parse table, items) and asked to
    #                            add_reduce_actions.
    def initialize(grammar,
                   lookaheadCalculatorKlass = ReduceActionsGenerator)
      @grammar = grammar
      @lookahead_calculator_klass = lookaheadCalculatorKlass
    end

    # Augments and normalizes the grammar, computes states and lookaheads,
    # and returns the filled parse table (created through
    # parseTableKlass.new_from_grammar). Each phase is wrapped in
    # time_and_puts.
    def generate_parse_table(parseTableKlass = ParseTable)
      @grammar.augment
      time_and_puts("\n Normalizing grammar") do
        @grammar.normalize
      end
      state_graph = nil
      time_and_puts(" Calculating states") do
        @item_factory = IndexableFactory.new(Item, 0)
        @state_factory = IndexableFactory.new(LrState, 0)
        precalc_nonkernel_items_for_nonterminals
        @parse_table = parseTableKlass.new_from_grammar(@grammar)
        # also adds gotos and shift actions
        state_graph = calculate_state_graph
      end
      time_and_puts(" Calculating lalr1_lookaheads") do
        calculator = @lookahead_calculator_klass.new(state_graph, @grammar,
                                                     @parse_table,
                                                     @item_factory.instances)
        calculator.add_reduce_actions
      end
      @parse_table
    end

    # Debug aid: prints, per nonterminal name, the distinct ids seen for it
    # on production left-hand sides and among production elements.
    def test_nonterminal_uniqueness
      nts = DefaultInitHash.new {|k| Array.new}
      record = proc do |symbol|
        nts[symbol.name].push(symbol.id)
        nts[symbol.name].uniq!
      end
      @grammar.productions.each do |production|
        record.call(production.nonterminal)
        production.elements.each do |element|
          record.call(element) if element.nonterminal?
        end
      end
      puts nts.inspect
    end

    protected

    # Asks every nonterminal of the grammar to compute its nonkernel items.
    def precalc_nonkernel_items_for_nonterminals
      @grammar.nonterminals.each do |nonterminal|
        nonterminal.calc_nonkernel_items(@grammar, @item_factory)
      end
    end

    # Returns [state, created?] for the given kernel items (see
    # IndexableFactory#make_unless_exists).
    def add_state_unless_exists(kernel_items)
      @state_factory.make_unless_exists(kernel_items)
    end

    # Calculate the state graph by constructing the sets-of-lr0-items
    # collection.
    # See page 224 (basic algorithm) and 240 (representing the states by
    # their kernel items) in the Dragon book
    #
    # States are processed in discovery order; the work list grows while it
    # is being walked, hence the explicit position counter.
    def calculate_state_graph
      Profiler.__enter__(:calculate_state_graph) if $PROFILE
      start_item = @item_factory.make(@grammar.productions[0], 0)
      initial_state, _created = add_state_unless_exists([start_item])
      state_graph = StateGraph.new(initial_state)
      worklist = [state_graph.start_state]
      transitions = DefaultInitHash.new {|k| Array.new}
      position = 0
      until position >= worklist.length
        state = worklist[position]
        transitions.clear
        # Group the successor items of the closure by the symbol they are
        # reached over.
        state.closure.each do |item|
          symbol = item.symbol
          next unless symbol
          successor = item.next_item
          transitions[symbol].push(successor) if successor
        end
        transitions.each do |symbol, kernel_item_set|
          kernel_item_set.uniq! # Needed?
          dest_state, new_state = add_state_unless_exists(kernel_item_set)
          worklist.push(dest_state) if new_state
          state_graph.add_link(state, dest_state, symbol)
          if symbol.nonterminal?
            @parse_table.add_goto(state.index_number, symbol,
                                  dest_state.index_number)
          else
            @parse_table.add_action(state.index_number, symbol,
                                    [:SHIFT, dest_state.index_number])
          end
        end
        position += 1
      end
      Profiler.__leave__(:calculate_state_graph, state_graph) if $PROFILE
      state_graph
    end
  end
end
@@ -0,0 +1,273 @@
1
+ require 'rpdf2txt-rockit/sourcecode_dumpable'
2
+ require 'rpdf2txt-rockit/grammar'
3
+ require 'rpdf2txt-rockit/base_extensions'
4
+
5
+ require 'rpdf2txt-rockit/profiler'
6
+
7
# Action/goto table for a table-driven parser.
#
# Actions are stored per state as a flat list of integer pairs (see the
# comment above @@default_action_map); gotos are stored as a hash of hashes
# keyed by state and then by nonterminal number.
class ParseTable
  include SourceCodeDumpable
  attr_reader :start_state, :tokens, :priorities
  attr_accessor :language

  # Creates an empty table from the grammar's productions, tokens and
  # priorities; the language name is the grammar's name or
  # "UNNAMED_LANGUAGE".
  def ParseTable.new_from_grammar(aGrammar)
    pt = self.new(aGrammar.productions, aGrammar.tokens, aGrammar.priorities)
    pt.language = aGrammar.name || "UNNAMED_LANGUAGE"
    pt
  end

  # We save the actions in a compact numerical way to save space and time:
  # The action table is an array of arrays. Each state number is an index
  # into the array and its array contains an even number of integers.
  # Each pair of integers represent one unique action. The first of the
  # integers is the action and the second is the number representing the
  # terminals for which it apply. The least significant 'action_bits' bits
  # of the action number determines the type of action by giving an index
  # into the 'action_map'. Its default value is:
  #   [:REDUCE, :SHIFT, :ACCEPT]
  # so that, with the default of 2 action bits, an action number n stands
  # for the action [action_map[n & 3], n >> 2] (see actionnum_to_action).
  @@default_action_map = [:REDUCE, :SHIFT, :ACCEPT]

  # productions - array of productions; their index is the production number.
  # tokens      - array of token types; their index is the bit used for the
  #               token in terminal sets.
  # priorities  - stored and dumped by to_src; not otherwise used here.
  # actionTable - per-state action pair lists (NOTE(review): ArrayOfArrays
  #               and ArrayOfHashes presumably create missing rows on
  #               access; they are defined outside this file - confirm).
  # gotoHash    - state => {nonterminal number => new state}.
  # actionBits  - number of low bits of an action number holding the index
  #               into actionMap.
  # actionMap   - action symbols addressed by those bits.
  def initialize(productions, tokens, priorities = nil,
                 actionTable = ArrayOfArrays.new, gotoHash = Hash.new,
                 actionBits = 2, actionMap = @@default_action_map)
    @productions, @start_state, @language = productions, 0, "UNNAMED_LANGUAGE"
    @priorities = priorities
    @tokens, @nonterminals = tokens, nonterminals(productions)
    @action_table, @goto_hash = actionTable, gotoHash
    @action_cache = ArrayOfHashes.new
    @mask = Array.new
    @action_map, @action_bits, @action_mask = actionMap, actionBits, 0
    # Build a mask with the lowest actionBits bits set.
    while actionBits > 0
      @action_mask = (@action_mask << 1) | 1
      actionBits -= 1
    end
    init_productionnum_to_nonterminal_number_hash
    init_tokentype_to_token_number_hash
  end

  # Number of states that have a row in the action table.
  def num_states
    @action_table.length
  end

  # Two tables are equal when they are of the same class and agree on
  # productions, tokens, action table, goto hash and start state.
  def ==(other)
    other.class == self.class and
      other.productions == @productions and
      other.tokens == @tokens and
      other.action_table == @action_table and
      other.goto_hash == @goto_hash and
      other.start_state == @start_state
  end

  # Adds +action+ (e.g. [:SHIFT, 3]) for +state+ on the single token type
  # +aTokenType+ and drops the cached lookups for that state.
  def add_action(state, aTokenType, action)
    Profiler.__enter__(:ParseTable_add_action, state, aTokenType, action) if $PROFILE
    @action_cache[state].clear
    actionnum = action_to_actionnum(action)
    @action_table[state] << actionnum << token_to_terminalset(aTokenType)
    Profiler.__leave__(:ParseTable_add_action) if $PROFILE
  end

  # Adds +action+ for +state+ on every terminal in +terminalSet+ (converted
  # with #to_i). The state's cached lookups are dropped here as well, as in
  # add_action; otherwise a lookup made through #actions before this call
  # would keep returning the stale, shorter action list.
  def add_action_for_terminalset(state, action, terminalSet)
    Profiler.__enter__(:ParseTable_add_action_for_terminalset, state, action, terminalSet) if $PROFILE
    @action_cache[state].clear
    @action_table[state] << action_to_actionnum(action) << terminalSet.to_i
    Profiler.__leave__(:ParseTable_add_action_for_terminalset) if $PROFILE
  end

  # Unify terminal sets for identical actions
  #
  # Within each state, pairs sharing the same action number are merged into
  # one pair whose terminal set is the bitwise OR of the originals; the
  # order of first occurrence is kept.
  def compact!
    Profiler.__enter__(:ParseTable_compact!) if $PROFILE
    actions, i, new_index = Hash.new, 0, 0
    @action_table.map! do |actionnums|
      actions.clear;
      i, new_actionnums, new_index = 0, Array.new, 0
      while i < actionnums.length
        if (index = actions[actionnums[i]])
          new_actionnums[index+1] |= actionnums[i+1]
        else
          actions[actionnums[i]] = new_index
          new_index += 2
          new_actionnums << actionnums[i] << actionnums[i+1]
        end
        i += 2
      end
      new_actionnums
    end
    Profiler.__leave__(:ParseTable_compact!) if $PROFILE
  end

  # Terminal set (bit mask) containing only +aTokenType+.
  def token_to_terminalset(aTokenType)
    mask(@token_to_number[aTokenType])
  end

  # Integer with only bit +index+ set; computed once per index.
  def mask(index)
    @mask[index] || (@mask[index] = (1 << index))
  end

  # Records that +state+ moves to +newState+ over +aNonTerminal+.
  #
  # The row for the state is created on demand. The previous implementation
  # relied on `rescue NameError` plus an unbounded `retry` to create it,
  # which would loop forever if anything else in the expression raised a
  # NameError.
  def add_goto(state, aNonTerminal, newState)
    row = (@goto_hash[state] ||= Hash.new)
    row[@nonterminals.index(aNonTerminal)] = newState
  end

  # Returns the array of actions ([symbol, argument] pairs) valid in
  # +state+ for +tokenType+; results are cached per state and token type.
  def actions(state, tokenType)
    actions = @action_cache[state][tokenType]
    unless actions
      actions = Array.new
      actionnums = @action_table[state]
      token_mask = mask(@token_to_number[tokenType])
      i = 0
      while i < actionnums.length
        if(actionnums[i+1] & token_mask > 0)
          actions.push actionnum_to_action(actionnums[i])
        end
        i += 2
      end
      @action_cache[state][tokenType] = actions
    end
    actions
  end

  # Returns the tokens for which +state+ has at least one action.
  def valid_tokens(state)
    terminal_set = 0
    each_terminalset(state) {|ts| terminal_set |= ts}
    terminalset_to_terminals(terminal_set)
  end

  # Yields every terminal set of +state+ (the second integer of each pair).
  def each_terminalset(state)
    @action_table[state].each_with_index {|e,i| yield(e) if i % 2 == 1}
  end

  # Returns the tokens whose bit is set in +terminalSet+.
  def terminalset_to_terminals(terminalSet)
    @tokens.select {|t| terminalSet & mask(@token_to_number[t]) > 0}
  end

  # Decodes an action number into [action symbol, argument].
  def actionnum_to_action(actionNumber)
    [@action_map[actionNumber & @action_mask], actionNumber >> @action_bits]
  end

  # Encodes [action symbol, argument] as an action number.
  def action_to_actionnum(action)
    Profiler.__enter__(:ParseTable_action_to_actionnum, action) if $PROFILE
    res = @action_map.index(action[0]) + (action[1] << @action_bits)
    Profiler.__leave__(:ParseTable_action_to_actionnum) if $PROFILE
    res
  end

  # Returns the state to go to from +state+ after reducing by production
  # number +productionNumber+, or nil when there is no such goto.
  #
  # A state without a goto row is handled explicitly; the previous
  # `rescue Exception` also hid Interrupt, SystemExit and real errors.
  def goto(state, productionNumber)
    row = @goto_hash[state]
    row && row[@productionnum_to_nonterminal_num[productionNumber]]
  end

  # Returns the production with the given number.
  def production(number)
    @productions[number]
  end

  # Returns Ruby source text that rebuilds this table and assigns it to
  # +name+. +nameHash+ is accepted but not used by this implementation.
  def to_src(name = "parse_table", nameHash = {})
    names = name_hash(@tokens) {|t| "t"}
    str = @tokens.to_src("tokens", names) + "\n"
    names.update(name_hash(@productions) {|p| "p"})
    str << @productions.to_src("productions", names) + "\n"
    str << @priorities.to_src("priorities", names) + "\n"
    #str << "r = :REDUCE\n"
    #str << "s = :SHIFT\n"
    str << @action_table.to_compact_src("action_table") + "\n"
    str << @goto_hash.to_compact_src("goto_hash") + "\n"
    str << assign_to(name,
                     new_of_my_type(as_code("productions"),
                                    as_code("tokens"),
                                    as_code("priorities"),
                                    as_code("action_table"),
                                    as_code("goto_hash"),
                                    @action_bits,
                                    @action_map))
    str
  end

  # Human-readable dump: tokens, nonterminals, productions and one line per
  # state listing the actions per token and the gotos per nonterminal.
  def inspect
    str = "ParseTable\n"
    str += "Tokens: #{@tokens.inspect}\n"
    str += "NonTerminals: #{@nonterminals.inspect}\n"
    str += "Productions:\n#{productions_inspect}\n"
    str += "Actions: \n"
    max_state = @action_table.length-1
    (max_state+1).times do |state|
      str += "#{state}:\t"
      @tokens.each do |t|
        str += inspect_actions(actions(state, t)) + ","
      end
      str += "| "
      @nonterminals.each do |nt|
        # goto is keyed by production number, so look up the first
        # production of this nonterminal.
        i = @productions.index(@productions.detect {|p| p.nonterminal == nt})
        str += ((ns=goto(state, i)) ? "#{ns}" : " ") + ","
      end
      str += "\n"
    end
    str
  end

  protected

  # One line per production: its number and #inspect.
  def productions_inspect
    str = ""
    @productions.each_with_index do |production, i|
      str += " #{i}: #{production.inspect}\n"
    end
    str
  end

  # Short text for a list of actions: blank for none, "a"/"s<n>"/"r<n>" for
  # one, and a bracketed comma-separated list for several.
  def inspect_actions(actions)
    unless actions
      " "
    else
      return " " if actions.length == 0
      if actions.length > 1
        "[" + actions.map {|a| inspect_actions([a])}.join(',') + "]"
      else
        case actions[0][0]
        when :ACCEPT
          " a "
        when :SHIFT
          "s#{actions[0][1]} "
        when :REDUCE
          "r#{actions[0][1]} "
        end
      end
    end
  end

  # Protected readers, used by == to compare against another table.
  attr_reader :productions, :action_table, :goto_hash

  # Distinct left-hand-side nonterminals of the productions, in order of
  # appearance (NOTE(review): equality_uniq is defined outside this file;
  # presumably a uniq based on == - confirm).
  def nonterminals(anArrayOfProductions)
    anArrayOfProductions.map {|p| p.nonterminal}.equality_uniq
  end

  # Builds and returns @nonterminal_index: production number => index of
  # its nonterminal in +nonterminals+.
  def init_nonterminal_index(nonterminals, productions)
    @nonterminal_index = Hash.new
    productions.each_with_index do |prod, i|
      @nonterminal_index[i] = nonterminals.index(prod.nonterminal)
    end
    @nonterminal_index
  end

  # Builds and returns @token_index: token => position in +tokens+.
  def init_token_index(tokens)
    @token_index = Hash.new
    tokens.each_with_index {|t,i| @token_index[t] = i}
    @token_index
  end

  # Fills @productionnum_to_nonterminal_num: production number => index of
  # its nonterminal in @nonterminals (the key used in goto rows).
  def init_productionnum_to_nonterminal_number_hash
    @productionnum_to_nonterminal_num = Hash.new
    @productions.each_with_index do |p, n|
      @productionnum_to_nonterminal_num[n] = @nonterminals.index(p.nonterminal)
    end
  end

  # Fills @token_to_number: token => its bit position in terminal sets.
  def init_tokentype_to_token_number_hash
    @token_to_number = Hash.new
    @tokens.each_with_index {|t,i| @token_to_number[t] = i}
  end
end