herbgobbler 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/bin/gobble +43 -0
  2. data/grammer/erb_grammer.treetop +256 -0
  3. data/lib/core/base_node.rb +29 -0
  4. data/lib/core/base_text_extractor.rb +19 -0
  5. data/lib/core/base_translation_store.rb +49 -0
  6. data/lib/core/core.rb +17 -0
  7. data/lib/core/double_quoted_text_node.rb +89 -0
  8. data/lib/core/erb_file.rb +159 -0
  9. data/lib/core/erb_string_non_text_content.rb +8 -0
  10. data/lib/core/i18n_key.rb +74 -0
  11. data/lib/core/ignorable_tag_node.rb +10 -0
  12. data/lib/core/method_call_node.rb +44 -0
  13. data/lib/core/node_processing.rb +26 -0
  14. data/lib/core/non_extracting_non_text_node.rb +8 -0
  15. data/lib/core/non_text_node.rb +32 -0
  16. data/lib/core/rails_text_extractor.rb +88 -0
  17. data/lib/core/rails_translation_store.rb +42 -0
  18. data/lib/core/text_extractor.rb +31 -0
  19. data/lib/core/text_node.rb +73 -0
  20. data/lib/herbgobbler.rb +7 -0
  21. data/lib/nodes/combindable_herb_non_text_node.rb +5 -0
  22. data/lib/nodes/herb_combined_node.rb +47 -0
  23. data/lib/nodes/herb_erb_text_call_node.rb +52 -0
  24. data/lib/nodes/herb_node_retaining_node.rb +37 -0
  25. data/lib/nodes/herb_node_retaining_non_text_node.rb +30 -0
  26. data/lib/nodes/herb_node_retaining_text_node.rb +107 -0
  27. data/lib/nodes/herb_non_text_node.rb +21 -0
  28. data/lib/nodes/herb_string_variable.rb +9 -0
  29. data/lib/nodes/herb_text_node.rb +23 -0
  30. data/lib/nodes/herb_white_space_text_node.rb +14 -0
  31. data/lib/nodes/nodes.rb +13 -0
  32. data/lib/nodes/rails_text_variable_node.rb +19 -0
  33. data/scripts/display_syntax_tree.rb +18 -0
  34. data/scripts/extract_text_from_erb.rb +12 -0
  35. data/scripts/extract_yml_from_erb.rb +12 -0
  36. data/scripts/rails_gobble_file.rb +37 -0
  37. data/scripts/save_result.rb +11 -0
  38. metadata +154 -0
data/bin/gobble ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ require 'lib/herbgobbler'
3
+
4
# Prints the command-line usage for this executable.
# The message names the `gobble` command itself rather than a script path.
def print_usage
  puts "Usage: gobble <rails_root>"
end
7
+
8
# Remove the file extension(s) and the app/views/ prefix so that, when the
# context is set, rails will be able to find the key through its default
# "." key syntax.
#
# path - view path relative to the rails root, e.g.
#        "/app/views/users/index.html.erb"
#
# Returns the key path, e.g. "users/index".
def convert_path_to_key_path( path )
  # Only the file name carries extensions; splitting the whole path on '.'
  # would truncate it at the first dotted directory name.
  directory, separator, file_name = path.rpartition( '/' )
  base_name = file_name.split( '.' ).first.to_s

  # Drop only the first "/app/views/" so a nested directory with the same
  # name is kept in the key.
  "#{directory}#{separator}#{base_name}".sub( '/app/views/', '' )
end
14
+
15
# Entry point. For every view matched under <rails_root>/app/views the file
# is parsed, its text is extracted through the RailsTextExtractor, and the
# file is overwritten with the result. The collected translations are then
# written to <rails_root>/config/locales/en.yml.
if ARGV.empty?
  print_usage
else
  rails_root = ARGV[0]
  locale_file_name = "en.yml"

  rails_view_directory = "#{rails_root}/app/views"
  full_yml_file_path = "#{rails_root}/config/locales/#{locale_file_name}"

  rails_translation_store = RailsTranslationStore.new
  text_extractor = RailsTextExtractor.new( rails_translation_store )

  Dir["#{rails_view_directory}/**/*html.erb"].each do |full_erb_file_path|
    # Strip only the leading rails_root. Replacing every occurrence (gsub)
    # corrupts the path when the root string reappears later in it, e.g. a
    # root of "." would delete every dot, including the file extensions.
    relative_erb_path = full_erb_file_path.sub( rails_root, '' )
    rails_translation_store.start_new_context( convert_path_to_key_path( relative_erb_path ) )

    erb_file = ErbFile.load( full_erb_file_path )
    erb_file.extract_text( text_extractor )

    # The view is rewritten in place.
    File.open( full_erb_file_path, 'w' ) { |f| f.write( erb_file.to_s ) }
    puts "Wrote #{full_erb_file_path}"
  end

  File.open( full_yml_file_path, 'w' ) { |f| f.write( rails_translation_store.serialize ) }
  puts "Wrote #{full_yml_file_path}"
end
43
+
@@ -0,0 +1,256 @@
1
# Treetop grammar for ERB templates. Each rule attaches a node class or
# mixin (<...>) and, in most cases, an inline block with helper methods such
# as node_name and can_be_combined?.
grammar ERBGrammer
  # Top-level rule: one construct followed by any number of further erb
  # matches. Alternatives are tried in the order listed.
  rule erb
    ( erb_string /
      erb_block /
      ignorable_script_tag_content /
      html_end_tag /
      html_start_tag /
      html_self_contained /
      whitespace /
      text
    ) (erb)*
  end

  # Helper method names recognised inside <%= %>; currently only link_to.
  rule method_names
    "link_to" <NonTextNode>
    {
      def node_name
        "method_names"
      end

      def can_be_combined?
        true
      end
    }
  end

  # Argument separator inside a recognised method call.
  rule comma
    "," <NonTextNode>
    {
      # Reports the rule's own name, in line with the other rules.
      # NOTE(review): this previously returned "method_names"; confirm that
      # nothing outside this grammar relied on that value.
      def node_name
        "comma"
      end

      def can_be_combined?
        true
      end
    }
  end

  # A recognised method name, whitespace, a double quoted string and an
  # optional ", <rest>" tail.
  rule method_call_content
    method_names whitespace double_quoted_ruby_string (comma erb_string_non_text_content)? <MethodCallNode>
  end

  # Everything up to (but not including) the closing erb tag.
  rule erb_string_non_text_content
    (!erb_block_end .)* <ErbStringNonTextContent>
    {
      def can_be_combined?
        true
      end

      def node_name
        "erb_string_non_text_content"
      end
    }
  end

  # An output tag: <%= ... %> or <%= ... -%>.
  rule erb_string
    start_erb:erb_string_start start_space:(space) text:(method_call_content / double_quoted_ruby_string)? everything_else:erb_string_non_text_content end_erb:(erb_block_end) <NonTextNode>
    {
      def node_name
        "erb_string"
      end
    }
  end

  # A #{...} interpolation inside a double quoted string.
  rule string_variable
    '#{' variable:((!'}' .)*) '}' <HerbStringVariable>
  end

  # A double quoted Ruby string, possibly containing interpolations.
  rule double_quoted_ruby_string
    start_quote:'"' unquoted_text:((!'"' (string_variable / .))*) end_quote:'"' <DoubleQuotedTextNode>
    {
      def can_be_combined?
        false
      end

      def html?
        false
      end

      def text_value
        self.elements.inject("") { |text, node| text += node.text_value }
      end

      def node_name
        "double_quoted_ruby_string"
      end
    }
  end

  # An erb tag opened with <% and closed by %> or -%>. Because erb_string
  # is tried first in `erb`, output tags (<%=) are normally matched there.
  rule erb_block
    start_block:'<%' block_contents:((!erb_block_end .)*) end_block:not_combindable_erb_block_end <NonTextNode>
    {
      def node_name
        "erb_block"
      end
    }
  end

  # Opening delimiter of an output tag.
  rule erb_string_start
    '<%=' <NonExtractingNonTextNode>
    {
      def node_name
        "erb_string_start"
      end

      def can_be_combined?
        true
      end
    }
  end

  # Closing delimiter used by erb_block; reports that it cannot be combined.
  rule not_combindable_erb_block_end
    ('%>' / '-%>') <NonTextNode>
    {
      def node_name
        "not_combindable_erb_block_end"
      end

      def can_be_combined?
        false
      end
    }
  end

  # Closing delimiter used by erb_string; reports that it can be combined.
  rule erb_block_end
    ('%>' / '-%>') <NonExtractingNonTextNode>
    {
      def node_name
        "erb_block_end"
      end

      def can_be_combined?
        true
      end
    }
  end

  # "<script" and everything up to (not including) the next "</script".
  rule ignorable_script_tag_content
    '<script' (!'</script' .)+ <IgnorableTagNode>
    {
      def can_be_combined?
        false
      end

      def node_name
        "ignorable_tag_name"
      end
    }
  end

  # A tag closed with "/>"; its contents may include erb output tags.
  rule html_self_contained
    start_tag:'<' tag_name:([a-zA-Z])+ tag_contents:(!'/>' (erb_string / .))* end_tag:'/>' <NonTextNode>
    {
      def can_be_combined?
        true
      end

      def node_name
        "html_self_contained"
      end
    }
  end

  # A closing tag. Only a, b, span and strong report as combinable.
  rule html_end_tag
    '</' tag_name:([a-zA-Z])+ (!'>' .)* '>' <NonTextNode>
    {
      def can_be_combined?
        !(['a', 'b', 'span', 'strong'].index(tag_name.text_value.downcase).nil?)
      end

      def node_name
        "html_end_tag"
      end
    }
  end

  # Contents of an opening tag after the tag name, up to ">" or "/>".
  rule tag_text
    (!'>' !'/>' (erb_string / .) )* <NonTextNode>
    {
      def node_name
        "tag_text"
      end
    }
  end

  # An opening tag. Only a, b, span and strong report as combinable.
  rule html_start_tag
    start_bracket:'<' tag_name:([a-zA-Z])+ extra_text:tag_text end_bracket:'>' <NonTextNode>
    {
      def can_be_combined?
        !(['a', 'b', 'span','strong'].index(tag_name.text_value.downcase).nil?)
      end

      def node_name
        "html_start_tag"
      end
    }
  end

  # One or more characters at which no tag or erb construct starts.
  rule text
    ((!html_self_contained !html_start_tag !html_end_tag !erb_string !erb_block) . )+ <TextNode>
    {
      def can_be_combined?
        true
      end

      def node_name
        "text"
      end
    }
  end

  # One or more literal space characters.
  rule space
    (' ')+ <NonTextNode>
    {
      def node_name
        "space"
      end

      def can_be_combined?
        true
      end
    }
  end

  # One or more characters that are not letters or double quotes and at
  # which no tag or erb construct starts.
  rule whitespace
    ((![a-zA-Z] !('<' [a-zA-Z]+) !('</' [a-zA-Z]+) !'<%' !'"') . )+ <NonTextNode>
    {
      def can_be_combined?
        true
      end

      def node_name
        "whitespace"
      end

      def white_space?
        true
      end
    }
  end

end
256
+
@@ -0,0 +1,29 @@
1
# Mixin with behaviour shared by node types: the default "shatter" answers
# and generation of I18n keys for a node's text.
module BaseNode
  # The list of keys handed to I18nKey and the fingerprint of the context
  # that list belongs to. These are class variables, so they are shared by
  # everything that includes this module.
  @@keys = []
  @@hash = nil

  # Nodes do not shatter unless they override this.
  def shatter?
    false
  end

  # Returns the given nodes without nil entries and without nodes whose
  # text_value is empty.
  def shattered_node_list( *nodes )
    nodes.reject { |terminal| terminal.nil? || terminal.text_value.empty? }
  end

  protected

  # Builds an I18nKey from this node's text and the key list of the current
  # context (see get_key_hash).
  def generate_i18n_key( text_extractor, node_tree )
    I18nKey.new( self.text_value, get_key_hash( text_extractor, node_tree ) )
  end

  # Returns the shared key list for the current context, starting a fresh
  # list whenever the context changes.
  #
  # Here we just try to use different keysets based on where they are in the
  # tree, since there can only be key collisions when generating the same
  # node. The context is identified by the extractor's hash combined with
  # the hash of the last entry of node_tree.
  def get_key_hash( text_extractor, node_tree )
    context_fingerprint = text_extractor.hash.to_s + node_tree.last.hash.to_s
    if @@hash != context_fingerprint
      @@keys = []
      @@hash = context_fingerprint
    end
    @@keys
  end

end
@@ -0,0 +1,19 @@
1
# TextExtractor whose start/finish hooks do nothing and whose html_text
# delegates to remove_leading_and_trailing_whitespace.
class BaseTextExtractor < TextExtractor

  # Called when text extraction has begun; nothing to do here.
  def starting_text_extraction; end

  # Receives a text node and returns whatever
  # remove_leading_and_trailing_whitespace returns for it. The nodes that
  # are output should implement node_name and text_value.
  def html_text( text_node )
    self.remove_leading_and_trailing_whitespace( text_node )
  end

  # Called when text extraction has finished; nothing to do here.
  def completed_text_extraction; end

end
@@ -0,0 +1,49 @@
1
# Collects translations as [key, value] pairs grouped by context, keeping
# both the contexts and the pairs in insertion order. Subclasses provide
# serialize.
class BaseTranslationStore

  attr_reader :context

  def initialize
    @context_maps = {}    # context name => array of [key, value] pairs
    @context_array = []   # context names in the order they were first used
  end

  # Yields (context, key, value) for every stored translation, context by
  # context. Without a block an Enumerator over the same triples is returned.
  def each
    return enum_for( :each ) unless block_given?

    @context_array.each do |context|
      @context_maps[context].each do |key_name, key_value|
        yield context, key_name, key_value
      end
    end
  end

  # Makes new_context the context that following translations are added to.
  def start_new_context( new_context )
    @context = new_context
  end

  # Appends a translation to the current context.
  def add_translation( key, value )
    context_map << [key, value]
  end

  # Subclasses must override this to produce their output format.
  def serialize
    raise "Implement me"
  end

  private

  # Returns the pair list for the current context, registering the context
  # on first use. When no context has been started the empty string is used
  # as the context name.
  def context_map
    context_name = @context.nil? ? '' : @context

    @context_maps[context_name] ||= []
    @context_array << context_name unless @context_array.include?( context_name )
    @context_maps[context_name]
  end

end
data/lib/core/core.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'core/base_node'
2
+ require 'core/node_processing'
3
+ require 'core/erb_file'
4
+ require 'core/text_node'
5
+ require 'core/non_text_node'
6
+ require 'core/text_extractor'
7
+ require 'core/base_text_extractor'
8
+ require 'core/rails_text_extractor'
9
+ require 'core/i18n_key'
10
+ require 'core/ignorable_tag_node'
11
+ require 'core/base_translation_store'
12
+ require 'core/rails_translation_store'
13
+ require 'core/double_quoted_text_node'
14
+ require 'core/erb_string_non_text_content'
15
+ require 'core/non_extracting_non_text_node'
16
+ require 'core/method_call_node'
17
+
@@ -0,0 +1,89 @@
1
# Syntax node for a double quoted Ruby string. It exposes the string's
# contents without the surrounding quotes and, when the string contains
# HerbStringVariable leaves, as alternating text and variable elements.
class DoubleQuotedTextNode < Treetop::Runtime::SyntaxNode
  include TextNode

  # True when any leaf of this node is a HerbStringVariable.
  def has_variables?
    leaves.any? { |node| node.is_a?( HerbStringVariable ) }
  end

  # Flattened list of the parser-built elements. `flatten` is not defined in
  # this class; it is expected to come from an included module.
  def leaves
    return [] if original_elements.nil? || original_elements.empty?

    original_elements.inject( [] ) do |collected, element|
      collected + flatten( element )
    end
  end

  # Keep the inherited implementations reachable; both methods are
  # overridden just below.
  alias :original_elements :elements
  alias :original_text_value :text_value

  # The node's elements with the opening and closing double quote removed.
  def elements
    if has_variables?
      strip_leading_and_trailing_double_quotes( variable_elements )
    else
      strip_leading_and_trailing_double_quotes( original_elements )
    end
  end

  # With variables: the concatenated text of the unquoted elements.
  # Without variables: the inherited text value.
  def text_value
    return original_text_value unless has_variables?

    elements.map { |current_element| current_element.text_value }.join
  end

  # Walks the leaves and groups consecutive non-variable text into
  # HerbTextNodes, keeping every HerbStringVariable leaf as its own element.
  def variable_elements
    found_elements = []
    current_text = ''
    leaves.each do |leaf|
      if leaf.is_a?( HerbStringVariable )
        found_elements = add_text_to_array( found_elements, current_text )
        current_text = ''
        found_elements << leaf
      else
        current_text += leaf.text_value unless leaf.nil? || leaf.text_value.empty?
      end
    end
    add_text_to_array( found_elements, current_text )
  end

  private

  # Removes a leading quote from the first element and a trailing quote
  # from the last one. Elements left empty by the removal are dropped.
  # NOTE(review): assumes list_of_elements has at least two entries (opening
  # and closing quote live in different elements) - confirm against callers.
  def strip_leading_and_trailing_double_quotes( list_of_elements )
    first_element = list_of_elements.first
    last_element = list_of_elements.last

    middle_elements = list_of_elements[ 1..list_of_elements.length - 2 ]
    first_element = remove_double_quote_at_position( first_element, 0 )
    last_element = remove_double_quote_at_position( last_element, last_element.text_value.length - 1 )
    [first_element, middle_elements, last_element].flatten.compact
  end

  # Returns element unchanged when the character at position is not a
  # double quote. Otherwise returns a HerbTextNode holding the text without
  # that character, or nil when nothing is left.
  def remove_double_quote_at_position( element, position )
    text = element.text_value
    return element unless text[position..position] == '"'

    # Keep everything before and everything after the quote. The previous
    # bounds check dropped the remainder when exactly one character followed
    # the quote (e.g. the "a" in "a#{x}").
    new_text = text[0...position].to_s + text[(position + 1)..-1].to_s
    new_text.empty? ? nil : HerbTextNode.new( new_text, false )
  end

  # Appends text as a HerbTextNode unless it is empty; returns the array.
  def add_text_to_array( array, text )
    array << HerbTextNode.new( text ) unless text.empty?
    array
  end

end
@@ -0,0 +1,159 @@
1
# A parsed ERB template: holds the parse result and a flat list of nodes,
# can run a text extractor over those nodes, and can write itself back out
# as a string.
class ErbFile

  include NodeProcessing
  attr_accessor :nodes
  attr_accessor :node_set

  # node_set - the parser result; nil when parsing failed, in which case
  #            nodes stays unset.
  def initialize( node_set )
    @node_set = node_set
    @nodes = flatten_elements if compiled?
    @debug = false
  end

  # Flat list of terminals collected from the parse result by `flatten`
  # (not defined in this class; expected from NodeProcessing).
  def flatten_elements
    terminals = []
    flatten( @node_set, terminals )
    terminals
  end

  # True when the parser produced a result.
  def compiled?
    !@node_set.nil?
  end

  # Parses the file and returns the parser's failure reason.
  def self.debug( file_path )
    ErbFile.parse( File.read( file_path ) )
    @parser.failure_reason
  end

  # Merges neighbouring leaves into retaining nodes, then breaks removable
  # edge tags back out of the combined text nodes. The conditions are
  # checked in order; the first one that holds decides what happens.
  def combine_nodes( leaves )
    combined_nodes = []
    leaves.each do |node|
      last_combined_node = combined_nodes.pop
      if last_combined_node.nil?
        # First node: nothing to combine with yet.
        combined_nodes << node
      elsif combindable_node?( node ) && combindable_node?( last_combined_node )
        new_node = HerbNodeRetainingNonTextNode.new( last_combined_node )
        new_node << node
        combined_nodes << new_node
      elsif !node.is_a?( TextNode ) && !last_combined_node.is_a?( TextNode ) && !( last_combined_node.is_a?( BaseNode ) && last_combined_node.can_be_combined? ) && !( node.is_a?( NonTextNode ) && node.can_be_combined? )
        combined_nodes << combine_two_nodes( last_combined_node, node, HerbNodeRetainingNonTextNode )
      elsif ( last_combined_node.is_a?( TextNode ) && node.is_a?( TextNode ) ) ||
            ( combindable_node?( last_combined_node ) && node.is_a?( TextNode ) ) ||
            ( combindable_node?( node ) && last_combined_node.is_a?( TextNode ) )
        # Text next to text, or text next to a combinable non-text node.
        combined_nodes << combine_two_nodes( last_combined_node, node, HerbNodeRetainingTextNode )
      else
        combined_nodes << last_combined_node
        combined_nodes << node
      end
    end
    remove_edge_tags_from_combined_text_nodes( combined_nodes )
  end

  # Runs text_extractor over this file:
  # 1. combine the nodes,
  # 2. let every text node call back into the extractor, collecting the
  #    nodes the extractor returns from end_html_text,
  # 3. replace this file's nodes with the resulting list.
  #
  # Returns whatever text_extractor.completed_text_extraction returns.
  def extract_text( text_extractor )
    text_extractor.starting_text_extraction
    new_node_set = []
    @nodes = combine_nodes( @nodes )
    @nodes.each do |node|
      if node.text?
        text_extractor.start_html_text
        node.extract_text( text_extractor, new_node_set )
        new_node_set += text_extractor.end_html_text
      else
        new_node_set << node
      end
    end
    # Swap in the new list once the whole pass is done.
    @nodes = new_node_set
    text_extractor.completed_text_extraction
  end

  # Builds an ErbFile from template source held in a string.
  def self.from_string( string_to_parse )
    ErbFile.parse( string_to_parse )
  end

  # Builds an ErbFile from the template at file_path.
  def self.load( file_path )
    ErbFile.parse( File.read( file_path ) )
  end

  # Concatenated text_value of every node.
  def serialize
    nodes.map { |node| node.text_value }.join
  end

  # Same output as serialize.
  def to_s
    serialize
  end

  private

  # Breaks edge tags out of every HerbNodeRetainingTextNode, repeating the
  # pass for as long as it changes the number of nodes.
  def remove_edge_tags_from_combined_text_nodes( nodes )
    node_count = nodes.size
    to_return = []

    nodes.each do |node|
      if node.is_a?( HerbNodeRetainingTextNode )
        to_return += remove_edge_tags_from_combined_text_node( node )
      else
        to_return << node
      end
    end

    if node_count != to_return.length
      to_return = remove_edge_tags_from_combined_text_nodes( to_return )
    end

    to_return
  end

  # Returns the node's broken-out pieces when it reports that edge tags can
  # be removed, otherwise the node on its own in an array.
  def remove_edge_tags_from_combined_text_node( node )
    if node.can_remove_starting_or_ending_html_tags?
      node.break_out_start_and_end_tags
    else
      [node]
    end
  end

  # Appends node_b to node_a when node_a already is a resulting_node_type;
  # otherwise wraps node_a in a new resulting_node_type first.
  def combine_two_nodes( node_a, node_b, resulting_node_type )
    return node_a << node_b && node_a if node_a.class == resulting_node_type

    new_node = resulting_node_type.new( node_a )
    new_node << node_b
    new_node
  end

  # True for NonTextNodes that report they can be combined.
  def combindable_node?( node )
    node.is_a?( NonTextNode ) && node.can_be_combined?
  end

  # Parses data_to_parse with the ERB grammar and wraps the result.
  # `private` above does not apply to methods defined on the class itself,
  # so this stays callable as ErbFile.parse.
  def self.parse( data_to_parse )
    # Compile the grammar only once; every later parse reuses the parser
    # class that the first load defined.
    Treetop.load( $ERB_GRAMMER_FILE ) unless defined?( ERBGrammerParser )
    @parser = ERBGrammerParser.new
    ErbFile.new( @parser.parse( data_to_parse ) )
  end

end
@@ -0,0 +1,8 @@
1
# Node mixin for the part of an erb output tag that is not text.
module ErbStringNonTextContent
  include NonTextNode

  # Registers this node's stripped text with the extractor as a variable,
  # under a freshly generated I18n key. Nodes that contain only whitespace
  # are skipped.
  def extract_text( text_extractor, node_tree )
    return if self.contains_only_whitespace?

    variable_key = generate_i18n_key( text_extractor, node_tree ).to_s
    text_extractor.add_variable( variable_key, self.text_value.strip )
  end

end