herbgobbler 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/bin/gobble +43 -0
  2. data/grammer/erb_grammer.treetop +256 -0
  3. data/lib/core/base_node.rb +29 -0
  4. data/lib/core/base_text_extractor.rb +19 -0
  5. data/lib/core/base_translation_store.rb +49 -0
  6. data/lib/core/core.rb +17 -0
  7. data/lib/core/double_quoted_text_node.rb +89 -0
  8. data/lib/core/erb_file.rb +159 -0
  9. data/lib/core/erb_string_non_text_content.rb +8 -0
  10. data/lib/core/i18n_key.rb +74 -0
  11. data/lib/core/ignorable_tag_node.rb +10 -0
  12. data/lib/core/method_call_node.rb +44 -0
  13. data/lib/core/node_processing.rb +26 -0
  14. data/lib/core/non_extracting_non_text_node.rb +8 -0
  15. data/lib/core/non_text_node.rb +32 -0
  16. data/lib/core/rails_text_extractor.rb +88 -0
  17. data/lib/core/rails_translation_store.rb +42 -0
  18. data/lib/core/text_extractor.rb +31 -0
  19. data/lib/core/text_node.rb +73 -0
  20. data/lib/herbgobbler.rb +7 -0
  21. data/lib/nodes/combindable_herb_non_text_node.rb +5 -0
  22. data/lib/nodes/herb_combined_node.rb +47 -0
  23. data/lib/nodes/herb_erb_text_call_node.rb +52 -0
  24. data/lib/nodes/herb_node_retaining_node.rb +37 -0
  25. data/lib/nodes/herb_node_retaining_non_text_node.rb +30 -0
  26. data/lib/nodes/herb_node_retaining_text_node.rb +107 -0
  27. data/lib/nodes/herb_non_text_node.rb +21 -0
  28. data/lib/nodes/herb_string_variable.rb +9 -0
  29. data/lib/nodes/herb_text_node.rb +23 -0
  30. data/lib/nodes/herb_white_space_text_node.rb +14 -0
  31. data/lib/nodes/nodes.rb +13 -0
  32. data/lib/nodes/rails_text_variable_node.rb +19 -0
  33. data/scripts/display_syntax_tree.rb +18 -0
  34. data/scripts/extract_text_from_erb.rb +12 -0
  35. data/scripts/extract_yml_from_erb.rb +12 -0
  36. data/scripts/rails_gobble_file.rb +37 -0
  37. data/scripts/save_result.rb +11 -0
  38. metadata +154 -0
data/bin/gobble ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ require 'lib/herbgobbler'
3
+
4
# Prints a one-line usage summary for this command to standard output.
#
# The program name is taken from the running script so the message stays
# correct however the executable is installed or renamed.
def print_usage
  puts "Usage: #{File.basename($PROGRAM_NAME)} <rails_root>"
end
7
+
8
# Converts a view path relative to the Rails root into a translation context.
#
# The file extensions (everything from the first dot of the file name onwards)
# and every '/app/views/' segment are removed, so
# '/app/views/users/index.html.erb' becomes 'users/index'.
#
# Only the last path segment is inspected for extensions; a dot inside a
# directory name (e.g. 'v1.0/') no longer truncates the rest of the path.
#
# path - String path of the template, beginning with '/app/views/'.
#
# Returns the String context path.
def convert_path_to_key_path( path )
  # The leftmost match is the first dot after the final '/', i.e. the start of
  # the file name's extensions.
  path.sub( %r{\.[^/]*\z}, '' ).gsub( '/app/views/', '' )
end
14
+
15
# Entry point: rewrites every *html.erb view under <rails_root>/app/views in
# place with its text extracted, then writes the collected translations to
# <rails_root>/config/locales/en.yml.
if ARGV.empty?
  print_usage
else
  rails_root = ARGV[0]
  locale_file_name = "en.yml"

  rails_view_directory = "#{rails_root}/app/views"
  full_yml_file_path = "#{rails_root}/config/locales/#{locale_file_name}"

  rails_translation_store = RailsTranslationStore.new
  text_extractor = RailsTextExtractor.new( rails_translation_store )

  # Strip the root only where it prefixes the path. A plain gsub would remove
  # every occurrence, so a root such as "." would delete every dot in the path.
  rails_root_prefix = /\A#{Regexp.escape( rails_root )}/

  Dir["#{rails_view_directory}/**/*html.erb"].each do |full_erb_file_path|
    relative_erb_path = full_erb_file_path.sub( rails_root_prefix, '' )
    rails_translation_store.start_new_context( convert_path_to_key_path( relative_erb_path ) )

    erb_file = ErbFile.load( full_erb_file_path )
    erb_file.extract_text( text_extractor )

    # The template is overwritten in place with the rewritten markup.
    File.open( full_erb_file_path, 'w' ) { |f| f.write( erb_file.to_s ) }
    puts "Wrote #{full_erb_file_path}"
  end

  File.open( full_yml_file_path, 'w' ) { |f| f.write( rails_translation_store.serialize ) }
  puts "Wrote #{full_yml_file_path}"
end
43
+
@@ -0,0 +1,256 @@
1
# Treetop grammar that tokenises an ERB/HTML template into nodes.
#
# Each rule tags its match with a node module or class (the <...> suffix) and
# most add two helpers in an inline block:
#   node_name        - a string label for the node
#   can_be_combined? - a boolean flag consulted when adjacent nodes are merged
grammar ERBGrammer
  # Top-level rule: one construct followed by any number of further constructs.
  # Alternatives are tried in the order listed.
  rule erb
    ( erb_string /
      erb_block /
      ignorable_script_tag_content /
      html_end_tag /
      html_start_tag /
      html_self_contained /
      whitespace /
      text
    ) (erb)*
  end

  # Helper method names recognised at the start of a <%= %> expression.
  rule method_names
    "link_to" <NonTextNode>
    {
      def node_name
        "method_names"
      end

      def can_be_combined?
        true
      end
    }
  end

  # Argument separator inside a recognised method call.
  rule comma
    "," <NonTextNode>
    {
      # Previously reported "method_names", copied from the rule above.
      def node_name
        "comma"
      end

      def can_be_combined?
        true
      end
    }
  end

  # A recognised method followed by a double-quoted string and, optionally, a
  # comma plus the remaining arguments.
  rule method_call_content
    method_names whitespace double_quoted_ruby_string (comma erb_string_non_text_content)? <MethodCallNode>
  end

  # Everything up to (but excluding) the closing ERB delimiter.
  rule erb_string_non_text_content
    (!erb_block_end .)* <ErbStringNonTextContent>
    {
      def can_be_combined?
        true
      end

      def node_name
        "erb_string_non_text_content"
      end
    }
  end

  # An output tag: <%= ... %>
  rule erb_string
    start_erb:erb_string_start start_space:(space) text:(method_call_content / double_quoted_ruby_string)? everything_else:erb_string_non_text_content end_erb:(erb_block_end) <NonTextNode>
    {
      def node_name
        "erb_string"
      end
    }
  end

  # Interpolation inside a double-quoted string: #{ ... }
  rule string_variable
    '#{' variable:((!'}' .)*) '}' <HerbStringVariable>
  end

  # A double-quoted Ruby string, possibly containing interpolations.
  rule double_quoted_ruby_string
    start_quote:'"' unquoted_text:((!'"' (string_variable / .))*) end_quote:'"' <DoubleQuotedTextNode>
    {
      def can_be_combined?
        false
      end

      def html?
        false
      end

      def text_value
        self.elements.inject("") { |text, node| text += node.text_value }
      end

      def node_name
        "double_quoted_ruby_string"
      end
    }
  end

  # A non-output tag: <% ... %>
  rule erb_block
    start_block:'<%' block_contents:((!erb_block_end .)*) end_block:not_combindable_erb_block_end <NonTextNode>
    {
      def node_name
        "erb_block"
      end
    }
  end

  rule erb_string_start
    '<%=' <NonExtractingNonTextNode>
    {
      def node_name
        "erb_string_start"
      end

      def can_be_combined?
        true
      end
    }
  end

  # Closing delimiter used by erb_block; reports can_be_combined? as false.
  rule not_combindable_erb_block_end
    ('%>' / '-%>') <NonTextNode>
    {
      def node_name
        "not_combindable_erb_block_end"
      end

      def can_be_combined?
        false
      end
    }
  end

  # Closing delimiter used by erb_string; reports can_be_combined? as true.
  rule erb_block_end
    ('%>' / '-%>') <NonExtractingNonTextNode>
    {
      def node_name
        "erb_block_end"
      end

      def can_be_combined?
        true
      end
    }
  end

  # An opening <script tag and everything up to (excluding) '</script'.
  rule ignorable_script_tag_content
    '<script' (!'</script' .)+ <IgnorableTagNode>
    {
      def can_be_combined?
        false
      end

      def node_name
        "ignorable_tag_name"
      end
    }
  end

  # A self-closing tag such as <br />; may contain <%= %> expressions.
  rule html_self_contained
    start_tag:'<' tag_name:([a-zA-Z])+ tag_contents:(!'/>' (erb_string / .))* end_tag:'/>' <NonTextNode>
    {
      def can_be_combined?
        true
      end

      def node_name
        "html_self_contained"
      end
    }
  end

  # A closing tag; only the inline tags listed report can_be_combined? as true.
  rule html_end_tag
    '</' tag_name:([a-zA-Z])+ (!'>' .)* '>' <NonTextNode>
    {
      def can_be_combined?
        ['a', 'b', 'span', 'strong'].include?(tag_name.text_value.downcase)
      end

      def node_name
        "html_end_tag"
      end
    }
  end

  # The remainder of an opening tag after its name (attributes and the like).
  rule tag_text
    (!'>' !'/>' (erb_string / .) )* <NonTextNode>
    {
      def node_name
        "tag_text"
      end
    }
  end

  # An opening tag; only the inline tags listed report can_be_combined? as true.
  rule html_start_tag
    start_bracket:'<' tag_name:([a-zA-Z])+ extra_text:tag_text end_bracket:'>' <NonTextNode>
    {
      def can_be_combined?
        ['a', 'b', 'span', 'strong'].include?(tag_name.text_value.downcase)
      end

      def node_name
        "html_start_tag"
      end
    }
  end

  # A run of characters that does not start a tag or an ERB construct.
  rule text
    ((!html_self_contained !html_start_tag !html_end_tag !erb_string !erb_block) . )+ <TextNode>
    {
      def can_be_combined?
        true
      end

      def node_name
        "text"
      end
    }
  end

  # One or more literal space characters.
  rule space
    (' ')+ <NonTextNode>
    {
      def node_name
        "space"
      end

      def can_be_combined?
        true
      end
    }
  end

  # A run of characters containing no letters, no double quotes and no start of
  # a tag or ERB construct.
  rule whitespace
    ((![a-zA-Z] !('<' [a-zA-Z]+) !('</' [a-zA-Z]+) !'<%' !'"') . )+ <NonTextNode>
    {
      def can_be_combined?
        true
      end

      def node_name
        "whitespace"
      end

      def white_space?
        true
      end
    }
  end

end
256
+
@@ -0,0 +1,29 @@
1
# Behaviour shared by the node types: default shattering hooks plus i18n key
# generation backed by a list of keys shared through class variables.
module BaseNode
  # Keys handed to I18nKey for the current extractor/node-tree signature.
  @@keys = []
  # Signature the current @@keys list belongs to (nil until first use).
  @@hash = nil

  # Returns false; this module's default is not to shatter.
  def shatter?
    false
  end

  # Returns the given nodes without the nil entries and without the entries
  # whose text_value is empty.
  def shattered_node_list( *nodes )
    nodes.select { |candidate| !candidate.nil? && !candidate.text_value.empty? }
  end

  protected

  # Builds an I18nKey from this node's text and the shared key list.
  def generate_i18n_key( text_extractor, node_tree )
    source_text = self.text_value
    known_keys = get_key_hash( text_extractor, node_tree )
    I18nKey.new( source_text, known_keys )
  end

  # Returns the shared key list, replacing it with a fresh one whenever the
  # combination of extractor and last node in node_tree changes. Different
  # key sets are used depending on position in the tree, since key collisions
  # can only happen while generating the same node.
  def get_key_hash( text_extractor, node_tree )
    signature = text_extractor.hash.to_s + node_tree.last.hash.to_s
    unless @@hash == signature
      @@keys = []
      @@hash = signature
    end
    @@keys
  end

end
@@ -0,0 +1,19 @@
1
# Text extractor whose lifecycle hooks do nothing and whose text handling is
# limited to trimming surrounding whitespace.
class BaseTextExtractor < TextExtractor

  # Hook called when text extraction has begun; nothing to do here.
  def starting_text_extraction; end

  # Takes a text node and returns one or more nodes to be output. The nodes
  # that are output should implement node_name and text_value.
  def html_text( text_node )
    remove_leading_and_trailing_whitespace( text_node )
  end

  # Hook called when text extraction has finished; nothing to do here.
  def completed_text_extraction; end

end
@@ -0,0 +1,49 @@
1
# Collects translations as [key, value] pairs grouped by context, remembering
# the order in which contexts were first used. Subclasses supply #serialize.
class BaseTranslationStore

  # The context most recently set with #start_new_context (nil until then).
  attr_reader :context

  def initialize
    @context_maps = {}   # context name => Array of [key, value] pairs
    @context_array = []  # context names in order of first use
  end

  # Yields context, key and value for every stored translation, contexts in
  # order of first use and translations in insertion order.
  #
  # Returns an Enumerator when called without a block.
  def each( &block )
    return to_enum( :each ) unless block

    @context_array.each do |context|
      @context_maps[context].each do |key_name, key_value|
        yield context, key_name, key_value
      end
    end
  end

  # Makes new_context the context that later translations are filed under.
  def start_new_context( new_context )
    @context = new_context
  end

  # Stores value under key in the current context.
  def add_translation( key, value )
    context_map << [key, value]
  end

  # Subclasses must override this to produce their output format.
  #
  # Raises RuntimeError always.
  def serialize
    raise "Implement me"
  end

  private

  # Returns the pair list for the current context, creating and registering it
  # on first use. A nil context is filed under the empty string.
  def context_map
    context_name = @context.nil? ? '' : @context

    @context_maps[context_name] ||= []
    @context_array << context_name unless @context_array.include?( context_name )
    @context_maps[context_name]
  end

end
data/lib/core/core.rb ADDED
@@ -0,0 +1,17 @@
1
+ require 'core/base_node'
2
+ require 'core/node_processing'
3
+ require 'core/erb_file'
4
+ require 'core/text_node'
5
+ require 'core/non_text_node'
6
+ require 'core/text_extractor'
7
+ require 'core/base_text_extractor'
8
+ require 'core/rails_text_extractor'
9
+ require 'core/i18n_key'
10
+ require 'core/ignorable_tag_node'
11
+ require 'core/base_translation_store'
12
+ require 'core/rails_translation_store'
13
+ require 'core/double_quoted_text_node'
14
+ require 'core/erb_string_non_text_content'
15
+ require 'core/non_extracting_non_text_node'
16
+ require 'core/method_call_node'
17
+
@@ -0,0 +1,89 @@
1
# Syntax node for a double-quoted Ruby string found inside an ERB expression.
#
# #elements and #text_value are overridden so that the surrounding double
# quotes are removed and, when the string contains interpolated variables,
# the content is presented as alternating text and HerbStringVariable nodes.
class DoubleQuotedTextNode < Treetop::Runtime::SyntaxNode
  include TextNode

  # Keep the inherited implementations reachable before they are redefined.
  alias :original_elements :elements
  alias :original_text_value :text_value

  # Returns true when any leaf of this string is a HerbStringVariable.
  def has_variables?
    leaves.any? { |node| node.is_a?( HerbStringVariable ) }
  end

  # Returns the flattened leaves of the original child elements, or an empty
  # Array when there are none.
  def leaves
    children = original_elements
    return [] if children.nil? || children.empty?

    children.inject( [] ) { |found, element| found + flatten( element ) }
  end

  # Returns the child elements with the leading and trailing double quotes
  # removed; interpolated strings are first regrouped by #variable_elements.
  def elements
    if has_variables?
      strip_leading_and_trailing_double_quotes( variable_elements )
    else
      strip_leading_and_trailing_double_quotes( original_elements )
    end
  end

  # Returns the unquoted text for interpolated strings and the original text
  # otherwise.
  def text_value
    if has_variables?
      elements.inject( '' ) { |text, current_element| text + current_element.text_value }
    else
      original_text_value
    end
  end

  # Regroups the leaves into HerbTextNode instances (runs of plain text) and
  # the HerbStringVariable leaves, in source order.
  def variable_elements
    found_elements = []
    current_text = ''
    leaves.each do |leaf|
      if leaf.is_a?( HerbStringVariable )
        found_elements = add_text_to_array( found_elements, current_text )
        current_text = ''
        found_elements << leaf
      else
        current_text += leaf.text_value unless leaf.nil? || leaf.text_value.empty?
      end
    end
    add_text_to_array( found_elements, current_text )
  end

  private

  # Removes a double quote from the start of the first element and from the end
  # of the last element, dropping elements that become empty.
  def strip_leading_and_trailing_double_quotes( list_of_elements )
    first_element = list_of_elements.first
    last_element = list_of_elements.last

    middle_elements = list_of_elements[ 1..list_of_elements.length - 2 ]
    first_element = remove_double_quote_at_position( first_element, 0 )
    last_element = remove_double_quote_at_position( last_element, last_element.text_value.length - 1 )
    [first_element, middle_elements, last_element].flatten.compact
  end

  # Returns element unchanged unless the character at position is a double
  # quote. Otherwise returns a HerbTextNode holding the text without that
  # character, or nil when nothing is left.
  def remove_double_quote_at_position( element, position )
    text = element.text_value
    return element unless text[position..position] == '"'

    # Keep everything on both sides of the quote. The earlier bounds check
    # dropped the tail when exactly one character followed the quote, so the
    # leading text of a string such as "a#{x}" was lost.
    remaining = text[0...position] + text[(position + 1)..-1].to_s
    remaining.empty? ? nil : HerbTextNode.new( remaining, false )
  end

  # Appends text to array as a HerbTextNode unless it is empty; returns array.
  def add_text_to_array( array, text )
    array << HerbTextNode.new( text ) unless text.empty?
    array
  end

end
@@ -0,0 +1,159 @@
1
# A parsed ERB template: the syntax tree produced by the ERBGrammer parser plus
# the flat node list that text extraction rewrites and serialises.
class ErbFile

  include NodeProcessing

  # Flat list of nodes; only set when parsing succeeded (see #compiled?).
  attr_accessor :nodes
  # Root of the parse result, or nil when parsing failed.
  attr_accessor :node_set

  # Parses the file at file_path and returns the parser's failure reason.
  def ErbFile.debug( file_path )
    ErbFile.parse( File.read( file_path ) )
    @parser.failure_reason
  end

  # Builds an ErbFile from a String of template source.
  def ErbFile.from_string( string_to_parse )
    ErbFile.parse( string_to_parse )
  end

  # Builds an ErbFile from the template stored at file_path.
  def ErbFile.load( file_path )
    ErbFile.parse( File.read( file_path ) )
  end

  # Parses data_to_parse with a fresh ERBGrammerParser and wraps the result.
  def ErbFile.parse( data_to_parse )
    load_grammar
    @parser = ERBGrammerParser.new
    ErbFile.new( @parser.parse( data_to_parse ) )
  end

  # Compiles the grammar named by $ERB_GRAMMER_FILE, once per grammar path, so
  # that parsing many files does not recompile it each time.
  def ErbFile.load_grammar
    return if @loaded_grammar_file && @loaded_grammar_file == $ERB_GRAMMER_FILE

    Treetop.load( $ERB_GRAMMER_FILE )
    @loaded_grammar_file = $ERB_GRAMMER_FILE
  end
  private_class_method :load_grammar

  # node_set - the parse result, or nil when the source could not be parsed.
  def initialize( node_set )
    @node_set = node_set
    @nodes = flatten_elements if compiled?
    @debug = false
  end

  # Returns the terminals collected from the parse tree by the flatten helper.
  def flatten_elements
    terminals = []
    flatten( @node_set, terminals )
    terminals
  end

  # Returns true when a parse result is present.
  def compiled?
    !@node_set.nil?
  end

  # Merges neighbouring leaves into retaining nodes and then lets combined text
  # nodes break out their edge tags.
  #
  # Each leaf is compared with the most recently produced node:
  # * two combinable non-text nodes become a new HerbNodeRetainingNonTextNode;
  # * two nodes that are neither text nor combinable are merged into a
  #   HerbNodeRetainingNonTextNode;
  # * text next to text, or text next to a combinable non-text node, is merged
  #   into a HerbNodeRetainingTextNode;
  # * anything else is kept as two separate nodes.
  def combine_nodes( leaves )
    combined_nodes = []
    leaves.each do |node|
      last_combined_node = combined_nodes.pop
      if last_combined_node.nil?
        combined_nodes << node
      elsif combindable_node?( node ) && combindable_node?( last_combined_node )
        new_node = HerbNodeRetainingNonTextNode.new( last_combined_node )
        new_node << node
        combined_nodes << new_node
      elsif !node.is_a?( TextNode ) && !last_combined_node.is_a?( TextNode ) &&
            !( last_combined_node.is_a?( BaseNode ) && last_combined_node.can_be_combined? ) &&
            !( node.is_a?( NonTextNode ) && node.can_be_combined? )
        combined_nodes << combine_two_nodes( last_combined_node, node, HerbNodeRetainingNonTextNode )
      elsif ( last_combined_node.is_a?( TextNode ) && node.is_a?( TextNode ) ) ||
            ( combindable_node?( last_combined_node ) && node.is_a?( TextNode ) ) ||
            ( combindable_node?( node ) && last_combined_node.is_a?( TextNode ) )
        combined_nodes << combine_two_nodes( last_combined_node, node, HerbNodeRetainingTextNode )
      else
        combined_nodes << last_combined_node
        combined_nodes << node
      end
    end
    remove_edge_tags_from_combined_text_nodes( combined_nodes )
  end

  # Runs text extraction over the combined nodes and replaces #nodes with the
  # resulting node list.
  #
  # Text nodes are handed to the extractor between start_html_text and
  # end_html_text and replaced by whatever end_html_text returns; all other
  # nodes are carried over unchanged.
  #
  # Returns the value of text_extractor.completed_text_extraction.
  def extract_text( text_extractor )
    text_extractor.starting_text_extraction
    new_node_set = []
    @nodes = combine_nodes( @nodes )
    @nodes.each do |node|
      if node.text?
        text_extractor.start_html_text
        node.extract_text( text_extractor, new_node_set )
        new_node_set += text_extractor.end_html_text
      else
        new_node_set << node
      end
    end
    # Swap in the rewritten list once, after every node has been processed.
    @nodes = new_node_set
    text_extractor.completed_text_extraction
  end

  # Returns the text of all nodes concatenated in order.
  def serialize
    nodes.inject( String.new ) { |output, node| output << node.text_value }
  end

  # Same output as #serialize.
  def to_s
    serialize
  end

  private

  # Lets every HerbNodeRetainingTextNode break out its edge tags, repeating
  # until a pass no longer changes the number of nodes.
  def remove_edge_tags_from_combined_text_nodes( nodes )
    to_return = []

    nodes.each do |node|
      if node.is_a?( HerbNodeRetainingTextNode )
        to_return += remove_edge_tags_from_combined_text_node( node )
      else
        to_return << node
      end
    end

    return to_return if nodes.size == to_return.length

    remove_edge_tags_from_combined_text_nodes( to_return )
  end

  # Returns the nodes that replace a combined text node: its broken-out parts
  # when it reports removable edge tags, otherwise the node itself.
  def remove_edge_tags_from_combined_text_node( node )
    if node.can_remove_starting_or_ending_html_tags?
      node.break_out_start_and_end_tags
    else
      [node]
    end
  end

  # Appends node_b to node_a when node_a already is exactly a
  # resulting_node_type; otherwise wraps node_a in a new one first.
  def combine_two_nodes( node_a, node_b, resulting_node_type )
    target = node_a.class == resulting_node_type ? node_a : resulting_node_type.new( node_a )
    target << node_b
    target
  end

  # Returns true for non-text nodes that report can_be_combined?.
  def combindable_node?( node )
    node.is_a?( NonTextNode ) && node.can_be_combined?
  end

end
@@ -0,0 +1,8 @@
1
# Node behaviour for the content of a <%= %> expression that is not a string
# literal: it is registered with the extractor as a variable.
module ErbStringNonTextContent
  include NonTextNode

  # Registers this node's stripped text with the extractor as a variable under
  # a freshly generated i18n key. Content consisting only of whitespace is
  # skipped.
  def extract_text( text_extractor, node_tree )
    return if contains_only_whitespace?

    variable_key = generate_i18n_key( text_extractor, node_tree ).to_s
    text_extractor.add_variable( variable_key, text_value.strip )
  end

end