i18n_template 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/Gemfile.rails23_r187 +8 -0
- data/Gemfile.rails30_r193 +8 -0
- data/Gemfile.rails31_r193 +8 -0
- data/README.md +250 -0
- data/Rakefile +1 -0
- data/bin/i18n_template +5 -0
- data/i18n_template.gemspec +26 -0
- data/lib/i18n_template.rb +28 -0
- data/lib/i18n_template/document.rb +460 -0
- data/lib/i18n_template/extractor.rb +4 -0
- data/lib/i18n_template/extractor/base.rb +44 -0
- data/lib/i18n_template/extractor/gettext.rb +127 -0
- data/lib/i18n_template/extractor/plain.rb +43 -0
- data/lib/i18n_template/extractor/yaml.rb +53 -0
- data/lib/i18n_template/handler.rb +61 -0
- data/lib/i18n_template/node.rb +74 -0
- data/lib/i18n_template/railtie.rb +7 -0
- data/lib/i18n_template/runner.rb +61 -0
- data/lib/i18n_template/runner/base.rb +11 -0
- data/lib/i18n_template/runner/extract_phrases.rb +70 -0
- data/lib/i18n_template/tasks.rb +2 -0
- data/lib/i18n_template/translation.rb +62 -0
- data/lib/i18n_template/translator.rb +5 -0
- data/lib/i18n_template/translator/i18n.rb +24 -0
- data/lib/i18n_template/version.rb +3 -0
- data/test/abstract_unit.rb +11 -0
- data/test/document_test.rb +316 -0
- data/test/fixtures/handling_if_blocks.yml +23 -0
- data/test/fixtures/ignored_markup.yml +15 -0
- data/test/fixtures/incorrect_node_markup.yml +17 -0
- data/test/fixtures/nested_nodes.yml +16 -0
- data/test/fixtures/nested_wrapped_text.yml +15 -0
- data/test/fixtures/phrase_fully_ignored.yml +14 -0
- data/test/fixtures/phrase_with_embed_words_and_scriptlet.yml +17 -0
- data/test/fixtures/phrase_with_single_char_to_ignore.yml +19 -0
- data/test/fixtures/replacing_br_with_newline.yml +15 -0
- data/test/fixtures/skipping_ignored_blocks.yml +15 -0
- data/test/fixtures/spans_as_phrases.yml +18 -0
- data/test/fixtures/table.yml +35 -0
- data/test/fixtures/text_with_braces.yml +17 -0
- data/test/fixtures/text_with_brackets.yml +17 -0
- data/test/fixtures/wrapped_key_propagation.yml +15 -0
- data/test/fixtures/wrapping_eval_blocks.yml +17 -0
- data/test/fixtures_rendering_test.rb +46 -0
- data/test/inline_rendering_test.rb +27 -0
- data/test/support/i18n_test_case_helper.rb +12 -0
- data/test/templates/_footer.html.erb +3 -0
- data/test/templates/greeting.html.erb +1 -0
- data/test/templates/layouts/application.html.erb +5 -0
- data/test/templates/users/_account.html.erb +3 -0
- data/test/templates/users/_profile.html.erb +6 -0
- data/test/templates/users/index.html.erb +5 -0
- data/test/templates/users/show.html.erb +4 -0
- data/test/templates_rendering_test.rb +81 -0
- data/test/translate_test.rb +72 -0
- metadata +156 -0
data/.gitignore
ADDED
data/README.md
ADDED
@@ -0,0 +1,250 @@
|
|
1
|
+
# I18nTemplate
|
2
|
+
|
3
|
+
## Main Feature
|
4
|
+
|
5
|
+
Just compare regular rails view internationalization:
|
6
|
+
|
7
|
+
<html>
|
8
|
+
<body>
|
9
|
+
<% current_year = Time.now.year %>
|
10
|
+
<span><%= t('hello') %></span>
|
11
|
+
<h2><%= t('Dashboard') %></h2>
|
12
|
+
<div><%= t('Posts count:') %><%= current_user.posts.count %></div>
|
13
|
+
<div><%= t('Click') %><a href="#"><%= t('here') %></a></div>
|
14
|
+
...
|
15
|
+
</body>
|
16
|
+
</html>
|
17
|
+
|
18
|
+
with i18n template internationalization:
|
19
|
+
|
20
|
+
<html>
|
21
|
+
<body>
|
22
|
+
<% current_year = Time.now.year %>
|
23
|
+
<span i18n="p">hello</span>
|
24
|
+
<h2>Dashboard</h2>
|
25
|
+
<div>Posts count: <%= current_user.posts.count %></div>
|
26
|
+
<div>Click<a href="#">here</a></div>
|
27
|
+
...
|
28
|
+
</body>
|
29
|
+
</html>
|
30
|
+
|
31
|
+
Nice?
|
32
|
+
|
33
|
+
## How it Works
|
34
|
+
|
35
|
+
It converts a regular erb template *on the fly* to another erb template. For the above example the result is something like:
|
36
|
+
|
37
|
+
<html>
|
38
|
+
<body>
|
39
|
+
<% current_year = Time.now.year %>
|
40
|
+
<span>
|
41
|
+
<%- i18n_variables = {}; i18n_wrappers = [] -%>
|
42
|
+
<%= ::I18nTemplate::Translation.translate("hello", i18n_wrappers, i18n_variables) %>
|
43
|
+
</span>
|
44
|
+
<h2>
|
45
|
+
<%- i18n_variables = {}; i18n_wrappers = [] -%>
|
46
|
+
<%= ::I18nTemplate::Translation.translate("Dashboard", i18n_wrappers, i18n_variables) %>
|
47
|
+
</h2>
|
48
|
+
<div>
|
49
|
+
<%- i18n_variables = {}; i18n_wrappers = [] -%>
|
50
|
+
<%- i18n_variables['current user posts count'] = capture do -%>
|
51
|
+
<%= current_user.posts.count %>
|
52
|
+
<%- end -%>
|
53
|
+
<%= ::I18nTemplate::Translation.translate("Posts count: {current user posts count}",
|
54
|
+
i18n_wrappers, i18n_variables) %>
|
55
|
+
</div>
|
56
|
+
<div>
|
57
|
+
<%- i18n_variables = {}; i18n_wrappers = [] -%>
|
58
|
+
<%- i18n_wrappers[1] = capture do -%>
|
59
|
+
<a href="#" i18n_wrapper="1">
|
60
|
+
<%- i18n_variables = {}; i18n_wrappers = [] -%>
|
61
|
+
<%= ::I18nTemplate::Translation.translate("here", i18n_wrappers, i18n_variables) %>
|
62
|
+
</a>
|
63
|
+
<%- end -%>
|
64
|
+
<%= ::I18nTemplate::Translation.translate("Click[1]here[/1]", i18n_wrappers, i18n_variables) %>
|
65
|
+
</div>
|
66
|
+
</body>
|
67
|
+
</html>
|
68
|
+
|
69
|
+
Translation phrases (keys):
|
70
|
+
|
71
|
+
* _hello_
|
72
|
+
* _Dashboard_
|
73
|
+
* _Posts count: {current user posts count}_
|
74
|
+
* _Click[1]here[/1]_
|
75
|
+
|
76
|
+
## Description
|
77
|
+
|
78
|
+
I18nTemplate is made to extract phrases and translate html/xhtml/xml document or erb templates.
|
79
|
+
Currently it can work with (x)html documents.
|
80
|
+
Translation is done by modifying the original template (on the fly) so that it is translated at erb execution time.
|
81
|
+
|
82
|
+
## Semantics
|
83
|
+
|
84
|
+
The engine is leveraging the HTML document semantics.
|
85
|
+
As we know HTML document element can contain : block elements and/or inline elements.
|
86
|
+
The engine has the following parsing rules, based on what kind of children a parent element contains:
|
87
|
+
|
88
|
+
* block element containing only block elements - is named a parent element, and is ignored by the engine;
|
89
|
+
* block element containing only inline elements - is named phrase, while every inline element is named a word;
|
90
|
+
* inline element containing other inline elements - is also a word;
|
91
|
+
* any other variation - is considered a broken element, which should be turned into one of the above.
|
92
|
+
|
93
|
+
### Markup
|
94
|
+
|
95
|
+
Additionally, for the sake of best practices and optimization, the following rules apply:
|
96
|
+
|
97
|
+
* the following elements are considered block elements by the engine
|
98
|
+
* usual block : `blockquote p div h1 h2 h3 h4 h5 h6 li dd dt`
|
99
|
+
* inline elements : `td th a legend label title caption option optgroup button`
|
100
|
+
* the following elements, and their content, will be ignored by the engine:
|
101
|
+
* html elements: `select style script`
|
102
|
+
* non-breaking space: `&nbsp;`
|
103
|
+
* erb scriptlets: `<% <%=`
|
104
|
+
* html comments: `<!-- -->`
|
105
|
+
* xhtml doctype: `<!DOCTYPE`
|
106
|
+
* additional best practices are added to translate content inside such tags
|
107
|
+
|
108
|
+
In order to fix broken elements, the following elements/attributes can be added to the html document to resolve engine misunderstanding:
|
109
|
+
|
110
|
+
* `<i18n>content</i18n>` - mark invisible for parser content for internationalization
|
111
|
+
* `<... i18n="i" ...>content<...>` - (ignore) ignore element content internationalization
|
112
|
+
* `<... i18n="p" ...>content<...>` - (phrase) explicitly enable content internationalization
|
113
|
+
* `<... i18n="s" ...>content<...>` - (subphrase) mark element content as sub-phrase for parent element phrase
|
114
|
+
|
115
|
+
## Translation
|
116
|
+
|
117
|
+
### Brackets
|
118
|
+
|
119
|
+
[1]Hello World[/1]
|
120
|
+
|
121
|
+
### Braces
|
122
|
+
|
123
|
+
Example
|
124
|
+
|
125
|
+
Hello { user name }
|
126
|
+
|
127
|
+
* `<%= @user_name %>` as `{user name}`
|
128
|
+
* `<%= user_name %>` as `{user name}`
|
129
|
+
* `<%= @post.comments.count %>` as `{post comments count}`
|
130
|
+
|
131
|
+
## Using with Rails (2.3.x 3.x.x)
|
132
|
+
|
133
|
+
$ gem install i18n_template
|
134
|
+
|
135
|
+
require 'i18n_template'
|
136
|
+
|
137
|
+
ActionView::Template.register_template_handler(:erb, I18nTemplate::Handler.new)
|
138
|
+
|
139
|
+
### Set another phrase translator:
|
140
|
+
|
141
|
+
I18nTemplate.phrase_translator = lambda { |phrase| Google.translate(phrase) }
|
142
|
+
|
143
|
+
### More template internationalize control
|
144
|
+
|
145
|
+
Assume we don't want to internationalize admin view templates.
|
146
|
+
|
147
|
+
class MyI18nTemplateHandler < I18nTemplate::Handler
|
148
|
+
def internationalize?(template)
|
149
|
+
if template.respond_to?(:path)
|
150
|
+
template.path =~ /^admin/ ? false : true
|
151
|
+
else
|
152
|
+
true
|
153
|
+
end
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
ActionView::Template.register_template_handler(:erb, MyI18nTemplateHandler.new)
|
158
|
+
|
159
|
+
## Testing
|
160
|
+
|
161
|
+
### Setup
|
162
|
+
|
163
|
+
$ rvm alias create rails23_r187 ruby-1.8.7
|
164
|
+
$ rvm alias create rails30_r193 ruby-1.9.3
|
165
|
+
$ rvm alias create rails31_r193 ruby-1.9.3
|
166
|
+
|
167
|
+
$ gem install multiversion
|
168
|
+
$ multiversion all bundle install
|
169
|
+
|
170
|
+
### Run
|
171
|
+
|
172
|
+
Against all versions:
|
173
|
+
|
174
|
+
$ multiversion all exec testrb test/*_test.rb
|
175
|
+
|
176
|
+
Against specific versions:
|
177
|
+
|
178
|
+
$ multiversion rails30_r193,rails31_r193 exec testrb test/*_test.rb
|
179
|
+
|
180
|
+
|
181
|
+
## Extract phrases
|
182
|
+
|
183
|
+
$ i18n_template --help
|
184
|
+
extract_phrases - extract phrases for translations
|
185
|
+
--format plain|gettext|yaml translation format (default gettext)
|
186
|
+
--po-root PO ROOT root directory for po files (default po)
|
187
|
+
--glob GLOB template files glob (default app/views/**/*.{erb,rhtml})
|
188
|
+
--textdomain TEXTDOMAIN gettext textdomain (default phrases)
|
189
|
+
--output-file FILE output file (default template_phrases.txt)
|
190
|
+
--locales-root DIRECTORY locales directory (default config/locales)
|
191
|
+
|
192
|
+
### Plain format
|
193
|
+
|
194
|
+
$ i18n_template extract_phrases --format plain --output-file /tmp/phrases.txt
|
195
|
+
|
196
|
+
### Yaml format
|
197
|
+
|
198
|
+
$ i18n_template extract_phrases --format yaml
|
199
|
+
|
200
|
+
$ cat config/locales/phrases.yml
|
201
|
+
en:
|
202
|
+
Hello {user name}, {message}:
|
203
|
+
'[1]First name[/1] : {profile first name}':
|
204
|
+
'[1]Last name[/1] : {profile last name}':
|
205
|
+
'[1]Email[/1] : {account email}':
|
206
|
+
Copyright {current year}. All rights reserved.:
|
207
|
+
|
208
|
+
### Gettext format
|
209
|
+
|
210
|
+
$ i18n_template extract_phrases \
|
211
|
+
--textdomain myapp \
|
212
|
+
--glob app/views/**/*.erb \
|
213
|
+
--glob lib/view/**/*.erb
|
214
|
+
|
215
|
+
$ tree --dirsfirst po
|
216
|
+
po
|
217
|
+
├── de
|
218
|
+
│ └── myapp.po
|
219
|
+
└── myapp.pot
|
220
|
+
|
221
|
+
$ cat po/myapp.pot
|
222
|
+
# SOME DESCRIPTIVE TITLE.
|
223
|
+
# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
|
224
|
+
# This file is distributed under the same license as the PACKAGE package.
|
225
|
+
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
|
226
|
+
#
|
227
|
+
msgid ""
|
228
|
+
msgstr ""
|
229
|
+
"Project-Id-Version: PACKAGE VERSION\n"
|
230
|
+
"POT-Creation-Date: 2011-11-28 15:38+0200\n"
|
231
|
+
"PO-Revision-Date: 2011-11-25 21:27+0200\n"
|
232
|
+
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
233
|
+
"Language-Team: LANGUAGE <LL@li.org>\n"
|
234
|
+
"Language: \n"
|
235
|
+
"MIME-Version: 1.0\n"
|
236
|
+
"Content-Type: text/plain; charset=UTF-8\n"
|
237
|
+
"Content-Transfer-Encoding: 8bit\n"
|
238
|
+
"Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n"
|
239
|
+
|
240
|
+
# app/views/_footer.html.erb
|
241
|
+
msgid "Copyright {current year}. All rights reserved."
|
242
|
+
msgstr ""
|
243
|
+
|
244
|
+
# app/views/greeting.html.erb
|
245
|
+
msgid "Hello {user name}, {message}"
|
246
|
+
msgstr ""
|
247
|
+
|
248
|
+
## References
|
249
|
+
|
250
|
+
* [multiversion](https://github.com/railsware/multiversion)
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/bin/i18n_template
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "i18n_template/version"
|
4
|
+
|
5
|
+
# Gem specification for i18n_template: package metadata, file lists and the
# single runtime dependency.
Gem::Specification.new do |s|
  s.name = "i18n_template"
  # Version comes from lib/i18n_template/version.rb (required above this block).
  s.version = I18nTemplate::VERSION
  s.authors = ["Nikolai Lugovoi", "Yaroslav Lazor", "Andriy Yanko"]
  s.email = ["andriy.yanko@gmail.com"]
  s.homepage = "https://github.com/railsware/i18n_template"
  s.summary = %q{I18nTemplate is made to extract phrases from html/xhtml/xml documents and translate them on the fly}
  s.description = %q{
I18nTemplate is made to extract phrases and translate templates.
Currently I18nTemplate can work with (x)html documents.
Translation is done by modify the original template (on the fly) to be translated on erb execution time.
}

  s.rubyforge_project = "i18n_template"

  # File lists are taken from the git index, so building the gem requires
  # running inside a git checkout with `git` available on the PATH.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  # Executables are listed by base name only (e.g. bin/i18n_template -> i18n_template).
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_runtime_dependency "actionpack", ">=2.3.0"
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "i18n_template/version"
|
2
|
+
|
3
|
+
# Top-level namespace of the gem.
#
# The components are registered with autoload, so each file is only loaded
# when its constant is first referenced. The module also holds three pieces of
# module-level state: the runner list, the extractor list and the translator.
module I18nTemplate
  autoload :Handler, 'i18n_template/handler'
  autoload :Translator, 'i18n_template/translator'
  autoload :Extractor, 'i18n_template/extractor'
  autoload :Runner, 'i18n_template/runner'
  autoload :Translation, 'i18n_template/translation'
  autoload :Document, 'i18n_template/document'
  autoload :Node, 'i18n_template/node'

  # @return [Array] the runner list, created empty on first access
  #   (presumably runners register themselves here - confirm in runner/*.rb)
  def self.runners
    @runners ||= []
  end

  # @return [Array] the extractor list, created empty on first access
  #   (presumably extractors register themselves here - confirm in extractor/*.rb)
  def self.extractors
    @extractors ||= []
  end

  # @return the configured translator; falls back to
  #   I18nTemplate::Translator::I18n when none has been assigned
  def self.translator
    @translator ||= I18nTemplate::Translator::I18n
  end

  # Replace the translator used by the gem.
  # @param value the new translator
  def self.translator=(value)
    @translator = value
  end
end
|
27
|
+
|
28
|
+
require "i18n_template/railtie"
|
@@ -0,0 +1,460 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'active_support'
|
3
|
+
require 'active_support/core_ext/string'
|
4
|
+
require 'action_controller/vendor/html-scanner/html/tokenizer'
|
5
|
+
|
6
|
+
module I18nTemplate
|
7
|
+
##
|
8
|
+
# I18nTemplate::Document processes on the fly xhtml document internationalization.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
#
|
12
|
+
# Next document will be automatically internationalized
|
13
|
+
#
|
14
|
+
# <body>
|
15
|
+
# <% current_year = Time.now.year %>
|
16
|
+
# <span i18n="p">hello</span>
|
17
|
+
# <h2>Dashboard</h2>
|
18
|
+
# <div>Posts count: <%= current_user.posts.count %></div>
|
19
|
+
# <div>Click<a href="#">here</a></div>
|
20
|
+
# </body>
|
21
|
+
#
|
22
|
+
# to:
|
23
|
+
#
|
24
|
+
# <body>
|
25
|
+
# <% current_year = Time.now.year %>
|
26
|
+
# <span>
|
27
|
+
# <%- i18n_variables = {}; i18n_wrappers = [] -%>
|
28
|
+
# <%= ::I18nTemplate::Translation.translate("hello", i18n_wrappers, i18n_variables) %>
|
29
|
+
# </span>
|
30
|
+
# <h2>
|
31
|
+
# <%- i18n_variables = {}; i18n_wrappers = [] -%>
|
32
|
+
# <%= ::I18nTemplate::Translation.translate("Dashboard", i18n_wrappers, i18n_variables) %>
|
33
|
+
# </h2>
|
34
|
+
# <div>
|
35
|
+
# <%- i18n_variables = {}; i18n_wrappers = [] -%>
|
36
|
+
# <%- i18n_variables['current user posts count'] = capture do -%>
|
37
|
+
# <%= current_user.posts.count %>
|
38
|
+
# <%- end -%>
|
39
|
+
# <%= ::I18nTemplate::Translation.translate("Posts count: {current user posts count}",
|
40
|
+
# i18n_wrappers, i18n_variables) %>
|
41
|
+
# </div>
|
42
|
+
# <div>
|
43
|
+
# <%- i18n_variables = {}; i18n_wrappers = [] -%>
|
44
|
+
# <%- i18n_wrappers[1] = capture do -%>
|
45
|
+
# <a href="#" i18n_wrapper="1">
|
46
|
+
# <%- i18n_variables = {}; i18n_wrappers = [] -%>
|
47
|
+
# <%= ::I18nTemplate::Translation.translate("here", i18n_wrappers, i18n_variables) %>
|
48
|
+
# </a>
|
49
|
+
# <%- end -%>
|
50
|
+
# <%= ::I18nTemplate::Translation.translate("Click[1]here[/1]", i18n_wrappers, i18n_variables) %>
|
51
|
+
# </div>
|
52
|
+
# </body>
|
53
|
+
#
|
54
|
+
# So you just need to translate the following phrases:
|
55
|
+
#
|
56
|
+
# * _hello_
|
57
|
+
# * _Dashboard_
|
58
|
+
# * _Posts count: {current user posts count}_
|
59
|
+
# * _Click[1]here[/1]_
|
60
|
+
#
|
61
|
+
# I18n special markup element/attributes:
|
62
|
+
#
|
63
|
+
# * <i18n>content</i18n> - mark invisible for parser content for internationalization
|
64
|
+
# * <... i18n="i" ...>content<...> - (ignore) ignore element content internationalization
|
65
|
+
# * <... i18n="p" ...>content<...> - (phrase) explicitly enable content internationalization
|
66
|
+
# * <... i18n="s" ...>content<...> - (subphrase) mark element content as subphrase for parent element phrase
|
67
|
+
#
|
68
|
+
# Internal i18n element/attributes/scriptlets:
|
69
|
+
#
|
70
|
+
# * < ... i18n_phrase="phrase content" ...> - set extracted phrase into attribute
|
71
|
+
# * < ... i18n_wrapper="position" ...> - mark element as wrapper as position in i18n_wrappers array
|
72
|
+
# * <i18n_variable name="variable name">variable value</i18n_variable> - holds captured variable value with specified variable name from i18n_variables hash
|
73
|
+
# * <% i18n_wrappers %> - array of captured wrapper contents
|
74
|
+
# * <% i18n_variables %> - hash of name-value where name is variable name and value is captured variable value
|
75
|
+
|
76
|
+
class Document
|
77
|
+
# Marker character that opens a fold placeholder (U+2264).
FOLD_START = [0x2264].pack("U*").freeze

# Marker character that closes a fold placeholder (U+2265).
FOLD_END = [0x2265].pack("U*").freeze

# Folds mapping: [fold type, pattern] pairs, applied in this order by
# fold_special_tags!. Every match is replaced with a placeholder of the form
# FOLD_START<index>:<type>FOLD_END and the matched text is kept in @folds.
#
# NOTE(review): the first pattern only matches text shaped like
# "<!DOCTYPE--...-->"; an ordinary "<!DOCTYPE html ...>" declaration does not
# match it - confirm whether that is intended.
# NOTE(review): the script/comment/style/select patterns use ".+?", so an
# element with an empty body (e.g. "<script src=...></script>") is not folded.
FOLDS = [
  [ 'ignore', /<!DOCTYPE--.+?-->/m ],
  [ 'ignore', /<script[^>]*?>.+?<\/script>/m ],
  [ 'ignore', /<!--.+?-->/m ],
  [ 'ignore', /<style[^>]*?>.+?<\/style>/m ],
  [ 'eval', /<select.+?<\/select>/m ],
  [ 'ignore', /<%[^=](.*?)%>/m ],
  [ 'eval', /<%=(.*?)%>/m ]
].freeze

# Matches one fold placeholder.
# $1 - fold index
# $2 - fold type e.g (eval, ignore)
FOLD = /#{FOLD_START}(\d+):(\w+)#{FOLD_END}/.freeze

# Matches the start of an opening tag.
# $1 tag name, optionally namespaced. E.g. div or ns:c_d-e
OPEN_TAG = /^<(\w+(:[\w_-]+)?)/.freeze

# Matches a closing tag.
# $1 tag name, optionally namespaced. E.g. div or ns:c_d-e
CLOSED_TAG = /<\/(\w+(:[\w_-]+)?)>/.freeze

# Matches a token that ends as a self-closing tag ("/>").
SELF_CLOSE = /\/>$/.freeze

# Tag names that start a new phrase (see set_node_phrase).
BLOCK_TAGS = %w(
  i18n address blockquote p div h1 h2 h3 h4 h5 h6 li dd dt td th a
  legend label title caption option optgroup button
).freeze

# Matches an HTML entity, named or numeric. E.g. &copy; &#169;
HTML_ENTITY = /&(#\d+|\w+);/
|
113
|
+
|
114
|
+
# processed document source
attr_reader :source

# array of processing warnings
attr_reader :warnings

# array of folds (original text of every folded fragment, by fold index)
attr_reader :folds

# array of translation phrases
attr_reader :phrases

# root document node
attr_reader :root_node

# stack of document nodes (every parsed node, in document order)
attr_reader :node_stack
|
131
|
+
|
132
|
+
# Set up a document processor working on its own copy of the markup, with
# empty warning, fold and phrase lists.
#
# @param [String] source a pure html/xml document or erb template
def initialize(source)
  @warnings, @folds, @phrases = [], [], []
  @source = source.dup
end
|
140
|
+
|
141
|
+
# First pass over the document:
# * fold special fragments (scriptlets, comments, ...) into placeholders
# * parse the nodes and collect a phrase for every text node
# * rebuild the source from the node texts, which adds the translation key
#   attributes and fills the phrase list
#
# @return true
# @raise [RuntimeError] when the document has already been preprocessed
def preprocess!
  raise "Document is already preprocessed" if @preprocessed

  fold_special_tags!
  parse_nodes { |parsed| set_node_phrase(parsed) }

  # Rebuild the source in document order; node_to_text may modify the tokens
  # of child nodes that are emitted later, so the order matters.
  @source = ""
  @node_stack.each { |parsed| @source << node_to_text(parsed) }

  @preprocessed = true
end
|
162
|
+
|
163
|
+
# Process a document:
# * preprocess it first, unless that has already been done
# * expand translation keys into ERB translation calls
# * modify document source
#
# Calling this after an explicit preprocess! used to fail with
# "Document is already preprocessed"; the preprocessing step is now skipped
# when it has already run.
#
# @return true
# @raise [RuntimeError] when the document has already been processed
def process!
  raise "Document is already processed" if @processed

  preprocess! unless @preprocessed
  # Re-parse the preprocessed source so the nodes carry the i18n_* attributes.
  parse_nodes

  @source = ""
  @root_node.children.each { |node| translate_node(node) }
  unfold_special_tags!

  @processed = true
end
|
178
|
+
|
179
|
+
# Tell whether preprocess! has completed for this document.
# @return true once preprocess! has finished, nil before that
def preprocessed?
  @preprocessed
end
|
183
|
+
|
184
|
+
# Tell whether process! has completed for this document.
# @return true once process! has finished, nil before that
def processed?
  @processed
end
|
188
|
+
|
189
|
+
protected
|
190
|
+
|
191
|
+
# Replace every special fragment listed in FOLDS with a placeholder string
# FOLD_START<index>:<type>FOLD_END, modifying @source in place.
# The replaced text is appended to @folds, so <index> is its position there.
# @folds is reset first.
def fold_special_tags!
  @folds = []

  FOLDS.each do |fold_type, matcher|
    @source.gsub!(matcher) do |matched|
      placeholder = "#{FOLD_START}#{@folds.size}:#{fold_type}#{FOLD_END}"
      @folds.push(matched)
      placeholder
    end
  end
end
|
204
|
+
|
205
|
+
# Put the folded fragments back: every FOLD_START<index>:<type>FOLD_END
# placeholder in @source is replaced in place with @folds[index].
# @return [String, nil] @source, or nil when it contained no placeholder
def unfold_special_tags!
  @source.gsub!(FOLD) { @folds[Regexp.last_match(1).to_i] }
end
|
209
|
+
|
210
|
+
# Tokenize @source and build the node tree under a fresh @root_node.
#
# Every token becomes a Node and is appended to @node_stack in document order.
# Opening tags and text are also attached to the children of the current
# node; closing tags are not. An opening tag becomes the new current node
# unless it is self-closing; a closing tag moves the current node up to its
# parent, after recording a warning when its name does not match the tag of
# the current node.
#
# The root node is created with a block that sets @parent = self -
# presumably evaluated in the node's context so that the root is its own
# parent; confirm in Node.
#
# @yield [node] each node right after it has been added (optional)
def parse_nodes
  @root_node = Node.new(nil, 0, 0, "ROOT", "ROOT") { @parent = self }
  @node_stack = []
  cursor = @root_node
  tokenizer = ::HTML::Tokenizer.new(@source)

  while (token = tokenizer.next)
    case token
    when OPEN_TAG
      node = Node.new(cursor, tokenizer.line, tokenizer.position, token, Regexp.last_match(1))
      cursor.children.push(node)
      cursor = node unless token =~ SELF_CLOSE
    when CLOSED_TAG
      node = Node.new(cursor, tokenizer.line, tokenizer.position, token, Regexp.last_match(1))
      # The opening token looks like "<name ...", so the name starts at index 1.
      unless cursor.token[1, node.tag.size] == node.tag
        warn("EXTRA CLOSING TAG:#{node.tag}, UP:#{cursor.token}", node.line)
      end
      cursor = cursor.parent
    else
      node = Node.new(cursor, tokenizer.line, tokenizer.position, token)
      cursor.children.push(node)
    end

    @node_stack.push(node)
    yield node if block_given?
  end
end
|
238
|
+
|
239
|
+
# Escape the characters that have a special meaning inside a phrase:
# * '[' - [lsb] left square bracket
# * ']' - [rsb] right square bracket
# * '{' - [lcb] left curly bracket
# * '}' - [rcb] right curly bracket
# * '#' - [ns] number sign
#
# @param [String] phrase text to escape
# @return [String] a new string; the argument is not modified
def escape_phrase(phrase)
  substitutions = {
    '[' => '[lsb]',
    ']' => '[rsb]',
    '{' => '[lcb]',
    '}' => '[rcb]',
    '#' => '[ns]'
  }
  phrase.gsub(/[\[\]{}#]/) { |special| substitutions[special] }
end
|
258
|
+
|
259
|
+
# Attach the text of a text node to the node that owns its phrase.
#
# Nothing happens for tag nodes, blank text, or text whose
# whitespace-separated words all contain an HTML entity.
# Otherwise the text is normalized (double quotes become &quot;, whitespace
# runs become one space, ends are stripped) and appended, after a single
# space, to the phrase of the owner. The owner is found by walking up from
# the text node until one of these holds:
# * the parent of the current node is the root
# * the current node already has a phrase
# * the current node is marked i18n="p" or i18n="i"
# * the current node is a BLOCK_TAGS element not marked i18n="s"
# Text owned by an i18n="i" node is dropped.
#
# The quote replacement used to swap a double quote for itself, i.e. it did
# nothing; it now produces the &quot; entity.
#
# @param node a parsed node (see parse_nodes)
def set_node_phrase(node)
  return if node.tag?
  return if node.token.blank?
  return if node.token.split(/\s+/).all? { |word| word =~ HTML_ENTITY }

  phrase = node.token.gsub(/"/, '&quot;')
  # \s+ also covers "\r\n", so no separate pass is needed for line breaks.
  phrase.gsub!(/\s+/, ' ')
  phrase.strip!

  until node.parent.root?
    break if node.phrase ||
             node.token =~ /i18n="(p|i)"/ ||
             (BLOCK_TAGS.include?(node.tag) && node.token !~ /i18n="s"/)
    node = node.parent
  end
  return if node.token =~ /i18n="i"/

  node.phrase ||= ''
  node.phrase << " " << phrase
end
|
282
|
+
|
283
|
+
# Render one node back to source text during preprocessing, adding the
# i18n_phrase attribute when the node owns a translatable phrase.
#
# Steps:
# 1. Unfold the placeholders inside the node's own token.
# 2. Return that text unchanged when the node has no phrase, or when the
#    phrase consists of fold placeholders only.
# 3. Push the phrase down to the first child when that child is a tag and
#    either is the only child or the node has a wrapped_node_text
#    (cases like <div><span><span>Text</span></span></div>).
# 4. Otherwise build the phrase from the children: text is escaped,
#    <br> becomes [nl], any other element gets an i18n_wrapper="N" attribute
#    on its token and is represented as [N]...[/N].
# 5. Expand the 'eval' folds found in the node's phrase into {variable}
#    markers and wrap them in the child tokens (wrap_variables).
# 6. Normalize whitespace and quotes, record the phrase in @phrases and add
#    it to the returned tag text as i18n_phrase="...".
#
# Double quotes in the phrase are written as &quot;. Previously the
# replacement swapped a quote for itself, so a phrase containing a double
# quote ended the generated i18n_phrase="..." attribute early.
#
# NOTE(review): the wrapper attribute is inserted before the final ">" of the
# child token; for a token ending in "/>" it therefore lands after the slash.
#
# @param node a parsed node; the tokens of its children may be modified
# @return [String] the source text for the node itself
def node_to_text(node)
  node_text = node.token.dup
  node_text.gsub!(FOLD) { @folds[Regexp.last_match(1).to_i] }

  return node_text if node.phrase.nil? || node.phrase.strip.split(/ /).all? { |value| value =~ FOLD }

  # push down phrase for cases like <div><span><span>Text</span></span></div>
  if node.children.first && node.children.first.tag? &&
     (node.children.size == 1 || node.wrapped_node_text)
    node.children.first.phrase = node.phrase
    node.phrase = nil
    return node_text
  end

  # allowed fold indices: only 'eval' folds become variables
  fold_indices = []
  node.phrase.scan(FOLD).each do |index, type|
    fold_indices.push(index.to_i) if type == 'eval'
  end

  phrase = ""
  wrap_counter = 0
  node.children.each do |child|
    if child.text?
      phrase << escape_phrase(child.token)
    elsif child.tag == 'br'
      phrase << "[nl]"
    else
      wrap_counter += 1
      child.token.sub!(/>$/, " i18n_wrapper=\"#{wrap_counter}\">")
      if (text = child.wrapped_node_text)
        phrase << "[#{wrap_counter}]#{text}[/#{wrap_counter}]"
      else
        # NNODE marks a wrapper without wrapped text; translate_node warns
        # about keys that contain it.
        text = child.descendants_text
        phrase << "NNODE[#{wrap_counter}]#{text}[/#{wrap_counter}]"
      end
    end
  end

  # unfold phrase
  unfold_text!(phrase, fold_indices)

  # wrap variables in text nodes
  wrap_variables(node, fold_indices)

  phrase.gsub!(/\s+/, ' ')
  phrase.gsub!(/"/, '&quot;')
  phrase.strip!

  # append translation key attribute
  unless phrase.blank?
    @phrases << phrase
    node_text.sub!(/>$/) { " i18n_phrase=\"#{phrase}\">" }
  end

  node_text
end
|
345
|
+
|
346
|
+
# Replace, in place, the fold placeholders of +text+ that stand for allowed
# 'eval' folds with a {human readable variable name} marker. Placeholders of
# any other type, or whose index is not listed, are left as they are.
#
# @param [String] text phrase text, modified in place
# @param [Array<Integer>] fold_indices indices of the folds that may be expanded
# @return [String, nil] +text+, or nil when it contained no placeholder
def unfold_text!(text, fold_indices)
  text.gsub!(FOLD) do |placeholder|
    fold_index = Regexp.last_match(1).to_i
    fold_type = Regexp.last_match(2)
    expandable = fold_type == 'eval' && fold_indices.include?(fold_index)
    expandable ? "{#{fold_human_variable(fold_index)}}" : placeholder
  end
end
|
357
|
+
|
358
|
+
# Walk all descendants of +node+ and, in every text node, wrap each allowed
# 'eval' fold placeholder in
# <i18n_variable name="human readable name">placeholder</i18n_variable>.
# Tokens are modified in place; tag tokens are never touched.
#
# @param node the node whose descendants are processed
# @param [Array<Integer>] fold_indices indices of the folds that may be wrapped
def wrap_variables(node, fold_indices)
  node.children.each do |child|
    if child.text?
      child.token.gsub!(FOLD) do |placeholder|
        fold_index = Regexp.last_match(1).to_i
        if Regexp.last_match(2) == 'eval' && fold_indices.include?(fold_index)
          "<i18n_variable name=\"#{fold_human_variable(fold_index)}\">#{placeholder}</i18n_variable>"
        else
          placeholder
        end
      end
    end

    wrap_variables(child, fold_indices)
  end
end
|
373
|
+
|
374
|
+
# Derive a human readable variable name from the folded fragment at +index+.
#
# The leading "<%=" and any complete tags are removed, every run of non-word
# characters and every underscore becomes a space, and the result is split
# into words. Leading helper names are then dropped: one of h / render / f,
# then "partial", then the check_box_tag / radio_button_tag / text_field_tag /
# select_tag prefixes. At most the first five remaining words are kept.
# A warning is recorded when nothing is left.
#
# @param [Integer] index position of the fragment in @folds
# @return [String] the words joined by single spaces (may be empty)
def fold_human_variable(index)
  fold = @folds[index]

  cleaned = fold.sub(/^<%=/, '').gsub(/<\/?[^>]+>/, '').gsub(/\W+/, ' ').tr('_', ' ').strip
  words = cleaned.split(/\s+/)

  words.shift if %w(h render f).include?(words[0])
  words.shift if words[0] == 'partial'

  # Checked one after another, so more than one prefix can be removed.
  helper_prefixes = [
    %w(check box tag),
    %w(radio button tag),
    %w(text field tag),
    %w(select tag)
  ]
  helper_prefixes.each do |prefix|
    words.shift(prefix.size) if words[0, prefix.size] == prefix
  end

  variable = words[0, 5].join(" ")
  warn "EMPTY VARIABLE:#{fold}" if variable.empty?
  variable
end
|
397
|
+
|
398
|
+
# Append the final ERB for +node+ and, recursively, its children to @source.
#
# * Text nodes are appended as they are (unless +notext+ is set).
# * A node carrying i18n_phrase is replaced by: its opening tag (without the
#   i18n_phrase / i18n="p" attributes), a scriptlet resetting i18n_variables
#   and i18n_wrappers, one capture block per variable or wrapper child, the
#   Translation.translate call for the phrase, and its closing tag.
#   Text children of such a node are not emitted here; only tag children that
#   are i18n_variable elements or carry i18n_wrapper are visited.
# * An i18n_variable element outside a phrase becomes a capture block.
# * Any other element is emitted with i18n="i" removed, followed by its
#   children and its closing tag.
# <i18n> elements never produce their own opening or closing tag.
#
# @param node the parsed node to emit
# @param translate false when the call comes from inside an element that is
#   already being translated; only used to warn about nested phrases
# @param notext when true, text nodes are skipped
def translate_node(node, translate = true, notext = false)
  if node.text?
    @source << node.token unless notext
  else
    if node.token =~ /i18n_phrase/
      warn("NESTED T9N:#{node.tag} UP #{node.parent.token}", node.line) unless translate
      # sub! removes the attribute from the token and leaves the phrase in $1;
      # it must be copied before the next regexp operation overwrites $1.
      node.token.sub!(/ i18n_phrase="(.+?)"/, '')
      key = $1.dup
      node.token.sub!(/ i18n="p"/, '')

      warn("BLOCK NOT EXPANDED:#{node.token} #{key}", node.line) if key =~ FOLD
      warn("NODE MISSING:#{node.token} #{key}", node.line) if key =~ /NNODE/

      @source << node.token unless node.tag == 'i18n'

      @source << "<%- i18n_variables = {}; i18n_wrappers = [] -%>"
      node.children.each do |child|
        if child.tag?
          if child.token =~ /<i18n_variable name="(.*?)"/
            # capture the variable value under its human readable name
            @source << "<%- i18n_variables['#$1'] = capture do -%>"
            translate_node(child.children.first, false, false)
            @source << "<%- end -%>"
          elsif child.token =~ /i18n_wrapper=\"(\d+)\"/
            # capture the wrapper element at its position; its text is
            # skipped (notext) because it is part of the translated phrase
            @source << "<%- i18n_wrappers[#$1] = capture do -%>"
            translate_node(child, false, true)
            @source << "<%- end -%>"
          end
        end
      end

      @source << "<%= ::I18nTemplate::Translation.translate(#{key.inspect}, i18n_wrappers, i18n_variables) %>"
      # NOTE(review): this compares the last two characters of node.tag with
      # '/>'; if tag only holds the tag name this can never be true and
      # self-closing elements also get a closing tag - confirm against Node.
      @source << "</#{node.tag}>" unless node.tag == 'i18n' || node.tag[-2,2] == '/>'

      return
    elsif node.token =~ /<i18n_variable name="(.*?)"/
      @source << "<%- i18n_variables['#$1'] = capture do -%>"
      translate_node(node.children.first, false, false)
      @source << "<%- end -%>"

      return
    end

    # plain element: emit it without the ignore marker and recurse
    @source << node.token.sub(/\s+i18n="i"/, '') unless node.tag == 'i18n'
    node.children.each do |child|
      translate_node(child, translate, notext)
    end
    # NOTE(review): same node.tag[-2,2] check as above - confirm against Node.
    @source << "</#{node.tag}>" unless node.tag == 'i18n' || node.tag[-2,2] == '/>'
  end
end
|
447
|
+
|
448
|
+
# Record a warning in @warnings.
# @param [String] message a message
# @param line (optional) a line in source; when given, the entry is prefixed
#   with "[SOURCE:<line>]: "
def warn(message, line = nil)
  entry = line ? "[SOURCE:#{line}]: #{message}" : message
  @warnings << entry
end
|
458
|
+
|
459
|
+
end
|
460
|
+
end
|