simple_po_parser 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +3 -0
- data/.travis.yml +8 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +58 -0
- data/LICENSE.txt +22 -0
- data/README.md +30 -0
- data/Rakefile +68 -0
- data/lib/simple_po_parser/error.rb +13 -0
- data/lib/simple_po_parser/parser.rb +352 -0
- data/lib/simple_po_parser/tokenizer.rb +21 -0
- data/lib/simple_po_parser/version.rb +3 -0
- data/lib/simple_po_parser.rb +21 -0
- data/simple_po_parser.gemspec +26 -0
- data/spec/simple_po_parser/fixtures/complex_entry.po +21 -0
- data/spec/simple_po_parser/fixtures/header.po +7 -0
- data/spec/simple_po_parser/fixtures/simple_entry.po +6 -0
- data/spec/simple_po_parser/parser_spec.rb +49 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/utils/random_pofile_generator.rb +175 -0
- data/test/benchmark.po +683 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5af2707fffbad814707531a12d5c255cf0be4912
|
4
|
+
data.tar.gz: 1a0bc546529532459b6059dc3cf858815cf7f6b5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d7b3fe6a2abff7824a0fbb57f4671116370e6b7a84a83d8c871de8b2fdb75f832ecaf4b2f6fe09b6c5794732479685b4f3d130543be23f59a36614cd4e19eaca
|
7
|
+
data.tar.gz: 480e973695ec4c9ea10b51e7971306862df1b7fb7109ad7ac8fc70a03448d87e1a72b87921dc80f16ef5689cfba3013753959909c4b63ba0babfe9c306020558
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
simple_po_parser (0.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
awesome_print (1.7.0)
|
10
|
+
coveralls (0.8.16)
|
11
|
+
json (>= 1.8, < 3)
|
12
|
+
simplecov (~> 0.12.0)
|
13
|
+
term-ansicolor (~> 1.3.0)
|
14
|
+
thor (~> 0.19.1)
|
15
|
+
tins (>= 1.6.0, < 2)
|
16
|
+
diff-lcs (1.2.5)
|
17
|
+
docile (1.1.5)
|
18
|
+
geminabox-release (0.2.1)
|
19
|
+
bundler (>= 1.0.14)
|
20
|
+
json (2.0.2)
|
21
|
+
rake (12.0.0)
|
22
|
+
rspec (3.5.0)
|
23
|
+
rspec-core (~> 3.5.0)
|
24
|
+
rspec-expectations (~> 3.5.0)
|
25
|
+
rspec-mocks (~> 3.5.0)
|
26
|
+
rspec-core (3.5.4)
|
27
|
+
rspec-support (~> 3.5.0)
|
28
|
+
rspec-expectations (3.5.0)
|
29
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
30
|
+
rspec-support (~> 3.5.0)
|
31
|
+
rspec-mocks (3.5.0)
|
32
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
33
|
+
rspec-support (~> 3.5.0)
|
34
|
+
rspec-support (3.5.0)
|
35
|
+
simplecov (0.12.0)
|
36
|
+
docile (~> 1.1.0)
|
37
|
+
json (>= 1.8, < 3)
|
38
|
+
simplecov-html (~> 0.10.0)
|
39
|
+
simplecov-html (0.10.0)
|
40
|
+
term-ansicolor (1.3.2)
|
41
|
+
tins (~> 1.0)
|
42
|
+
thor (0.19.4)
|
43
|
+
tins (1.13.0)
|
44
|
+
|
45
|
+
PLATFORMS
|
46
|
+
ruby
|
47
|
+
|
48
|
+
DEPENDENCIES
|
49
|
+
awesome_print
|
50
|
+
bundler
|
51
|
+
coveralls
|
52
|
+
geminabox-release (= 0.2.1)
|
53
|
+
rake
|
54
|
+
rspec (~> 3.5.0)
|
55
|
+
simple_po_parser!
|
56
|
+
|
57
|
+
BUNDLED WITH
|
58
|
+
1.13.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2017 Dennis-Florian Herr @ Experteer GmbH
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Simple Po Parser
|
2
|
+
|
3
|
+
This is a simple PO file to ruby hash parser, which complies with [GNU PO file specification](https://www.gnu.org/software/gettext/manual/html_node/PO-Files.html). Tested with the msgcat (GNU gettext-tools) 0.18.3 tool.
|
4
|
+
|
5
|
+
## Hash format
|
6
|
+
|
7
|
+
A PO message is parsed into a hash with meaningful keys for each type of line.
|
8
|
+
The values are always arrays of strings.
|
9
|
+
Each string is representing one line of content in the PO file.
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
{
|
13
|
+
:translator_comment => [""],
|
14
|
+
:extracted_comment => [""],
|
15
|
+
:reference => [""],
|
16
|
+
:flag => [""],
|
17
|
+
:previous_msgctxt => [""], # msgctxt of the message used for the fuzzy translation
|
18
|
+
:previous_msgid => [""], # msgid of the message used for the fuzzy translation
|
19
|
+
:previous_msgid_plural => [""],
|
20
|
+
:msgctxt => [""],
|
21
|
+
:msgid => [""],
|
22
|
+
:msgid_plural => [""],
|
23
|
+
:msgstr => [""], # for singular messages
|
24
|
+
"msgstr[N]" => [""] # for plural messages, where N is the plural number starting from 0
|
25
|
+
}
|
26
|
+
```
|
27
|
+
|
28
|
+
### License
|
29
|
+
|
30
|
+
License: [MIT](LICENSE.txt) - Copyright (c) 2017 Dennis-Florian Herr @Experteer GmbH
|
data/Rakefile
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
|
5
|
+
RSpec::Core::RakeTask.new(:spec)
|
6
|
+
task :default => :spec
|
7
|
+
|
8
|
+
require 'geminabox-release'
|
9
|
+
GeminaboxRelease.patch(:use_config => true, :remove_release => false)
|
10
|
+
|
11
|
+
desc "Generate a random po file to \"test/benchmark.po\". Takes optional rake args for number of entries"
task 'generate_random_pofile', :messages, :obsoletes do |_t, args|
  # rake passes task arguments as strings, hence the string defaults and the to_i below
  args.with_defaults(:messages => "200", :obsoletes => "10")
  require_relative 'spec/utils/random_pofile_generator'
  # Use the SimplePoParser namespace, exactly like the parser:five_random_po_full task does for
  # the same helper file; the gem defines no PoParser module.
  # NOTE(review): spec/utils/random_pofile_generator.rb is not visible here - confirm the namespace.
  SimplePoParser::RandomPoFileGenerator.generate_file(
    File.expand_path("test/benchmark.po", __dir__), args[:messages].to_i, args[:obsoletes].to_i
  )
end
|
19
|
+
|
20
|
+
# Benchmark and profiling tasks for the parser.
namespace :parser do
  require 'benchmark'
  require 'simple_po_parser'

  desc "Benchmark of 10 full PoParser runs of test/benchmark.po"
  task "benchmark" do
    pofile = File.expand_path("test/benchmark.po", __dir__)
    Benchmark.bmbm do |x|
      x.report("Parser:") { 10.times { SimplePoParser.parse(pofile) } }
    end
  end

  desc "Generate 5 random PO files with 100 to 500 messages and benchmark each full PoParser run"
  task 'five_random_po_full' do
    require_relative 'spec/utils/random_pofile_generator'
    pofile = File.expand_path("test/benchmark.po.tmp", __dir__)
    # Reference the Benchmark constants explicitly instead of including Benchmark into Object.
    Benchmark.benchmark(Benchmark::CAPTION, 6, Benchmark::FORMAT, "total:") do |x|
      total = nil
      total_length = 0
      # exactly five runs, as the task description promises (the former 0..5 range ran six)
      5.times do |i|
        length = (Random.new.rand * 400.0 + 100).to_i
        total_length += length
        puts "Benchmarking file of length #{length}"
        SimplePoParser::RandomPoFileGenerator.generate_file(pofile, length)
        t = x.report("try#{i}:") { SimplePoParser.parse(pofile) }
        File.unlink(pofile)
        total = total ? total + t : t
      end
      puts "Total message length #{total_length}"
      # the returned array supplies the value for the extra "total:" label
      [total]
    end
  end

  desc "Show ruby-prof profiler for spec/fixtures/complex_entry.po"
  task "profile_parser" do
    require 'ruby-prof'
    RubyProf.start
    po_message = File.read(File.expand_path("spec/simple_po_parser/fixtures/complex_entry.po", __dir__))
    SimplePoParser.parse_message(po_message)
    result = RubyProf.stop

    printer = RubyProf::FlatPrinter.new(result)
    printer.print(STDOUT)
  end
end
|
@@ -0,0 +1,352 @@
|
|
1
|
+
module SimplePoParser
|
2
|
+
# FastParser directly using Ruby's powerful StringScanner (strscan)
|
3
|
+
#
|
4
|
+
# Important notes about StringScanner.scan:
|
5
|
+
# * scan will return nil if there is no match. Using the regex * (zero or more) quantifier will
|
6
|
+
# let scan return an empty string if there is "no match" as the empty string qualifies as
|
7
|
+
# a match of the regex (zero times). We make use of this "trick"
|
8
|
+
# * the start of line anchor ^ is obsolete as scan only ever matches at the current scan position.
|
9
|
+
# * rubys regex is by default in single-line mode, therefore scan will only match until
|
10
|
+
# the next newline is hit (unless multi-line mode is explicitly enabled)
|
11
|
+
module Parser
|
12
|
+
require_relative 'error'
|
13
|
+
require 'strscan'
|
14
|
+
extend self
|
15
|
+
|
16
|
+
# parse a single message of the PO format.
|
17
|
+
#
|
18
|
+
# @param message a single PO message in String format without leading or trailing whitespace
|
19
|
+
# @return [Hash] parsed PO message information in Hash format
|
20
|
+
def parse(message)
  # Raises ParserError when the message is not valid PO syntax. The error message carries
  # the original error text and the partial result collected up to the failure.
  @result = {}
  @scanner = StringScanner.new(message.strip)
  begin
    lines
  rescue ParserError => pe
    # Re-raise instead of printing and calling `exit`: a library must not terminate the
    # host process, and the caller has to be able to handle malformed input.
    error_msg = "Parsing error!\n#{pe.message}\nResult up to error: '#{@result}'"
    # keep only the frames that belong to this gem; fall back to the full backtrace
    # when the filter would leave nothing
    backtrace = pe.backtrace.select { |frame| frame =~ /lib\/simple_po_parser/ }
    backtrace = pe.backtrace if backtrace.empty?
    raise ParserError, error_msg, backtrace
  end
  @result
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
#########################################
|
38
|
+
### branching ###
|
39
|
+
#########################################
|
40
|
+
|
41
|
+
# arbitrary line of a PO message. Can be comment or message
|
42
|
+
# message parsing is always started with checking for msgctxt as content is expected in
|
43
|
+
# msgctxt -> msgid -> msgid_plural -> msgstr order
|
44
|
+
def lines
  # a leading '#' marks a comment line, anything else has to be message content
  @scanner.scan(/#/) ? comment : msgctxt
rescue PoSyntaxError => pe
  # throw a normal ParserError to break the recursion
  raise ParserError, "Syntax error in lines\n" + pe.message, pe.backtrace
end
|
56
|
+
|
57
|
+
# match a comment line. called on lines starting with '#'.
|
58
|
+
# Calls lines again once the comment line was parsed
|
59
|
+
def comment
  # Position of the leading '#', which the caller consumed right before calling this method.
  # Remembering it avoids rewinding by a fixed number of bytes, which fails when getch
  # returns nil (end of input) or consumes a multibyte character.
  comment_start = @scanner.pos - 1
  case @scanner.getch
  when ' '
    skip_whitespace
    add_result(:translator_comment, comment_text)
    lines
  when '.'
    skip_whitespace
    add_result(:extracted_comment, comment_text)
    lines
  when ':'
    skip_whitespace
    add_result(:reference, comment_text)
    lines
  when ','
    skip_whitespace
    add_result(:flag, comment_text)
    lines
  when '|'
    skip_whitespace
    previous_comments
    lines
  when "\n"
    add_result(:translator_comment, "") # empty comment line
    lines
  when '~'
    if @result[:previous_msgctxt] || @result[:previous_msgid] || @result[:previous_msgid_plural]
      raise PoSyntaxError, "Previous comment entries need to be marked obsolete too in obsolete message entries. But already got: #{@result}"
    end
    skip_whitespace
    add_result(:obsolete, comment_text)
    # once a line is obsolete, only obsolete lines may follow
    obsoletes
  else
    # rewind to the '#' so the error shows the offending comment start
    @scanner.pos = comment_start
    raise PoSyntaxError, "Unknown comment type #{@scanner.peek(10).inspect}"
  end
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in comment\n" + pe.message, pe.backtrace
end
|
100
|
+
|
101
|
+
# matches the msgctxt line and will continue to check for msgid afterwards
|
102
|
+
#
|
103
|
+
# msgctxt is optional
|
104
|
+
def msgctxt
  if @scanner.scan(/msgctxt/)
    skip_whitespace
    context = message_line
    add_result(:msgctxt, context)
    # an empty first line announces a multiline msgctxt
    message_multiline(:msgctxt) if context.empty?
  end
  # with or without a context, the msgid has to follow
  msgid
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgctxt\n" + pe.message, pe.backtrace
end
|
117
|
+
|
118
|
+
# matches the msgid line. Will check for optional msgid_plural.
|
119
|
+
# Will advance to msgstr or msgstr_plural based on msgid_plural
|
120
|
+
#
|
121
|
+
# msgid is required
|
122
|
+
def msgid
  # Guard first: a message without msgid cannot be parsed any further.
  unless @scanner.scan(/msgid/)
    err_msg = "Message without msgid is not allowed."
    err_msg += " The Line started unexpectedly with #{@scanner.peek(10).inspect}."
    raise PoSyntaxError, err_msg
  end
  skip_whitespace
  text = message_line
  add_result(:msgid, text)
  # an empty first line announces a multiline msgid
  message_multiline(:msgid) if text.empty?
  # the presence of msgid_plural decides which kind of msgstr has to follow
  if msgid_plural
    msgstr_plural
  else
    msgstr
  end
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgid\n" + pe.message, pe.backtrace
end
|
144
|
+
|
145
|
+
# matches the msgid_plural line.
|
146
|
+
#
|
147
|
+
# msgid_plural is optional
|
148
|
+
#
|
149
|
+
# @return [boolean] true if msgid_plural is present, false otherwise
|
150
|
+
def msgid_plural
  # msgid_plural is optional, so a missing keyword is not an error
  return false unless @scanner.scan(/msgid_plural/)
  skip_whitespace
  text = message_line
  add_result(:msgid_plural, text)
  # an empty first line announces a multiline msgid_plural
  message_multiline(:msgid_plural) if text.empty?
  true
rescue PoSyntaxError => pe
  # name this method in the error trail (it used to report "msgid")
  raise PoSyntaxError, "Syntax error in msgid_plural\n" + pe.message, pe.backtrace
end
|
165
|
+
|
166
|
+
# matches the msgstr singular line
|
167
|
+
#
|
168
|
+
# msgstr is required in singular translations
|
169
|
+
def msgstr
  unless @scanner.scan(/msgstr/)
    raise PoSyntaxError, "Singular message without msgstr is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
  end
  skip_whitespace
  translation = message_line
  add_result(:msgstr, translation)
  # an empty first line announces a multiline msgstr
  message_multiline(:msgstr) if translation.empty?
  skip_whitespace
  # msgstr is the last entry of a singular message, so nothing may follow
  return if @scanner.eos?
  raise PoSyntaxError, "Unexpected content after expected message end #{@scanner.peek(10).inspect}"
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgstr\n" + pe.message, pe.backtrace
end
|
185
|
+
|
186
|
+
|
187
|
+
# matches the msgstr[N] lines of a plural message, one line per call, recursing with the
# next expected index until no further msgstr[N] line follows
#
# msgstr[0] is required in plural translations
#
# @param num [Integer] the plural index the next msgstr[N] line must carry
def msgstr_plural(num = 0)
  msgstr_key = @scanner.scan(/msgstr\[\d\]/) # matches 'msgstr[0]' to 'msgstr[9]'
  unless msgstr_key
    if num == 0
      raise PoSyntaxError, "Plural message without msgstr[0] is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
    end
    # at least msgstr[0] was read, so the message has to end here
    return if @scanner.eos?
    raise PoSyntaxError, "End of message was expected, but line started unexpectedly with #{@scanner.peek(10).inspect}"
  end
  # msgstr plurals must come in 0-based index in order
  index = msgstr_key[/\d/].to_i
  raise PoSyntaxError, "Bad 'msgstr[index]' index." if index != num
  skip_whitespace
  translation = message_line
  add_result(msgstr_key, translation)
  message_multiline(msgstr_key) if translation.empty?
  msgstr_plural(num + 1)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgstr_plural\n" + pe.message, pe.backtrace
end
|
208
|
+
|
209
|
+
# parses the content of a previous-comment line ('#|'), which carries the msgctxt, msgid
# or msgid_plural of the message a fuzzy translation was derived from
def previous_comments
  # next part must be msgctxt, msgid or msgid_plural
  unless @scanner.scan(/msg/)
    raise PoSyntaxError, "Previous comments must start with '#| msg'. #{@scanner.peek(10).inspect} unknown."
  end
  key =
    if @scanner.scan(/id/)
      @scanner.scan(/_plural/) ? :previous_msgid_plural : :previous_msgid
    elsif @scanner.scan(/ctxt/)
      :previous_msgctxt
    else
      raise PoSyntaxError, "Previous comment type #{("msg" + @scanner.peek(10)).inspect} unknown."
    end
  skip_whitespace
  previous_text = message_line
  add_result(key, previous_text)
  # an empty first line announces a multiline entry
  previous_multiline(key) if previous_text.empty?
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in previous_comments\n" + pe.message, pe.backtrace
end
|
235
|
+
|
236
|
+
# collects the continuation lines of a multiline previous-comment entry under the given key
def previous_multiline(key)
  # scan multilines until no further multiline is hit
  # /#\|\p{Blank}*"/ needs to catch the double quote to ensure it hits a previous
  # multiline and not another line type.
  return unless @scanner.scan(/#\|\p{Blank}*"/)
  @scanner.pos -= 1 # go one character back, so we can reuse the "message line" method
  add_result(key, message_line)
  previous_multiline(key) # go on until we no longer hit a multiline line
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in previous_multiline\n" + pe.message, pe.backtrace
end
|
250
|
+
|
251
|
+
# collects the continuation lines of a multiline message entry under the given key
def message_multiline(key)
  skip_whitespace
  # only a line starting with a double quote continues the entry
  return unless @scanner.check(/"/)
  add_result(key, message_line)
  message_multiline(key)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in message_multiline with key '#{key}'\n" + pe.message, pe.backtrace
end
|
262
|
+
|
263
|
+
# identifies a message line and returns its text or raises an error
|
264
|
+
#
|
265
|
+
# @return [String] message_text
|
266
|
+
def message_line
  # Remember where the line content starts: rewinding by one byte is wrong when getch
  # returns nil (end of input) or consumes a multibyte character.
  line_start = @scanner.pos
  unless @scanner.getch == '"'
    @scanner.pos = line_start
    err_msg = "A message text needs to start with the double quote character '\"',"
    err_msg += " but this was found: #{@scanner.peek(10).inspect}"
    raise PoSyntaxError, err_msg
  end
  text = message_text
  unless @scanner.getch == '"'
    err_msg = "The message text '#{text}' must be finished with the double quote character '\"'."
    raise PoSyntaxError, err_msg
  end
  skip_whitespace
  unless end_of_line
    err_msg = "There should be only whitespace until the end of line"
    err_msg += " after the double quote character of a message text."
    raise PoSyntaxError, err_msg
  end
  text
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in message_line\n" + pe.message, pe.backtrace
end
|
291
|
+
|
292
|
+
# used to parse all obsolete lines. An obsolete message may only contain obsolete entries
|
293
|
+
def obsoletes
  # consume every consecutive obsolete ('#~') line
  while @scanner.scan(/#~/)
    skip_whitespace
    add_result(:obsolete, comment_text)
  end
  # whatever stopped the loop has to be the end of the message
  return if @scanner.eos?
  raise PoSyntaxError, "All lines must be obsolete after the first obsolete line, but got #{@scanner.peek(10).inspect}."
end
|
302
|
+
|
303
|
+
#########################################
|
304
|
+
### scanning ###
|
305
|
+
#########################################
|
306
|
+
|
307
|
+
# returns the text of a comment
|
308
|
+
#
|
309
|
+
# @return [String] text
|
310
|
+
def comment_text
  text = @scanner.scan(/.*/) # everything until newline
  text.rstrip! # benchmarked faster to rstrip the string in place
  raise PoSyntaxError, "Comment text should advance to next line or stop at eos" unless end_of_line
  text
rescue PoSyntaxError => pe
  # error trail names this method (the message used to be misspelled "commtent_text")
  raise PoSyntaxError, "Syntax error in comment_text\n" + pe.message, pe.backtrace
end
|
320
|
+
|
321
|
+
# returns the text of a message line
|
322
|
+
#
|
323
|
+
# @return [String] text
|
324
|
+
def message_text
  # Any run of escaped backslashes, escaped quotes and non-quote characters; the run ends
  # right before the first unescaped double quote. As the pattern also matches the empty
  # string, a String is returned even for an empty message text.
  until_unescaped_quote = /(\\(\\|")|[^"])*/
  @scanner.scan_until(until_unescaped_quote)
end
|
327
|
+
|
328
|
+
# advances the scanner until the next non whitespace position.
|
329
|
+
# Does not match newlines, as only the \p{Blank} character class (spaces and tabs) is skipped
|
330
|
+
def skip_whitespace
  # \p{Blank} covers spaces and tabs only, so the scanner never leaves the current line
  blanks = /\p{Blank}+/
  @scanner.skip(blanks)
end
|
333
|
+
|
334
|
+
# returns true if the scanner is at beginning of next line or end of string
|
335
|
+
def end_of_line
  @scanner.skip(/\n/) # consume the newline, if the scanner is positioned on one
  return true if @scanner.eos?
  @scanner.bol?
end
|
339
|
+
|
340
|
+
# adds text to the given key in results
|
341
|
+
# creates an array if the given key already has a result
|
342
|
+
def add_result(key, text)
  # the first text for a key creates its array, every further text is appended to it
  (@result[key] ||= []).push(text)
end
|
349
|
+
|
350
|
+
end
|
351
|
+
|
352
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module SimplePoParser
  # Splits a PO file into single PO message entities (messages are separated by two
  # newlines) and hands each of them to the Parser.
  class Tokenizer
    def initialize
      @messages = []
    end

    # Reads the file block by block and parses every non-empty block.
    #
    # @param path [String] path of the PO file
    # @return [Array] the results of Parser.parse for all blocks, in file order
    def parse_file(path)
      # the block form of File.open closes the file handle again, even if parsing raises
      File.open(path, 'r') do |file|
        file.each_line("\n\n") do |block|
          block.strip! # dont parse empty blocks
          @messages << parse_block(block) unless block.empty?
        end
      end
      @messages
    end

    private

    def parse_block(block)
      Parser.parse(block)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'simple_po_parser/error'
|
2
|
+
require 'simple_po_parser/parser'
|
3
|
+
require 'simple_po_parser/tokenizer'
|
4
|
+
require 'simple_po_parser/version'
|
5
|
+
|
6
|
+
module SimplePoParser
  # Parses a whole PO file.
  #
  # @param path path of the PO file, handed to Tokenizer#parse_file
  # @return whatever Tokenizer#parse_file returns (an array of PO messages as hashes)
  def self.parse(path)
    Tokenizer.new.parse_file(path)
  end

  # Parses a single message.
  #
  # @param message a single PO message, handed to Parser.parse
  # @return whatever Parser.parse returns
  def self.parse_message(message)
    Parser.parse(message)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'simple_po_parser/version'
|
5
|
+
|
6
|
+
# Gem metadata; the packaged files are taken from the git index.
Gem::Specification.new do |gem|
  gem.name = 'simple_po_parser'
  gem.version = SimplePoParser::VERSION
  gem.authors = ['Dennis-Florian Herr']
  gem.email = ['dennis.herr@experteer.com']
  gem.summary = 'A simple PO file to ruby hash parser'
  gem.description = 'A simple PO file to ruby hash parser . PO files are translation files generated by GNU/Gettext tool.'
  gem.homepage = 'http://github.com/experteer/simple_po_parser'
  gem.license = 'MIT'

  # $/ is the input record separator, so this yields one entry per file known to git
  gem.files = `git ls-files`.split($/)
  gem.executables = gem.files.grep(%r{^bin/}) { |file| File.basename(file) }
  gem.test_files = gem.files.grep(%r{^spec/})
  gem.require_paths = ['lib']

  # Development deps
  gem.add_development_dependency 'bundler', '>= 0'
  gem.add_development_dependency 'rake', '>= 0'
  # geminabox release
  gem.add_development_dependency 'geminabox-release', '0.2.1'
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# translator-comment
|
2
|
+
#
|
3
|
+
#. extract
|
4
|
+
#: reference1
|
5
|
+
#: reference2
|
6
|
+
#, flag
|
7
|
+
#| msgctxt "previous context"
|
8
|
+
#| msgid ""
|
9
|
+
#| "multiline\n"
|
10
|
+
#|"previous messageid"
|
11
|
+
#| msgid_plural "previous msgid_plural"
|
12
|
+
msgctxt "Context"
|
13
|
+
msgid "msgid"
|
14
|
+
msgid_plural ""
|
15
|
+
"multiline msgid_plural\n"
|
16
|
+
""
|
17
|
+
msgstr[0] "msgstr 0"
|
18
|
+
msgstr[1] ""
|
19
|
+
"msgstr 1 multiline 1\n"
|
20
|
+
"msgstr 1 line 2\n"
|
21
|
+
msgstr[2] "msgstr 2"
|