simple_po_parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.rspec +3 -0
- data/.travis.yml +8 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +58 -0
- data/LICENSE.txt +22 -0
- data/README.md +30 -0
- data/Rakefile +68 -0
- data/lib/simple_po_parser/error.rb +13 -0
- data/lib/simple_po_parser/parser.rb +352 -0
- data/lib/simple_po_parser/tokenizer.rb +21 -0
- data/lib/simple_po_parser/version.rb +3 -0
- data/lib/simple_po_parser.rb +21 -0
- data/simple_po_parser.gemspec +26 -0
- data/spec/simple_po_parser/fixtures/complex_entry.po +21 -0
- data/spec/simple_po_parser/fixtures/header.po +7 -0
- data/spec/simple_po_parser/fixtures/simple_entry.po +6 -0
- data/spec/simple_po_parser/parser_spec.rb +49 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/utils/random_pofile_generator.rb +175 -0
- data/test/benchmark.po +683 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5af2707fffbad814707531a12d5c255cf0be4912
|
4
|
+
data.tar.gz: 1a0bc546529532459b6059dc3cf858815cf7f6b5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d7b3fe6a2abff7824a0fbb57f4671116370e6b7a84a83d8c871de8b2fdb75f832ecaf4b2f6fe09b6c5794732479685b4f3d130543be23f59a36614cd4e19eaca
|
7
|
+
data.tar.gz: 480e973695ec4c9ea10b51e7971306862df1b7fb7109ad7ac8fc70a03448d87e1a72b87921dc80f16ef5689cfba3013753959909c4b63ba0babfe9c306020558
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
simple_po_parser (0.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
awesome_print (1.7.0)
|
10
|
+
coveralls (0.8.16)
|
11
|
+
json (>= 1.8, < 3)
|
12
|
+
simplecov (~> 0.12.0)
|
13
|
+
term-ansicolor (~> 1.3.0)
|
14
|
+
thor (~> 0.19.1)
|
15
|
+
tins (>= 1.6.0, < 2)
|
16
|
+
diff-lcs (1.2.5)
|
17
|
+
docile (1.1.5)
|
18
|
+
geminabox-release (0.2.1)
|
19
|
+
bundler (>= 1.0.14)
|
20
|
+
json (2.0.2)
|
21
|
+
rake (12.0.0)
|
22
|
+
rspec (3.5.0)
|
23
|
+
rspec-core (~> 3.5.0)
|
24
|
+
rspec-expectations (~> 3.5.0)
|
25
|
+
rspec-mocks (~> 3.5.0)
|
26
|
+
rspec-core (3.5.4)
|
27
|
+
rspec-support (~> 3.5.0)
|
28
|
+
rspec-expectations (3.5.0)
|
29
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
30
|
+
rspec-support (~> 3.5.0)
|
31
|
+
rspec-mocks (3.5.0)
|
32
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
33
|
+
rspec-support (~> 3.5.0)
|
34
|
+
rspec-support (3.5.0)
|
35
|
+
simplecov (0.12.0)
|
36
|
+
docile (~> 1.1.0)
|
37
|
+
json (>= 1.8, < 3)
|
38
|
+
simplecov-html (~> 0.10.0)
|
39
|
+
simplecov-html (0.10.0)
|
40
|
+
term-ansicolor (1.3.2)
|
41
|
+
tins (~> 1.0)
|
42
|
+
thor (0.19.4)
|
43
|
+
tins (1.13.0)
|
44
|
+
|
45
|
+
PLATFORMS
|
46
|
+
ruby
|
47
|
+
|
48
|
+
DEPENDENCIES
|
49
|
+
awesome_print
|
50
|
+
bundler
|
51
|
+
coveralls
|
52
|
+
geminabox-release (= 0.2.1)
|
53
|
+
rake
|
54
|
+
rspec (~> 3.5.0)
|
55
|
+
simple_po_parser!
|
56
|
+
|
57
|
+
BUNDLED WITH
|
58
|
+
1.13.6
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2017 Dennis-Florian Herr @ Experteer GmbH
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Simple Po Parser
|
2
|
+
|
3
|
+
This is a simple PO file to ruby hash parser, which complies with [GNU PO file specification](https://www.gnu.org/software/gettext/manual/html_node/PO-Files.html). Tested with the msgcat (GNU gettext-tools) 0.18.3 tool.
|
4
|
+
|
5
|
+
## Hash format
|
6
|
+
|
7
|
+
A PO message is parsed into a hash with meaningful keys for each type of line.
|
8
|
+
The values are always arrays of strings.
|
9
|
+
Each string represents one line of content in the PO file.
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
{
|
13
|
+
:translator_comment => [""],
|
14
|
+
:extracted_comment => [""],
|
15
|
+
:reference => [""],
|
16
|
+
:flag => [""],
|
17
|
+
:previous_msgctxt => [""], # msgctxt of the message used for the fuzzy translation
|
18
|
+
:previous_msgid => [""], # msgid of the message used for the fuzzy translation
|
19
|
+
:previous_msgid_plural => [""],
|
20
|
+
:msgctxt => [""],
|
21
|
+
:msgid => [""],
|
22
|
+
:msgid_plural => [""],
|
23
|
+
:msgstr => [""], # for singular messages
|
24
|
+
"msgstr[N]" => [""] # for plural messages, there N is the plural number starting from 0
|
25
|
+
}
|
26
|
+
```
|
27
|
+
|
28
|
+
### License
|
29
|
+
|
30
|
+
License: [MIT](LICENSE.txt) - Copyright (c) 2017 Dennis-Florian Herr @Experteer GmbH
|
data/Rakefile
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rubygems'
require 'rake'
require 'rspec/core/rake_task'

# `rake` / `rake spec` run the RSpec suite.
RSpec::Core::RakeTask.new(:spec)
task :default => :spec

require 'geminabox-release'
GeminaboxRelease.patch(:use_config => true, :remove_release => false)

desc "Generate a random po file to \"test/benchmark.po\". Takes optional rake args for number of entries"
task 'generate_random_pofile', :messages, :obsoletes do |_t, args|
  # rake passes task arguments as strings, hence the string defaults and the to_i below
  args.with_defaults(:messages => "200", :obsoletes => "10")
  require_relative 'spec/utils/random_pofile_generator'
  # Same namespace as the generator call in parser:five_random_po_full.
  # NOTE(review): the generator file is not visible here; confirm it is defined
  # under SimplePoParser.
  SimplePoParser::RandomPoFileGenerator.generate_file(
    File.expand_path("test/benchmark.po", __dir__), args[:messages].to_i, args[:obsoletes].to_i
  )
end

namespace :parser do
  require 'benchmark'
  require 'simple_po_parser'

  desc "Benchmark of 10 full PoParser runs of test/benchmark.po"
  task "benchmark" do
    pofile = File.expand_path("test/benchmark.po", __dir__)
    Benchmark.bmbm do |x|
      x.report("Parser:") { 10.times { SimplePoParser.parse(pofile) } }
    end
  end

  desc "Generate 5 random PO files with 100 to 500 messages and benchmark each full PoParser run"
  task 'five_random_po_full' do
    require_relative 'spec/utils/random_pofile_generator'
    pofile = File.expand_path("test/benchmark.po.tmp", __dir__)
    # CAPTION/FORMAT are referenced through the Benchmark namespace so that the
    # Benchmark module does not have to be mixed into the top-level object.
    Benchmark.benchmark(Benchmark::CAPTION, 6, Benchmark::FORMAT, "total:") do |x|
      total_length = 0
      # exactly five runs, as the task name and description promise
      timings = Array.new(5) do |i|
        length = (Random.new.rand * 400.0 + 100).to_i
        total_length += length
        puts "Benchmarking file of length #{length}"
        SimplePoParser::RandomPoFileGenerator.generate_file(pofile, length)
        timing = x.report("try#{i}:") { SimplePoParser.parse(pofile) }
        File.unlink(pofile)
        timing
      end
      puts "Total message length #{total_length}"
      # the block's return value supplies the measurement for the extra "total:" label
      [timings.reduce(:+)]
    end
  end

  desc "Show ruby-prof profiler for spec/simple_po_parser/fixtures/complex_entry.po"
  task "profile_parser" do
    require 'ruby-prof'
    RubyProf.start
    po_message = File.read(File.expand_path("spec/simple_po_parser/fixtures/complex_entry.po", __dir__))
    SimplePoParser.parse_message(po_message)
    result = RubyProf.stop

    printer = RubyProf::FlatPrinter.new(result)
    printer.print(STDOUT)
  end
end
|
@@ -0,0 +1,352 @@
|
|
1
|
+
module SimplePoParser
  # Parser for a single PO message, built directly on Ruby's StringScanner (strscan).
  #
  # Important notes about StringScanner#scan:
  # * scan returns nil if there is no match. Using the regex * (zero or more) quantifier
  #   lets scan return an empty string if there is "no match", as the empty string qualifies
  #   as a match of the regex (zero times). We make use of this "trick".
  # * the start of line anchor ^ is unnecessary, as scan only matches at the current
  #   scan position.
  # * `.` does not match a newline unless multi-line mode is explicitly enabled, therefore
  #   a pattern like /.*/ only matches up to the next newline.
  #
  # The working state (@result, @scanner) lives in instance variables of the module
  # itself and is reset at the start of every #parse call.
  module Parser
    require_relative 'error'
    require 'strscan'
    extend self

    # Parse a single message of the PO format.
    #
    # @param message [String] a single PO message; surrounding whitespace is stripped
    # @return [Hash] parsed PO message; every value is an Array with one String per line
    #
    # NOTE(review): on a ParserError this prints a report to stdout and calls `exit`
    # instead of raising. That is the existing contract and is kept unchanged here,
    # but it terminates the calling process - consider raising instead.
    def parse(message)
      @result = {}
      @scanner = StringScanner.new(message.strip)
      begin
        lines
      rescue ParserError => pe
        puts "Parsing error!"
        puts "#{pe.message}"
        # only show frames from this gem (its sources live under lib/simple_po_parser)
        puts "Backtrace:\n#{pe.backtrace.select{|i| i =~ /lib\/simple_po_parser/}.join("\n")}"
        puts "\nResult up to error: '#{@result}'"
        exit
      end
      @result
    end

    private

    #########################################
    ###            branching              ###
    #########################################

    # Arbitrary line of a PO message. Can be a comment or message content.
    # Message parsing always starts with checking for msgctxt, as content is expected in
    # msgctxt -> msgid -> msgid_plural -> msgstr order.
    def lines
      if @scanner.scan(/#/)
        comment
      else
        msgctxt
      end
    rescue PoSyntaxError => pe
      # throw a normal ParserError to break the recursion
      raise ParserError, "Syntax error in lines\n" + pe.message, pe.backtrace
    end

    # Matches a comment line. Called after the leading '#' has been consumed; the next
    # character selects the comment type. Calls #lines again once the comment line was
    # parsed, except for obsolete ('#~') lines, which hand over to #obsoletes.
    def comment
      case @scanner.getch
      when ' '
        skip_whitespace
        add_result(:translator_comment, comment_text)
        lines
      when '.'
        skip_whitespace
        add_result(:extracted_comment, comment_text)
        lines
      when ':'
        skip_whitespace
        add_result(:reference, comment_text)
        lines
      when ','
        skip_whitespace
        add_result(:flag, comment_text)
        lines
      when '|'
        skip_whitespace
        previous_comments
        lines
      when "\n"
        add_result(:translator_comment, "") # empty comment line
        lines
      when '~'
        if @result[:previous_msgctxt] || @result[:previous_msgid] || @result[:previous_msgid_plural]
          raise PoSyntaxError, "Previous comment entries need to be marked obsolete too in obsolete message entries. But already got: #{@result}"
        end
        skip_whitespace
        add_result(:obsolete, comment_text)
        obsoletes
      else
        # step back over the two characters consumed so far ('#' and the type
        # character) so that the error message shows the start of the comment
        @scanner.pos = @scanner.pos - 2
        raise PoSyntaxError, "Unknown comment type #{@scanner.peek(10).inspect}"
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in comment\n" + pe.message, pe.backtrace
    end

    # Matches the msgctxt line and continues with msgid afterwards.
    #
    # msgctxt is optional
    def msgctxt
      if @scanner.scan(/msgctxt/)
        skip_whitespace
        message_entry(:msgctxt)
      end
      msgid
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in msgctxt\n" + pe.message, pe.backtrace
    end

    # Matches the msgid line. Checks for the optional msgid_plural and advances to
    # msgstr or msgstr_plural depending on whether msgid_plural was present.
    #
    # msgid is required
    def msgid
      if @scanner.scan(/msgid/)
        skip_whitespace
        message_entry(:msgid)
        if msgid_plural
          msgstr_plural
        else
          msgstr
        end
      else
        err_msg = "Message without msgid is not allowed."
        err_msg += " The Line started unexpectedly with #{@scanner.peek(10).inspect}."
        raise PoSyntaxError, err_msg
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in msgid\n" + pe.message, pe.backtrace
    end

    # Matches the msgid_plural line.
    #
    # msgid_plural is optional
    #
    # @return [boolean] true if msgid_plural is present, false otherwise
    def msgid_plural
      if @scanner.scan(/msgid_plural/)
        skip_whitespace
        message_entry(:msgid_plural)
        true
      else
        false
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in msgid_plural\n" + pe.message, pe.backtrace
    end

    # Matches the msgstr singular line, which must be the last content of the message.
    #
    # msgstr is required in singular translations
    def msgstr
      if @scanner.scan(/msgstr/)
        skip_whitespace
        message_entry(:msgstr)
        skip_whitespace
        raise PoSyntaxError, "Unexpected content after expected message end #{@scanner.peek(10).inspect}" unless @scanner.eos?
      else
        raise PoSyntaxError, "Singular message without msgstr is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in msgstr\n" + pe.message, pe.backtrace
    end

    # Matches the 'msgstr[N]' lines of a plural message. The indices must start at 0 and
    # increase by one per entry; the literal 'msgstr[N]' string is used as the result key.
    #
    # @param num [Integer] the index the next 'msgstr[N]' line is expected to carry
    def msgstr_plural(num = 0)
      msgstr_key = @scanner.scan(/msgstr\[\d+\]/) # matches 'msgstr[0]', 'msgstr[1]', ..., 'msgstr[10]', ...
      if msgstr_key
        # msgstr plurals must come in 0-based index in order
        msgstr_num = msgstr_key[/\d+/].to_i
        raise PoSyntaxError, "Bad 'msgstr[index]' index." if msgstr_num != num
        skip_whitespace
        message_entry(msgstr_key)
        msgstr_plural(num+1)
      elsif num == 0 # and msgstr_key was false
        raise PoSyntaxError, "Plural message without msgstr[0] is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
      else
        raise PoSyntaxError, "End of message was expected, but line started unexpectedly with #{@scanner.peek(10).inspect}" unless @scanner.eos?
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in msgstr_plural\n" + pe.message, pe.backtrace
    end

    # Parses the content of a previous ('#|') comment line, called after '#|' and any
    # following blanks were consumed. Stores the text under :previous_msgctxt,
    # :previous_msgid or :previous_msgid_plural, including '#| "..."' continuation lines.
    def previous_comments
      # next part must be msgctxt, msgid or msgid_plural
      if @scanner.scan(/msg/)
        if @scanner.scan(/id/)
          if @scanner.scan(/_plural/)
            key = :previous_msgid_plural
          else
            key = :previous_msgid
          end
        elsif @scanner.scan(/ctxt/)
          key = :previous_msgctxt
        else
          raise PoSyntaxError, "Previous comment type #{("msg" + @scanner.peek(10)).inspect} unknown."
        end
        skip_whitespace
        add_result(key, message_line)
        # previous_multiline only consumes lines that really are '#| "..."' continuations,
        # so it is safe to call whether or not the first string was empty
        previous_multiline(key)
      else
        raise PoSyntaxError, "Previous comments must start with '#| msg'. #{@scanner.peek(10).inspect} unknown."
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in previous_comments\n" + pe.message, pe.backtrace
    end

    # Consumes the continuation lines ('#| "..."') of a previous comment entry and adds
    # each of them to the given key.
    def previous_multiline(key)
      # scan multilines until no further multiline is hit
      # /#\|\p{Blank}*"/ needs to catch the double quote to ensure it hits a previous
      # multiline and not another line type.
      if @scanner.scan(/#\|\p{Blank}*"/)
        @scanner.pos = @scanner.pos - 1 # go one character back, so we can reuse the "message line" method
        add_result(key, message_line)
        previous_multiline(key) # go on until we no longer hit a multiline line
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in previous_multiline\n" + pe.message, pe.backtrace
    end

    # Reads the quoted string following a message keyword plus any continuation lines
    # and adds every line to the given key.
    #
    # Continuation lines are read when the first string is empty (the layout msgcat
    # writes) and also when the next line directly starts with a double quote, so that
    #   msgid "Hello "
    #   "world"
    # is accepted as well.
    def message_entry(key)
      text = message_line
      add_result(key, text)
      message_multiline(key) if text.empty? || @scanner.check(/"/)
    end

    # Consumes the continuation lines ('"..."') of a message entry and adds each of
    # them to the given key. Leading blanks before the double quote are skipped.
    def message_multiline(key)
      skip_whitespace
      if @scanner.check(/"/)
        add_result(key, message_line)
        message_multiline(key)
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in message_multiline with key '#{key}'\n" + pe.message, pe.backtrace
    end

    # Identifies a message line ('"text"' followed only by blanks until the end of the
    # line) and returns its text, or raises an error.
    #
    # @return [String] message_text
    def message_line
      if @scanner.getch == '"'
        text = message_text
        unless @scanner.getch == '"'
          err_msg = "The message text '#{text}' must be finished with the double quote character '\"'."
          raise PoSyntaxError, err_msg
        end
        skip_whitespace
        unless end_of_line
          err_msg = "There should be only whitespace until the end of line"
          err_msg += " after the double quote character of a message text."
          raise PoSyntaxError, err_msg
        end
        text
      else
        @scanner.pos = @scanner.pos - 1
        err_msg = "A message text needs to start with the double quote character '\"',"
        err_msg += " but this was found: #{@scanner.peek(10).inspect}"
        raise PoSyntaxError, err_msg
      end
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in message_line\n" + pe.message, pe.backtrace
    end

    # Used to parse all obsolete lines. An obsolete message may only contain obsolete
    # entries, so after the first '#~' line only further '#~' lines or the end of the
    # message are accepted.
    def obsoletes
      if @scanner.scan(/#~/)
        skip_whitespace
        add_result(:obsolete, comment_text)
        obsoletes
      else
        raise PoSyntaxError, "All lines must be obsolete after the first obsolete line, but got #{@scanner.peek(10).inspect}." unless @scanner.eos?
      end
    end

    #########################################
    ###             scanning              ###
    #########################################

    # Returns the text of a comment: the rest of the current line without trailing
    # whitespace. The scanner is advanced to the next line.
    #
    # @return [String] text
    def comment_text
      text = @scanner.scan(/.*/) # everything until newline
      text.rstrip! # benchmarked faster to rstrip the string in place
      raise PoSyntaxError, "Comment text should advance to next line or stop at eos" unless end_of_line
      text
    rescue PoSyntaxError => pe
      raise PoSyntaxError, "Syntax error in comment_text\n" + pe.message, pe.backtrace
    end

    # Returns the text of a message line (the part between the double quotes).
    #
    # @return [String] text
    def message_text
      @scanner.scan_until(/(\\(\\|")|[^"])*/) # this parses anything until an unescaped quote is hit
    end

    # Advances the scanner to the next non-blank position.
    # \p{Blank} does not match newlines, so this never leaves the current line.
    def skip_whitespace
      @scanner.skip(/\p{Blank}+/)
    end

    # Consumes a newline if one is next and returns true if the scanner then is at the
    # beginning of a line or at the end of the string.
    def end_of_line
      @scanner.scan(/\n/)
      @scanner.eos? || @scanner.bol?
    end

    # Adds text to the given key in the result.
    # The value of a key is always an array; it is created on first use.
    def add_result(key, text)
      (@result[key] ||= []).push(text)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module SimplePoParser
  # Splits a PO file into single PO message entities (messages are separated by two
  # newlines) and hands each of them to the Parser.
  class Tokenizer
    def initialize
      @messages = []
    end

    # Reads the file at +path+ block by block and parses every non-empty block.
    #
    # @param path [String] path of the PO file
    # @return [Array] the results of Parser.parse, in file order. The results are
    #   appended to the same array on every call made on this instance.
    def parse_file(path)
      # File.foreach closes the file once the iteration is over
      File.foreach(path, "\n\n") do |block|
        block.strip! # dont parse empty blocks
        @messages << parse_block(block) unless block.empty?
      end
      @messages
    end

    private

    # Parses one stripped message block.
    def parse_block(block)
      Parser.parse(block)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'simple_po_parser/error'
|
2
|
+
require 'simple_po_parser/parser'
|
3
|
+
require 'simple_po_parser/tokenizer'
|
4
|
+
require 'simple_po_parser/version'
|
5
|
+
|
6
|
+
module SimplePoParser
  # Parses a whole PO file.
  #
  # @param path path of the PO file, passed on to Tokenizer#parse_file
  # @return the result of Tokenizer#parse_file (an array of PO messages as hashes)
  def self.parse(path)
    tokenizer = Tokenizer.new
    tokenizer.parse_file(path)
  end

  # Parses a single PO message.
  #
  # @param message the message text, passed on to Parser.parse
  # @return the result of Parser.parse
  def self.parse_message(message)
    Parser.parse(message)
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
# Make the gem's own lib directory loadable so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) if !$LOAD_PATH.include?(lib_dir)
require 'simple_po_parser/version'

Gem::Specification.new do |gem|
  # Identity
  gem.name     = "simple_po_parser"
  gem.version  = SimplePoParser::VERSION
  gem.authors  = ["Dennis-Florian Herr"]
  gem.email    = ["dennis.herr@experteer.com"]
  gem.homepage = "http://github.com/experteer/simple_po_parser"
  gem.license  = "MIT"

  # Descriptions
  gem.summary     = "A simple PO file to ruby hash parser"
  gem.description = "A simple PO file to ruby hash parser . PO files are translation files generated by GNU/Gettext tool."

  # Packaged files: everything tracked by git
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^spec/})
  gem.require_paths = ["lib"]

  # Development deps
  gem.add_development_dependency "bundler", ">= 0"
  gem.add_development_dependency "rake", ">= 0"
  # geminabox release
  gem.add_development_dependency 'geminabox-release', "0.2.1"
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# translator-comment
|
2
|
+
#
|
3
|
+
#. extract
|
4
|
+
#: reference1
|
5
|
+
#: reference2
|
6
|
+
#, flag
|
7
|
+
#| msgctxt "previous context"
|
8
|
+
#| msgid ""
|
9
|
+
#| "multiline\n"
|
10
|
+
#|"previous messageid"
|
11
|
+
#| msgid_plural "previous msgid_plural"
|
12
|
+
msgctxt "Context"
|
13
|
+
msgid "msgid"
|
14
|
+
msgid_plural ""
|
15
|
+
"multiline msgid_plural\n"
|
16
|
+
""
|
17
|
+
msgstr[0] "msgstr 0"
|
18
|
+
msgstr[1] ""
|
19
|
+
"msgstr 1 multiline 1\n"
|
20
|
+
"msgstr 1 line 2\n"
|
21
|
+
msgstr[2] "msgstr 2"
|