tmx_importer 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/tmx_importer/version.rb +1 -1
- data/lib/tmx_importer.rb +14 -10
- data/spec/test_sample_files/strange_encoding.tmx +0 -0
- data/spec/tmx_importer_spec.rb +12 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9171f015c2f0e45c3772d64d0f98e53949abf89a
|
4
|
+
data.tar.gz: f227573b58faf7b215695d9a72ff12df051bd6e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9dd9c281a0453d994244f87aeaf466c46aa98336a3bae7a598344df23bb01b66779d235f305533e421f04b1a6ca1a695745a28b7311abb18ed3563b724478a8
|
7
|
+
data.tar.gz: 8d61b7d51f8b6ed56c21b68639f52aaa3c3f9ed1e6ee158cf1ecfed7669c77cdafb65945e718d36228545d281e9b385ed416639e0413ecfee258ed2922ee43b1
|
data/lib/tmx_importer/version.rb
CHANGED
data/lib/tmx_importer.rb
CHANGED
@@ -3,9 +3,6 @@ require 'xml'
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'pretty_strings'
|
5
5
|
|
6
|
-
Encoding.default_internal = Encoding::UTF_8
|
7
|
-
Encoding.default_external = Encoding::UTF_8
|
8
|
-
|
9
6
|
module TmxImporter
|
10
7
|
class Tmx
|
11
8
|
attr_reader :file_path, :encoding
|
@@ -18,6 +15,12 @@ module TmxImporter
|
|
18
15
|
seg: { lang: "", counter: 0, vals: [], role: "" },
|
19
16
|
language_pairs: []
|
20
17
|
}
|
18
|
+
@src_regex = Regexp.new('(?<=srclang=\S)\S+(?=")|(?=\')'.encode(@encoding))
|
19
|
+
@src_string = 'srclang='.encode(@encoding).freeze
|
20
|
+
@tu_regex = Regexp.new('<\/tu>'.encode(@encoding))
|
21
|
+
@seg_regex = Regexp.new('<\/seg>'.encode(@encoding))
|
22
|
+
@lang_regex = Regexp.new('(?<=[^cn]lang=\S)\S+(?=")|(?=\')'.encode(@encoding))
|
23
|
+
@lang_string = 'lang'.encode(@encoding).freeze
|
21
24
|
raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
|
22
25
|
end
|
23
26
|
|
@@ -43,11 +46,12 @@ module TmxImporter
|
|
43
46
|
end
|
44
47
|
|
45
48
|
def analyze_line(line)
|
46
|
-
@doc[:source_language] = line.scan(
|
47
|
-
@doc[:tu][:counter] += line.scan(
|
48
|
-
@doc[:seg][:counter] += line.scan(
|
49
|
-
if line.include?(
|
50
|
-
@doc[:seg][:lang] = line.scan(
|
49
|
+
@doc[:source_language] = line.scan(@src_regex)[0].encode('UTF-8') if line.include?(@src_string)
|
50
|
+
@doc[:tu][:counter] += line.scan(@tu_regex).count
|
51
|
+
@doc[:seg][:counter] += line.scan(@seg_regex).count
|
52
|
+
if line.include?(@lang_string)
|
53
|
+
@doc[:seg][:lang] = line.scan(@lang_regex)[0]
|
54
|
+
@doc[:seg][:lang] = @doc[:seg][:lang].encode('UTF-8') unless @doc[:seg][:lang].nil?
|
51
55
|
write_language_pair
|
52
56
|
end
|
53
57
|
end
|
@@ -105,7 +109,7 @@ module TmxImporter
|
|
105
109
|
if @doc[:seg][:lang] != @doc[:source_language] &&
|
106
110
|
@doc[:seg][:lang].split('-')[0].downcase != @doc[:source_language].split('-')[0].downcase &&
|
107
111
|
@doc[:source_language] != '*all*'
|
108
|
-
@doc[:language_pairs] << [@doc[:source_language], @doc[:seg][:lang]]
|
112
|
+
@doc[:language_pairs] << [@doc[:source_language].force_encoding("UTF-8"), @doc[:seg][:lang].force_encoding("UTF-8")]
|
109
113
|
@doc[:seg][:role] = 'source'
|
110
114
|
elsif @doc[:source_language] == '*all*'
|
111
115
|
@doc[:source_language] = @doc[:seg][:lang]
|
@@ -117,7 +121,7 @@ module TmxImporter
|
|
117
121
|
|
118
122
|
def write_tu(reader)
|
119
123
|
@doc[:tu][:lang] = reader.get_attribute("srclang")
|
120
|
-
@doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate")).to_s
|
124
|
+
@doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate").force_encoding('UTF-8')).to_s
|
121
125
|
@doc[:tu][:vals] << [@doc[:tu][:id], @doc[:tu][:creation_date]]
|
122
126
|
end
|
123
127
|
|
Binary file
|
data/spec/tmx_importer_spec.rb
CHANGED
@@ -67,6 +67,12 @@ describe TmxImporter do
|
|
67
67
|
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
|
68
68
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
69
69
|
end
|
70
|
+
|
71
|
+
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
72
|
+
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
73
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
|
74
|
+
expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
|
75
|
+
end
|
70
76
|
end
|
71
77
|
|
72
78
|
describe '#import' do
|
@@ -117,5 +123,11 @@ describe TmxImporter do
|
|
117
123
|
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
|
118
124
|
expect(tmx.import[1][2][1]).to eq("target")
|
119
125
|
end
|
126
|
+
|
127
|
+
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
128
|
+
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
129
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
|
130
|
+
expect(tmx.import[1][1][3]).to eq("en")
|
131
|
+
end
|
120
132
|
end
|
121
133
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tmx_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- spec/test_sample_files/multiple_language_pairs.tmx
|
104
104
|
- spec/test_sample_files/out_of_order_segments.tmx
|
105
105
|
- spec/test_sample_files/srclang_all.tmx
|
106
|
+
- spec/test_sample_files/strange_encoding.tmx
|
106
107
|
- spec/test_sample_files/test_tm(utf-16LE BOM).tmx
|
107
108
|
- spec/test_sample_files/test_tm(utf-16LE).tmx
|
108
109
|
- spec/test_sample_files/test_tm(utf-8).tmx
|
@@ -140,6 +141,7 @@ test_files:
|
|
140
141
|
- spec/test_sample_files/multiple_language_pairs.tmx
|
141
142
|
- spec/test_sample_files/out_of_order_segments.tmx
|
142
143
|
- spec/test_sample_files/srclang_all.tmx
|
144
|
+
- spec/test_sample_files/strange_encoding.tmx
|
143
145
|
- spec/test_sample_files/test_tm(utf-16LE BOM).tmx
|
144
146
|
- spec/test_sample_files/test_tm(utf-16LE).tmx
|
145
147
|
- spec/test_sample_files/test_tm(utf-8).tmx
|