tmx_importer 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/tmx_importer/version.rb +1 -1
- data/lib/tmx_importer.rb +14 -10
- data/spec/test_sample_files/strange_encoding.tmx +0 -0
- data/spec/tmx_importer_spec.rb +12 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9171f015c2f0e45c3772d64d0f98e53949abf89a
|
4
|
+
data.tar.gz: f227573b58faf7b215695d9a72ff12df051bd6e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b9dd9c281a0453d994244f87aeaf466c46aa98336a3bae7a598344df23bb01b66779d235f305533e421f04b1a6ca1a695745a28b7311abb18ed3563b724478a8
|
7
|
+
data.tar.gz: 8d61b7d51f8b6ed56c21b68639f52aaa3c3f9ed1e6ee158cf1ecfed7669c77cdafb65945e718d36228545d281e9b385ed416639e0413ecfee258ed2922ee43b1
|
data/lib/tmx_importer/version.rb
CHANGED
data/lib/tmx_importer.rb
CHANGED
@@ -3,9 +3,6 @@ require 'xml'
|
|
3
3
|
require 'open-uri'
|
4
4
|
require 'pretty_strings'
|
5
5
|
|
6
|
-
Encoding.default_internal = Encoding::UTF_8
|
7
|
-
Encoding.default_external = Encoding::UTF_8
|
8
|
-
|
9
6
|
module TmxImporter
|
10
7
|
class Tmx
|
11
8
|
attr_reader :file_path, :encoding
|
@@ -18,6 +15,12 @@ module TmxImporter
|
|
18
15
|
seg: { lang: "", counter: 0, vals: [], role: "" },
|
19
16
|
language_pairs: []
|
20
17
|
}
|
18
|
+
@src_regex = Regexp.new('(?<=srclang=\S)\S+(?=")|(?=\')'.encode(@encoding))
|
19
|
+
@src_string = 'srclang='.encode(@encoding).freeze
|
20
|
+
@tu_regex = Regexp.new('<\/tu>'.encode(@encoding))
|
21
|
+
@seg_regex = Regexp.new('<\/seg>'.encode(@encoding))
|
22
|
+
@lang_regex = Regexp.new('(?<=[^cn]lang=\S)\S+(?=")|(?=\')'.encode(@encoding))
|
23
|
+
@lang_string = 'lang'.encode(@encoding).freeze
|
21
24
|
raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
|
22
25
|
end
|
23
26
|
|
@@ -43,11 +46,12 @@ module TmxImporter
|
|
43
46
|
end
|
44
47
|
|
45
48
|
def analyze_line(line)
|
46
|
-
@doc[:source_language] = line.scan(
|
47
|
-
@doc[:tu][:counter] += line.scan(
|
48
|
-
@doc[:seg][:counter] += line.scan(
|
49
|
-
if line.include?(
|
50
|
-
@doc[:seg][:lang] = line.scan(
|
49
|
+
@doc[:source_language] = line.scan(@src_regex)[0].encode('UTF-8') if line.include?(@src_string)
|
50
|
+
@doc[:tu][:counter] += line.scan(@tu_regex).count
|
51
|
+
@doc[:seg][:counter] += line.scan(@seg_regex).count
|
52
|
+
if line.include?(@lang_string)
|
53
|
+
@doc[:seg][:lang] = line.scan(@lang_regex)[0]
|
54
|
+
@doc[:seg][:lang] = @doc[:seg][:lang].encode('UTF-8') unless @doc[:seg][:lang].nil?
|
51
55
|
write_language_pair
|
52
56
|
end
|
53
57
|
end
|
@@ -105,7 +109,7 @@ module TmxImporter
|
|
105
109
|
if @doc[:seg][:lang] != @doc[:source_language] &&
|
106
110
|
@doc[:seg][:lang].split('-')[0].downcase != @doc[:source_language].split('-')[0].downcase &&
|
107
111
|
@doc[:source_language] != '*all*'
|
108
|
-
@doc[:language_pairs] << [@doc[:source_language], @doc[:seg][:lang]]
|
112
|
+
@doc[:language_pairs] << [@doc[:source_language].force_encoding("UTF-8"), @doc[:seg][:lang].force_encoding("UTF-8")]
|
109
113
|
@doc[:seg][:role] = 'source'
|
110
114
|
elsif @doc[:source_language] == '*all*'
|
111
115
|
@doc[:source_language] = @doc[:seg][:lang]
|
@@ -117,7 +121,7 @@ module TmxImporter
|
|
117
121
|
|
118
122
|
def write_tu(reader)
|
119
123
|
@doc[:tu][:lang] = reader.get_attribute("srclang")
|
120
|
-
@doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate")).to_s
|
124
|
+
@doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate").force_encoding('UTF-8')).to_s
|
121
125
|
@doc[:tu][:vals] << [@doc[:tu][:id], @doc[:tu][:creation_date]]
|
122
126
|
end
|
123
127
|
|
Binary file
|
data/spec/tmx_importer_spec.rb
CHANGED
@@ -67,6 +67,12 @@ describe TmxImporter do
|
|
67
67
|
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
|
68
68
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
69
69
|
end
|
70
|
+
|
71
|
+
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
72
|
+
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
73
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
|
74
|
+
expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
|
75
|
+
end
|
70
76
|
end
|
71
77
|
|
72
78
|
describe '#import' do
|
@@ -117,5 +123,11 @@ describe TmxImporter do
|
|
117
123
|
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
|
118
124
|
expect(tmx.import[1][2][1]).to eq("target")
|
119
125
|
end
|
126
|
+
|
127
|
+
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
128
|
+
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
129
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
|
130
|
+
expect(tmx.import[1][1][3]).to eq("en")
|
131
|
+
end
|
120
132
|
end
|
121
133
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tmx_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- spec/test_sample_files/multiple_language_pairs.tmx
|
104
104
|
- spec/test_sample_files/out_of_order_segments.tmx
|
105
105
|
- spec/test_sample_files/srclang_all.tmx
|
106
|
+
- spec/test_sample_files/strange_encoding.tmx
|
106
107
|
- spec/test_sample_files/test_tm(utf-16LE BOM).tmx
|
107
108
|
- spec/test_sample_files/test_tm(utf-16LE).tmx
|
108
109
|
- spec/test_sample_files/test_tm(utf-8).tmx
|
@@ -140,6 +141,7 @@ test_files:
|
|
140
141
|
- spec/test_sample_files/multiple_language_pairs.tmx
|
141
142
|
- spec/test_sample_files/out_of_order_segments.tmx
|
142
143
|
- spec/test_sample_files/srclang_all.tmx
|
144
|
+
- spec/test_sample_files/strange_encoding.tmx
|
143
145
|
- spec/test_sample_files/test_tm(utf-16LE BOM).tmx
|
144
146
|
- spec/test_sample_files/test_tm(utf-16LE).tmx
|
145
147
|
- spec/test_sample_files/test_tm(utf-8).tmx
|