tmx_importer 0.5.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/lib/tmx_importer.rb +8 -3
- data/lib/tmx_importer/version.rb +1 -1
- data/spec/tmx_importer_spec.rb +23 -23
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bea8f8bb85029d0b336fdf89adecdcce179fc7df
|
4
|
+
data.tar.gz: c005d43b81b20c87a88b15f0814f63550655240c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 464e050de392148bd8348bc7ce23f5834f9f112cad6deedf971ba1f8a8095e2f9adac1d8ff670f9031546696e54296cac5213f9ac3e8faf90691dcf0fb373c71
|
7
|
+
data.tar.gz: 180830aae81b47dcff661e1b90435558226457c177ca1a10a691ccbd8d6d08f45f0e8705d1154524975dc2de607a4c6d0dc7e0829b1e7bb3aed1155291bed43e
|
data/README.md
CHANGED
@@ -23,8 +23,9 @@ gem 'tmx_importer'
|
|
23
23
|
|
24
24
|
```ruby
|
25
25
|
# Get the high level stats of a TMX file
|
26
|
+
# Including the encoding is optional. If not included the gem will attempt to detect the encoding.
|
26
27
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
27
|
-
TmxImporter::Tmx.new(file_path: file_path
|
28
|
+
TmxImporter::Tmx.new(file_path: file_path).stats
|
28
29
|
# => {:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]}
|
29
30
|
|
30
31
|
# Extract the segments of a TMX file
|
data/lib/tmx_importer.rb
CHANGED
@@ -7,17 +7,22 @@ require 'charlock_holmes'
|
|
7
7
|
module TmxImporter
|
8
8
|
class Tmx
|
9
9
|
attr_reader :file_path, :encoding
|
10
|
-
def initialize(file_path:,
|
10
|
+
def initialize(file_path:, **args)
|
11
11
|
@file_path = file_path
|
12
|
-
|
12
|
+
if args[:encoding].nil?
|
13
|
+
@encoding = CharlockHolmes::EncodingDetector.detect(File.read(@file_path)[0..100_000])[:encoding]
|
14
|
+
else
|
15
|
+
@encoding = args[:encoding].upcase
|
16
|
+
end
|
13
17
|
@doc = {
|
14
18
|
source_language: "",
|
15
19
|
tu: { id: "", counter: 0, vals: [], lang: "", creation_date: "" },
|
16
20
|
seg: { lang: "", counter: 0, vals: [], role: "" },
|
17
21
|
language_pairs: []
|
18
22
|
}
|
19
|
-
|
23
|
+
raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.nil?
|
20
24
|
raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
|
25
|
+
@text = CharlockHolmes::Converter.convert(File.read(open(@file_path)), @encoding, 'UTF-8') if !@encoding.eql?('UTF-8')
|
21
26
|
end
|
22
27
|
|
23
28
|
def stats
|
data/lib/tmx_importer/version.rb
CHANGED
data/spec/tmx_importer_spec.rb
CHANGED
@@ -5,36 +5,36 @@ describe TmxImporter do
|
|
5
5
|
expect(TmxImporter::VERSION).not_to be nil
|
6
6
|
end
|
7
7
|
|
8
|
-
it 'raises an error if the encoding is not supported' do
|
9
|
-
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
10
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
|
11
|
-
end
|
12
|
-
|
13
8
|
it 'raises an error if the wrong encoding is specified in the file' do
|
14
9
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_encoding.tmx')
|
15
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: '
|
10
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
|
16
11
|
end
|
17
12
|
|
18
13
|
it 'raises an error if the file contains bad markup' do
|
19
14
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-8).tmx')
|
20
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path
|
15
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
|
21
16
|
end
|
22
17
|
|
23
18
|
it 'raises an error if the file contains bad markup 2' do
|
24
19
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-16).tmx')
|
25
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path
|
20
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'raises an error if the encoding can not be determined' do
|
24
|
+
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE).tmx')
|
25
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path).import).to raise_error }
|
26
26
|
end
|
27
27
|
|
28
28
|
describe '#stats' do
|
29
29
|
it 'reports the stats of a UTF-8 TMX file' do
|
30
30
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
31
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
31
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
32
32
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'reports the stats of a UTF-8 TMX file 2' do
|
36
36
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
|
37
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
37
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
38
38
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de", "en"]]})
|
39
39
|
end
|
40
40
|
|
@@ -46,31 +46,31 @@ describe TmxImporter do
|
|
46
46
|
|
47
47
|
it 'reports the stats of a UTF-16LE BOM TMX file' do
|
48
48
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
|
49
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
49
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
50
50
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
51
51
|
end
|
52
52
|
|
53
53
|
it 'reports the stats of a multiple language pair TMX file' do
|
54
54
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
|
55
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
55
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
56
56
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'reports the stats of a srclang equals *all* TMX file' do
|
60
60
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
|
61
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
61
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
62
62
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
|
63
63
|
end
|
64
64
|
|
65
65
|
it 'reports the stats of a TMX file with out of order segments' do
|
66
66
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
|
67
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
67
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
68
68
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
72
72
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
73
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
73
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
74
74
|
expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
|
75
75
|
end
|
76
76
|
end
|
@@ -78,13 +78,13 @@ describe TmxImporter do
|
|
78
78
|
describe '#import' do
|
79
79
|
it 'imports a UTF-8 TMX file' do
|
80
80
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
81
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
81
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path).import
|
82
82
|
expect(tmx[1][2][3]).to eq("de-DE")
|
83
83
|
end
|
84
84
|
|
85
85
|
it 'imports a UTF-8 TMX file 2' do
|
86
86
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
|
87
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
87
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
88
88
|
expect(tmx.import[1][2][4]).to eq("Rückenlehneneinstellung")
|
89
89
|
end
|
90
90
|
|
@@ -96,37 +96,37 @@ describe TmxImporter do
|
|
96
96
|
|
97
97
|
it 'imports a UTF-16LE BOM TMX file' do
|
98
98
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
|
99
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
99
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
100
100
|
expect(tmx.import[1][2][3]).to eq("de-DE")
|
101
101
|
end
|
102
102
|
|
103
103
|
it 'imports a multiple language pair TMX file' do
|
104
104
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
|
105
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
105
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
106
106
|
expect(tmx.import[1][2][3]).to eq("it")
|
107
107
|
end
|
108
108
|
|
109
109
|
it 'imports a srclang equals *all* TMX file' do
|
110
110
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
|
111
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
111
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
112
112
|
expect(tmx.import[1][2][3]).to eq("it")
|
113
113
|
end
|
114
114
|
|
115
115
|
it 'imports a TMX file with out of order segments' do
|
116
116
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
|
117
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
117
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
118
118
|
expect(tmx.import[1][2][3]).to eq("en-US")
|
119
119
|
end
|
120
120
|
|
121
121
|
it 'imports a TMX file with out of order segments' do
|
122
122
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
|
123
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
123
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
124
124
|
expect(tmx.import[1][2][1]).to eq("target")
|
125
125
|
end
|
126
126
|
|
127
127
|
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
128
128
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
129
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
129
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
130
130
|
expect(tmx.import[1][1][3]).to eq("en")
|
131
131
|
end
|
132
132
|
end
|