tmx_importer 0.5.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/lib/tmx_importer.rb +8 -3
- data/lib/tmx_importer/version.rb +1 -1
- data/spec/tmx_importer_spec.rb +23 -23
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bea8f8bb85029d0b336fdf89adecdcce179fc7df
|
4
|
+
data.tar.gz: c005d43b81b20c87a88b15f0814f63550655240c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 464e050de392148bd8348bc7ce23f5834f9f112cad6deedf971ba1f8a8095e2f9adac1d8ff670f9031546696e54296cac5213f9ac3e8faf90691dcf0fb373c71
|
7
|
+
data.tar.gz: 180830aae81b47dcff661e1b90435558226457c177ca1a10a691ccbd8d6d08f45f0e8705d1154524975dc2de607a4c6d0dc7e0829b1e7bb3aed1155291bed43e
|
data/README.md
CHANGED
@@ -23,8 +23,9 @@ gem 'tmx_importer'
|
|
23
23
|
|
24
24
|
```ruby
|
25
25
|
# Get the high level stats of a TMX file
|
26
|
+
# Including the encoding is optional. If not included the gem will attempt to detect the encoding.
|
26
27
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
27
|
-
TmxImporter::Tmx.new(file_path: file_path
|
28
|
+
TmxImporter::Tmx.new(file_path: file_path).stats
|
28
29
|
# => {:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]}
|
29
30
|
|
30
31
|
# Extract the segments of a TMX file
|
data/lib/tmx_importer.rb
CHANGED
@@ -7,17 +7,22 @@ require 'charlock_holmes'
|
|
7
7
|
module TmxImporter
|
8
8
|
class Tmx
|
9
9
|
attr_reader :file_path, :encoding
|
10
|
-
def initialize(file_path:,
|
10
|
+
def initialize(file_path:, **args)
|
11
11
|
@file_path = file_path
|
12
|
-
|
12
|
+
if args[:encoding].nil?
|
13
|
+
@encoding = CharlockHolmes::EncodingDetector.detect(File.read(@file_path)[0..100_000])[:encoding]
|
14
|
+
else
|
15
|
+
@encoding = args[:encoding].upcase
|
16
|
+
end
|
13
17
|
@doc = {
|
14
18
|
source_language: "",
|
15
19
|
tu: { id: "", counter: 0, vals: [], lang: "", creation_date: "" },
|
16
20
|
seg: { lang: "", counter: 0, vals: [], role: "" },
|
17
21
|
language_pairs: []
|
18
22
|
}
|
19
|
-
|
23
|
+
raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.nil?
|
20
24
|
raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
|
25
|
+
@text = CharlockHolmes::Converter.convert(File.read(open(@file_path)), @encoding, 'UTF-8') if !@encoding.eql?('UTF-8')
|
21
26
|
end
|
22
27
|
|
23
28
|
def stats
|
data/lib/tmx_importer/version.rb
CHANGED
data/spec/tmx_importer_spec.rb
CHANGED
@@ -5,36 +5,36 @@ describe TmxImporter do
|
|
5
5
|
expect(TmxImporter::VERSION).not_to be nil
|
6
6
|
end
|
7
7
|
|
8
|
-
it 'raises an error if the encoding is not supported' do
|
9
|
-
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
10
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
|
11
|
-
end
|
12
|
-
|
13
8
|
it 'raises an error if the wrong encoding is specified in the file' do
|
14
9
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_encoding.tmx')
|
15
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: '
|
10
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
|
16
11
|
end
|
17
12
|
|
18
13
|
it 'raises an error if the file contains bad markup' do
|
19
14
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-8).tmx')
|
20
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path
|
15
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
|
21
16
|
end
|
22
17
|
|
23
18
|
it 'raises an error if the file contains bad markup 2' do
|
24
19
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-16).tmx')
|
25
|
-
-> { expect(TmxImporter::Tmx.new(file_path: file_path
|
20
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'raises an error if the encoding can not be determined' do
|
24
|
+
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE).tmx')
|
25
|
+
-> { expect(TmxImporter::Tmx.new(file_path: file_path).import).to raise_error }
|
26
26
|
end
|
27
27
|
|
28
28
|
describe '#stats' do
|
29
29
|
it 'reports the stats of a UTF-8 TMX file' do
|
30
30
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
31
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
31
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
32
32
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'reports the stats of a UTF-8 TMX file 2' do
|
36
36
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
|
37
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
37
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
38
38
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de", "en"]]})
|
39
39
|
end
|
40
40
|
|
@@ -46,31 +46,31 @@ describe TmxImporter do
|
|
46
46
|
|
47
47
|
it 'reports the stats of a UTF-16LE BOM TMX file' do
|
48
48
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
|
49
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
49
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
50
50
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
51
51
|
end
|
52
52
|
|
53
53
|
it 'reports the stats of a multiple language pair TMX file' do
|
54
54
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
|
55
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
55
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
56
56
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'reports the stats of a srclang equals *all* TMX file' do
|
60
60
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
|
61
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
61
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
62
62
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
|
63
63
|
end
|
64
64
|
|
65
65
|
it 'reports the stats of a TMX file with out of order segments' do
|
66
66
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
|
67
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
67
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
68
68
|
expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
72
72
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
73
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
73
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
74
74
|
expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
|
75
75
|
end
|
76
76
|
end
|
@@ -78,13 +78,13 @@ describe TmxImporter do
|
|
78
78
|
describe '#import' do
|
79
79
|
it 'imports a UTF-8 TMX file' do
|
80
80
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
|
81
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
81
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path).import
|
82
82
|
expect(tmx[1][2][3]).to eq("de-DE")
|
83
83
|
end
|
84
84
|
|
85
85
|
it 'imports a UTF-8 TMX file 2' do
|
86
86
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
|
87
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
87
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
88
88
|
expect(tmx.import[1][2][4]).to eq("Rückenlehneneinstellung")
|
89
89
|
end
|
90
90
|
|
@@ -96,37 +96,37 @@ describe TmxImporter do
|
|
96
96
|
|
97
97
|
it 'imports a UTF-16LE BOM TMX file' do
|
98
98
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
|
99
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
99
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
100
100
|
expect(tmx.import[1][2][3]).to eq("de-DE")
|
101
101
|
end
|
102
102
|
|
103
103
|
it 'imports a multiple language pair TMX file' do
|
104
104
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
|
105
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
105
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
106
106
|
expect(tmx.import[1][2][3]).to eq("it")
|
107
107
|
end
|
108
108
|
|
109
109
|
it 'imports a srclang equals *all* TMX file' do
|
110
110
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
|
111
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
111
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
112
112
|
expect(tmx.import[1][2][3]).to eq("it")
|
113
113
|
end
|
114
114
|
|
115
115
|
it 'imports a TMX file with out of order segments' do
|
116
116
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
|
117
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
117
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
118
118
|
expect(tmx.import[1][2][3]).to eq("en-US")
|
119
119
|
end
|
120
120
|
|
121
121
|
it 'imports a TMX file with out of order segments' do
|
122
122
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
|
123
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
123
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
124
124
|
expect(tmx.import[1][2][1]).to eq("target")
|
125
125
|
end
|
126
126
|
|
127
127
|
it 'imports a TMX file with UTF-16 LE BOM encoding' do
|
128
128
|
file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
|
129
|
-
tmx = TmxImporter::Tmx.new(file_path: file_path
|
129
|
+
tmx = TmxImporter::Tmx.new(file_path: file_path)
|
130
130
|
expect(tmx.import[1][1][3]).to eq("en")
|
131
131
|
end
|
132
132
|
end
|