tmx_importer 0.5.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07447d45355fb271c832db8228367a0d14d04c5f
4
- data.tar.gz: ab468fdf9295379cd036b351648f37eb3477468f
3
+ metadata.gz: bea8f8bb85029d0b336fdf89adecdcce179fc7df
4
+ data.tar.gz: c005d43b81b20c87a88b15f0814f63550655240c
5
5
  SHA512:
6
- metadata.gz: 508a95ae93f7a43af68362b97ab429d5f746486ad2bc2216174d35490b1c9952e8059e5eb111074c05ba40055d1e255d34d23439caae5ebfcebfa6dcceb0116e
7
- data.tar.gz: 5aa98d1fb3a4ed65554f149c9d8be21fc2a3fe2b9cc02034bb63718ec1a7ec68049da278c9793d036cf450aaf7099ce1799eccc01e554e724d69e52614d11daa
6
+ metadata.gz: 464e050de392148bd8348bc7ce23f5834f9f112cad6deedf971ba1f8a8095e2f9adac1d8ff670f9031546696e54296cac5213f9ac3e8faf90691dcf0fb373c71
7
+ data.tar.gz: 180830aae81b47dcff661e1b90435558226457c177ca1a10a691ccbd8d6d08f45f0e8705d1154524975dc2de607a4c6d0dc7e0829b1e7bb3aed1155291bed43e
data/README.md CHANGED
@@ -23,8 +23,9 @@ gem 'tmx_importer'
23
23
 
24
24
  ```ruby
25
25
  # Get the high level stats of a TMX file
26
+ # Including the encoding is optional. If not included the gem will attempt to detect the encoding.
26
27
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
27
- TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats
28
+ TmxImporter::Tmx.new(file_path: file_path).stats
28
29
  # => {:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]}
29
30
 
30
31
  # Extract the segments of a TMX file
data/lib/tmx_importer.rb CHANGED
@@ -7,17 +7,22 @@ require 'charlock_holmes'
7
7
  module TmxImporter
8
8
  class Tmx
9
9
  attr_reader :file_path, :encoding
10
- def initialize(file_path:, encoding:)
10
+ def initialize(file_path:, **args)
11
11
  @file_path = file_path
12
- @encoding = encoding.upcase
12
+ if args[:encoding].nil?
13
+ @encoding = CharlockHolmes::EncodingDetector.detect(File.read(@file_path)[0..100_000])[:encoding]
14
+ else
15
+ @encoding = args[:encoding].upcase
16
+ end
13
17
  @doc = {
14
18
  source_language: "",
15
19
  tu: { id: "", counter: 0, vals: [], lang: "", creation_date: "" },
16
20
  seg: { lang: "", counter: 0, vals: [], role: "" },
17
21
  language_pairs: []
18
22
  }
19
- @text = CharlockHolmes::Converter.convert(File.read(open(@file_path)), encoding, 'UTF-8') if !@encoding.eql?('UTF-8')
23
+ raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.nil?
20
24
  raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
25
+ @text = CharlockHolmes::Converter.convert(File.read(open(@file_path)), @encoding, 'UTF-8') if !@encoding.eql?('UTF-8')
21
26
  end
22
27
 
23
28
  def stats
@@ -1,3 +1,3 @@
1
1
  module TmxImporter
2
- VERSION = "0.5.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -5,36 +5,36 @@ describe TmxImporter do
5
5
  expect(TmxImporter::VERSION).not_to be nil
6
6
  end
7
7
 
8
- it 'raises an error if the encoding is not supported' do
9
- file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
10
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
11
- end
12
-
13
8
  it 'raises an error if the wrong encoding is specified in the file' do
14
9
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_encoding.tmx')
15
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats).to raise_error }
10
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
16
11
  end
17
12
 
18
13
  it 'raises an error if the file contains bad markup' do
19
14
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-8).tmx')
20
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats).to raise_error }
15
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
21
16
  end
22
17
 
23
18
  it 'raises an error if the file contains bad markup 2' do
24
19
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-16).tmx')
25
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16le').stats).to raise_error }
20
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
21
+ end
22
+
23
+ it 'raises an error if the encoding can not be determined' do
24
+ file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE).tmx')
25
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path).import).to raise_error }
26
26
  end
27
27
 
28
28
  describe '#stats' do
29
29
  it 'reports the stats of a UTF-8 TMX file' do
30
30
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
31
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
31
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
32
32
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
33
33
  end
34
34
 
35
35
  it 'reports the stats of a UTF-8 TMX file 2' do
36
36
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
37
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
37
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
38
38
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de", "en"]]})
39
39
  end
40
40
 
@@ -46,31 +46,31 @@ describe TmxImporter do
46
46
 
47
47
  it 'reports the stats of a UTF-16LE BOM TMX file' do
48
48
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
49
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16le')
49
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
50
50
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
51
51
  end
52
52
 
53
53
  it 'reports the stats of a multiple language pair TMX file' do
54
54
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
55
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
55
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
56
56
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
57
57
  end
58
58
 
59
59
  it 'reports the stats of a srclang equals *all* TMX file' do
60
60
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
61
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
61
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
62
62
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
63
63
  end
64
64
 
65
65
  it 'reports the stats of a TMX file with out of order segments' do
66
66
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
67
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
67
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
68
68
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
69
69
  end
70
70
 
71
71
  it 'imports a TMX file with UTF-16 LE BOM encoding' do
72
72
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
73
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
73
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
74
74
  expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
75
75
  end
76
76
  end
@@ -78,13 +78,13 @@ describe TmxImporter do
78
78
  describe '#import' do
79
79
  it 'imports a UTF-8 TMX file' do
80
80
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
81
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').import
81
+ tmx = TmxImporter::Tmx.new(file_path: file_path).import
82
82
  expect(tmx[1][2][3]).to eq("de-DE")
83
83
  end
84
84
 
85
85
  it 'imports a UTF-8 TMX file 2' do
86
86
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
87
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
87
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
88
88
  expect(tmx.import[1][2][4]).to eq("Rückenlehneneinstellung")
89
89
  end
90
90
 
@@ -96,37 +96,37 @@ describe TmxImporter do
96
96
 
97
97
  it 'imports a UTF-16LE BOM TMX file' do
98
98
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
99
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16le')
99
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
100
100
  expect(tmx.import[1][2][3]).to eq("de-DE")
101
101
  end
102
102
 
103
103
  it 'imports a multiple language pair TMX file' do
104
104
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
105
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
105
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
106
106
  expect(tmx.import[1][2][3]).to eq("it")
107
107
  end
108
108
 
109
109
  it 'imports a srclang equals *all* TMX file' do
110
110
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
111
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
111
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
112
112
  expect(tmx.import[1][2][3]).to eq("it")
113
113
  end
114
114
 
115
115
  it 'imports a TMX file with out of order segments' do
116
116
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
117
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
117
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
118
118
  expect(tmx.import[1][2][3]).to eq("en-US")
119
119
  end
120
120
 
121
121
  it 'imports a TMX file with out of order segments' do
122
122
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
123
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
123
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
124
124
  expect(tmx.import[1][2][1]).to eq("target")
125
125
  end
126
126
 
127
127
  it 'imports a TMX file with UTF-16 LE BOM encoding' do
128
128
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
129
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
129
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
130
130
  expect(tmx.import[1][1][3]).to eq("en")
131
131
  end
132
132
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tmx_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias