tmx_importer 0.5.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 07447d45355fb271c832db8228367a0d14d04c5f
4
- data.tar.gz: ab468fdf9295379cd036b351648f37eb3477468f
3
+ metadata.gz: bea8f8bb85029d0b336fdf89adecdcce179fc7df
4
+ data.tar.gz: c005d43b81b20c87a88b15f0814f63550655240c
5
5
  SHA512:
6
- metadata.gz: 508a95ae93f7a43af68362b97ab429d5f746486ad2bc2216174d35490b1c9952e8059e5eb111074c05ba40055d1e255d34d23439caae5ebfcebfa6dcceb0116e
7
- data.tar.gz: 5aa98d1fb3a4ed65554f149c9d8be21fc2a3fe2b9cc02034bb63718ec1a7ec68049da278c9793d036cf450aaf7099ce1799eccc01e554e724d69e52614d11daa
6
+ metadata.gz: 464e050de392148bd8348bc7ce23f5834f9f112cad6deedf971ba1f8a8095e2f9adac1d8ff670f9031546696e54296cac5213f9ac3e8faf90691dcf0fb373c71
7
+ data.tar.gz: 180830aae81b47dcff661e1b90435558226457c177ca1a10a691ccbd8d6d08f45f0e8705d1154524975dc2de607a4c6d0dc7e0829b1e7bb3aed1155291bed43e
data/README.md CHANGED
@@ -23,8 +23,9 @@ gem 'tmx_importer'
23
23
 
24
24
  ```ruby
25
25
  # Get the high level stats of a TMX file
26
+ # Including the encoding is optional. If not included the gem will attempt to detect the encoding.
26
27
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
27
- TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats
28
+ TmxImporter::Tmx.new(file_path: file_path).stats
28
29
  # => {:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]}
29
30
 
30
31
  # Extract the segments of a TMX file
data/lib/tmx_importer.rb CHANGED
@@ -7,17 +7,22 @@ require 'charlock_holmes'
7
7
  module TmxImporter
8
8
  class Tmx
9
9
  attr_reader :file_path, :encoding
10
- def initialize(file_path:, encoding:)
10
+ def initialize(file_path:, **args)
11
11
  @file_path = file_path
12
- @encoding = encoding.upcase
12
+ if args[:encoding].nil?
13
+ @encoding = CharlockHolmes::EncodingDetector.detect(File.read(@file_path)[0..100_000])[:encoding]
14
+ else
15
+ @encoding = args[:encoding].upcase
16
+ end
13
17
  @doc = {
14
18
  source_language: "",
15
19
  tu: { id: "", counter: 0, vals: [], lang: "", creation_date: "" },
16
20
  seg: { lang: "", counter: 0, vals: [], role: "" },
17
21
  language_pairs: []
18
22
  }
19
- @text = CharlockHolmes::Converter.convert(File.read(open(@file_path)), encoding, 'UTF-8') if !@encoding.eql?('UTF-8')
23
+ raise "Encoding type could not be determined. Please set an encoding of UTF-8, UTF-16LE, or UTF-16BE" if @encoding.nil?
20
24
  raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
25
+ @text = CharlockHolmes::Converter.convert(File.read(open(@file_path)), @encoding, 'UTF-8') if !@encoding.eql?('UTF-8')
21
26
  end
22
27
 
23
28
  def stats
@@ -1,3 +1,3 @@
1
1
  module TmxImporter
2
- VERSION = "0.5.0"
2
+ VERSION = "1.1.0"
3
3
  end
@@ -5,36 +5,36 @@ describe TmxImporter do
5
5
  expect(TmxImporter::VERSION).not_to be nil
6
6
  end
7
7
 
8
- it 'raises an error if the encoding is not supported' do
9
- file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
10
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
11
- end
12
-
13
8
  it 'raises an error if the wrong encoding is specified in the file' do
14
9
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_encoding.tmx')
15
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats).to raise_error }
10
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'ISO-8859-9').stats).to raise_error }
16
11
  end
17
12
 
18
13
  it 'raises an error if the file contains bad markup' do
19
14
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-8).tmx')
20
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').stats).to raise_error }
15
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
21
16
  end
22
17
 
23
18
  it 'raises an error if the file contains bad markup 2' do
24
19
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/bad_markup(utf-16).tmx')
25
- -> { expect(TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16le').stats).to raise_error }
20
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path).stats).to raise_error }
21
+ end
22
+
23
+ it 'raises an error if the encoding can not be determined' do
24
+ file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE).tmx')
25
+ -> { expect(TmxImporter::Tmx.new(file_path: file_path).import).to raise_error }
26
26
  end
27
27
 
28
28
  describe '#stats' do
29
29
  it 'reports the stats of a UTF-8 TMX file' do
30
30
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
31
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
31
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
32
32
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
33
33
  end
34
34
 
35
35
  it 'reports the stats of a UTF-8 TMX file 2' do
36
36
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
37
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
37
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
38
38
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de", "en"]]})
39
39
  end
40
40
 
@@ -46,31 +46,31 @@ describe TmxImporter do
46
46
 
47
47
  it 'reports the stats of a UTF-16LE BOM TMX file' do
48
48
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
49
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16le')
49
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
50
50
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
51
51
  end
52
52
 
53
53
  it 'reports the stats of a multiple language pair TMX file' do
54
54
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
55
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
55
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
56
56
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
57
57
  end
58
58
 
59
59
  it 'reports the stats of a srclang equals *all* TMX file' do
60
60
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
61
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
61
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
62
62
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>10, :language_pairs=>[["de-DE", "en-US"], ["de-DE", "it"], ["de-DE", "fr"]]})
63
63
  end
64
64
 
65
65
  it 'reports the stats of a TMX file with out of order segments' do
66
66
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
67
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
67
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
68
68
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
69
69
  end
70
70
 
71
71
  it 'imports a TMX file with UTF-16 LE BOM encoding' do
72
72
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
73
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
73
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
74
74
  expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
75
75
  end
76
76
  end
@@ -78,13 +78,13 @@ describe TmxImporter do
78
78
  describe '#import' do
79
79
  it 'imports a UTF-8 TMX file' do
80
80
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-8).tmx')
81
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8').import
81
+ tmx = TmxImporter::Tmx.new(file_path: file_path).import
82
82
  expect(tmx[1][2][3]).to eq("de-DE")
83
83
  end
84
84
 
85
85
  it 'imports a UTF-8 TMX file 2' do
86
86
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm_2(utf-8).tmx')
87
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
87
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
88
88
  expect(tmx.import[1][2][4]).to eq("Rückenlehneneinstellung")
89
89
  end
90
90
 
@@ -96,37 +96,37 @@ describe TmxImporter do
96
96
 
97
97
  it 'imports a UTF-16LE BOM TMX file' do
98
98
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/test_tm(utf-16LE BOM).tmx')
99
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16le')
99
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
100
100
  expect(tmx.import[1][2][3]).to eq("de-DE")
101
101
  end
102
102
 
103
103
  it 'imports a multiple language pair TMX file' do
104
104
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/multiple_language_pairs.tmx')
105
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
105
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
106
106
  expect(tmx.import[1][2][3]).to eq("it")
107
107
  end
108
108
 
109
109
  it 'imports a srclang equals *all* TMX file' do
110
110
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/srclang_all.tmx')
111
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
111
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
112
112
  expect(tmx.import[1][2][3]).to eq("it")
113
113
  end
114
114
 
115
115
  it 'imports a TMX file with out of order segments' do
116
116
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
117
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
117
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
118
118
  expect(tmx.import[1][2][3]).to eq("en-US")
119
119
  end
120
120
 
121
121
  it 'imports a TMX file with out of order segments' do
122
122
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/out_of_order_segments.tmx')
123
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
123
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
124
124
  expect(tmx.import[1][2][1]).to eq("target")
125
125
  end
126
126
 
127
127
  it 'imports a TMX file with UTF-16 LE BOM encoding' do
128
128
  file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
129
- tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
129
+ tmx = TmxImporter::Tmx.new(file_path: file_path)
130
130
  expect(tmx.import[1][1][3]).to eq("en")
131
131
  end
132
132
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tmx_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias