tmx_importer 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 879c5a494ffdf3380ca31ffc402dc031261db2bb
4
- data.tar.gz: 09cd7465ac1b8cb93fc21edc411cba48ca20ae7d
3
+ metadata.gz: 9171f015c2f0e45c3772d64d0f98e53949abf89a
4
+ data.tar.gz: f227573b58faf7b215695d9a72ff12df051bd6e9
5
5
  SHA512:
6
- metadata.gz: a1e9252071f93e9a8a65f811a931e7c0aef4f914b7709d28780be54f69f543508773ac2fe379d2c67587abcaa2ce074b41bb9e1eceb628e6f61c432c5c9f0d80
7
- data.tar.gz: 7e8e76da0b91f2ba041e75e7d86d348d6ad1fe66f03506173e0535d6e0395f9e12a0bd04df9699e4141a931bc1da1aad74498af37c54e0fdd7bba1bc4a30c7c9
6
+ metadata.gz: b9dd9c281a0453d994244f87aeaf466c46aa98336a3bae7a598344df23bb01b66779d235f305533e421f04b1a6ca1a695745a28b7311abb18ed3563b724478a8
7
+ data.tar.gz: 8d61b7d51f8b6ed56c21b68639f52aaa3c3f9ed1e6ee158cf1ecfed7669c77cdafb65945e718d36228545d281e9b385ed416639e0413ecfee258ed2922ee43b1
@@ -1,3 +1,3 @@
1
1
  module TmxImporter
2
- VERSION = "0.2.1"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/tmx_importer.rb CHANGED
@@ -3,9 +3,6 @@ require 'xml'
3
3
  require 'open-uri'
4
4
  require 'pretty_strings'
5
5
 
6
- Encoding.default_internal = Encoding::UTF_8
7
- Encoding.default_external = Encoding::UTF_8
8
-
9
6
  module TmxImporter
10
7
  class Tmx
11
8
  attr_reader :file_path, :encoding
@@ -18,6 +15,12 @@ module TmxImporter
18
15
  seg: { lang: "", counter: 0, vals: [], role: "" },
19
16
  language_pairs: []
20
17
  }
18
+ @src_regex = Regexp.new('(?<=srclang=\S)\S+(?=")|(?=\')'.encode(@encoding))
19
+ @src_string = 'srclang='.encode(@encoding).freeze
20
+ @tu_regex = Regexp.new('<\/tu>'.encode(@encoding))
21
+ @seg_regex = Regexp.new('<\/seg>'.encode(@encoding))
22
+ @lang_regex = Regexp.new('(?<=[^cn]lang=\S)\S+(?=")|(?=\')'.encode(@encoding))
23
+ @lang_string = 'lang'.encode(@encoding).freeze
21
24
  raise "Encoding type not supported. Please choose an encoding of UTF-8, UTF-16LE, or UTF-16BE" unless @encoding.eql?('UTF-8') || @encoding.eql?('UTF-16LE') || @encoding.eql?('UTF-16BE')
22
25
  end
23
26
 
@@ -43,11 +46,12 @@ module TmxImporter
43
46
  end
44
47
 
45
48
  def analyze_line(line)
46
- @doc[:source_language] = line.scan(/(?<=srclang=\S)\S+(?=")|(?=')/)[0] if line.include?('srclang=')
47
- @doc[:tu][:counter] += line.scan(/<\/tu>/).count
48
- @doc[:seg][:counter] += line.scan(/<\/seg>/).count
49
- if line.include?('lang')
50
- @doc[:seg][:lang] = line.scan(/(?<=[^cn]lang=\S)\S+(?=")|(?=')/)[0]
49
+ @doc[:source_language] = line.scan(@src_regex)[0].encode('UTF-8') if line.include?(@src_string)
50
+ @doc[:tu][:counter] += line.scan(@tu_regex).count
51
+ @doc[:seg][:counter] += line.scan(@seg_regex).count
52
+ if line.include?(@lang_string)
53
+ @doc[:seg][:lang] = line.scan(@lang_regex)[0]
54
+ @doc[:seg][:lang] = @doc[:seg][:lang].encode('UTF-8') unless @doc[:seg][:lang].nil?
51
55
  write_language_pair
52
56
  end
53
57
  end
@@ -105,7 +109,7 @@ module TmxImporter
105
109
  if @doc[:seg][:lang] != @doc[:source_language] &&
106
110
  @doc[:seg][:lang].split('-')[0].downcase != @doc[:source_language].split('-')[0].downcase &&
107
111
  @doc[:source_language] != '*all*'
108
- @doc[:language_pairs] << [@doc[:source_language], @doc[:seg][:lang]]
112
+ @doc[:language_pairs] << [@doc[:source_language].force_encoding("UTF-8"), @doc[:seg][:lang].force_encoding("UTF-8")]
109
113
  @doc[:seg][:role] = 'source'
110
114
  elsif @doc[:source_language] == '*all*'
111
115
  @doc[:source_language] = @doc[:seg][:lang]
@@ -117,7 +121,7 @@ module TmxImporter
117
121
 
118
122
  def write_tu(reader)
119
123
  @doc[:tu][:lang] = reader.get_attribute("srclang")
120
- @doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate")).to_s
124
+ @doc[:tu][:creation_date] = reader.get_attribute("creationdate").nil? ? DateTime.now.to_s : DateTime.parse(reader.get_attribute("creationdate").force_encoding('UTF-8')).to_s
121
125
  @doc[:tu][:vals] << [@doc[:tu][:id], @doc[:tu][:creation_date]]
122
126
  end
123
127
 
@@ -67,6 +67,12 @@ describe TmxImporter do
67
67
  tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
68
68
  expect(tmx.stats).to eq({:tu_count=>4, :seg_count=>8, :language_pairs=>[["de-DE", "en-US"]]})
69
69
  end
70
+
71
+ it 'imports a TMX file with UTF-16 LE BOM encoding' do
72
+ file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
73
+ tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
74
+ expect(tmx.stats).to eq({:tu_count=>1, :seg_count=>2, :language_pairs=>[["tr", "en"]]})
75
+ end
70
76
  end
71
77
 
72
78
  describe '#import' do
@@ -117,5 +123,11 @@ describe TmxImporter do
117
123
  tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-8')
118
124
  expect(tmx.import[1][2][1]).to eq("target")
119
125
  end
126
+
127
+ it 'imports a TMX file with UTF-16 LE BOM encoding' do
128
+ file_path = File.expand_path('../tmx_importer/spec/test_sample_files/strange_encoding.tmx')
129
+ tmx = TmxImporter::Tmx.new(file_path: file_path, encoding: 'utf-16LE')
130
+ expect(tmx.import[1][1][3]).to eq("en")
131
+ end
120
132
  end
121
133
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tmx_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
@@ -103,6 +103,7 @@ files:
103
103
  - spec/test_sample_files/multiple_language_pairs.tmx
104
104
  - spec/test_sample_files/out_of_order_segments.tmx
105
105
  - spec/test_sample_files/srclang_all.tmx
106
+ - spec/test_sample_files/strange_encoding.tmx
106
107
  - spec/test_sample_files/test_tm(utf-16LE BOM).tmx
107
108
  - spec/test_sample_files/test_tm(utf-16LE).tmx
108
109
  - spec/test_sample_files/test_tm(utf-8).tmx
@@ -140,6 +141,7 @@ test_files:
140
141
  - spec/test_sample_files/multiple_language_pairs.tmx
141
142
  - spec/test_sample_files/out_of_order_segments.tmx
142
143
  - spec/test_sample_files/srclang_all.tmx
144
+ - spec/test_sample_files/strange_encoding.tmx
143
145
  - spec/test_sample_files/test_tm(utf-16LE BOM).tmx
144
146
  - spec/test_sample_files/test_tm(utf-16LE).tmx
145
147
  - spec/test_sample_files/test_tm(utf-8).tmx