txt_tm_importer 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a437d5112648108747b10abbaf3b2253ba2bf78
4
- data.tar.gz: 86f0997da5a86514c9c3b11ef1a9a251795f769e
3
+ metadata.gz: fd3e21f0db8fdc8d66a5d30a7af0d3adcbab92df
4
+ data.tar.gz: 05e34ce94dd18bb7339bb623822330cc611ade04
5
5
  SHA512:
6
- metadata.gz: 975b00b591daa6f1561be0e7541dae0a92d1dbc485419c5981a594903ebfc61bb561936484d52499f776a2d9a01b1ceec2f28a595f4e01d4fec0aad0739b0a6b
7
- data.tar.gz: c399dbc772ab1764988718efda2c3d6fb291b79dedadf4d468ec68411cbb66ac18293986da19323991e6b9b2ee5ca406bc8b250c52a72beeb12504c1b6a02015
6
+ metadata.gz: 41f3e80b4578c8709df95962e541c56c0a47792662a63a4686cb8e3253dc4bf06fe40b61545da092c17754e4571735d8b806be7732b79ef6f653de8e38c2c612
7
+ data.tar.gz: 7bf1d31a1372156e1ef825ffa3b1fb26ba053f6bf49c6e15b3d90c46e8bcf2c66dcb8943147a08df2edadbe32ac1e1e7846435a6a57336163fd3790c7f616498
@@ -75,18 +75,32 @@ module TxtTmImporter
75
75
  wordfast_lines.each_with_index do |line, index|
76
76
  next if line.empty? || line.gsub(/\s+/, '').empty?
77
77
  line_array = line.split("\t")
78
- @doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
79
- @doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
78
+ @doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '')
79
+ @doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '')
80
80
  next if index.eql?(0)
81
81
  timestamp = create_timestamp(line.split("\t")[0])
82
82
  @doc[:tu][:creation_date] = timestamp unless timestamp.nil?
83
83
  generate_unique_id
84
84
  write_tu
85
- write_seg(remove_wordfast_tags(line_array[4]), 'source', line_array[3]) unless line_array[4].nil?
86
- write_seg(remove_wordfast_tags(line_array[6]), 'target', line_array[5]) unless line_array[6].nil?
85
+ write_seg(remove_wordfast_tags(line_array[4]), 'source', @doc[:source_language]) unless line_array[4].nil?
86
+ write_seg(remove_wordfast_tags(line_array[6]), 'target', @doc[:target_language]) unless line_array[6].nil?
87
87
  end
88
88
  end
89
89
 
90
+ def wordfast_stats
91
+ lines = wordfast_lines
92
+ lines.each_with_index do |line, index|
93
+ next if line.empty? || line.gsub(/\s+/, '').empty?
94
+ next if index.eql?(0)
95
+ @doc[:tu][:counter] += 1
96
+ @doc[:source_language] = line.split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
97
+ @doc[:target_language] = line.split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
98
+ @doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
99
+ @doc[:language_pairs] = @doc[:language_pairs].uniq
100
+ end
101
+ @doc[:seg][:counter] = @doc[:tu][:counter] * 2
102
+ end
103
+
90
104
  def import_twb_file
91
105
  role_counter = 0
92
106
  tu_tracker = 0
@@ -110,19 +124,6 @@ module TxtTmImporter
110
124
  end
111
125
  end
112
126
 
113
- def wordfast_stats
114
- lines = wordfast_lines
115
- lines.each_with_index do |line, index|
116
- next if line.empty? || line.gsub(/\s+/, '').empty?
117
- next if index.eql?(0)
118
- @doc[:tu][:counter] += 1
119
- end
120
- @doc[:seg][:counter] = @doc[:tu][:counter] * 2
121
- @doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
122
- @doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
123
- @doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
124
- end
125
-
126
127
  def twb_export_file_stats
127
128
  @doc[:tu][:counter] = @text.scan(/<\/TrU>/).count
128
129
  @doc[:seg][:counter] = @text.scan(/<Seg/).count
@@ -1,3 +1,3 @@
1
1
  module TxtTmImporter
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -29,6 +29,12 @@ describe TxtTmImporter do
29
29
  txt = TxtTmImporter::Tm.new(file_path: file_path).stats
30
30
  expect(txt).to eq({:tu_count=>2, :seg_count=>4, :language_pairs=>[['ES-EM', 'EN-US']]})
31
31
  end
32
+
33
+ it 'reports the stats of a wordfast txt file 5' do
34
+ file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
35
+ txt = TxtTmImporter::Tm.new(file_path: file_path).stats
36
+ expect(txt).to eq({:tu_count=>407, :seg_count=>814, :language_pairs=>[["PT-BR", "EN-US"], ["IT-IT", "EN-US"], ["PT-BR", "EN-GB"]]})
37
+ end
32
38
  end
33
39
 
34
40
  describe '#import' do
@@ -151,5 +157,17 @@ describe TxtTmImporter do
151
157
  txt = TxtTmImporter::Tm.new(file_path: file_path).import
152
158
  expect(txt[1][2][4]).to eq("La renovación de procesos con nuevos equipamientos beneficiará directamente a clientes y pacientes que utilizan medicamentos y alimentación parenteral suministrados por el grupo")
153
159
  end
160
+
161
+ it 'imports a txt file 21' do
162
+ file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
163
+ txt = TxtTmImporter::Tm.new(file_path: file_path).import
164
+ expect(txt[1][2][4]).to eq("Mundo Físico VS Contratual")
165
+ end
166
+
167
+ it 'imports a txt file 22' do
168
+ file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
169
+ txt = TxtTmImporter::Tm.new(file_path: file_path).import
170
+ expect(txt[0][-1][0]).to eq(txt[1][-1][0])
171
+ end
154
172
  end
155
173
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: txt_tm_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
@@ -100,6 +100,7 @@ files:
100
100
  - spec/sample_files/wordfast_1(utf-8).txt
101
101
  - spec/sample_files/wordfast_1.txt
102
102
  - spec/sample_files/wordfast_2.txt
103
+ - spec/sample_files/wordfast_multiple.txt
103
104
  - spec/spec_helper.rb
104
105
  - spec/txt_tm_importer_spec.rb
105
106
  - txt_tm_importer.gemspec
@@ -131,5 +132,6 @@ test_files:
131
132
  - spec/sample_files/wordfast_1(utf-8).txt
132
133
  - spec/sample_files/wordfast_1.txt
133
134
  - spec/sample_files/wordfast_2.txt
135
+ - spec/sample_files/wordfast_multiple.txt
134
136
  - spec/spec_helper.rb
135
137
  - spec/txt_tm_importer_spec.rb