txt_tm_importer 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8a437d5112648108747b10abbaf3b2253ba2bf78
4
- data.tar.gz: 86f0997da5a86514c9c3b11ef1a9a251795f769e
3
+ metadata.gz: fd3e21f0db8fdc8d66a5d30a7af0d3adcbab92df
4
+ data.tar.gz: 05e34ce94dd18bb7339bb623822330cc611ade04
5
5
  SHA512:
6
- metadata.gz: 975b00b591daa6f1561be0e7541dae0a92d1dbc485419c5981a594903ebfc61bb561936484d52499f776a2d9a01b1ceec2f28a595f4e01d4fec0aad0739b0a6b
7
- data.tar.gz: c399dbc772ab1764988718efda2c3d6fb291b79dedadf4d468ec68411cbb66ac18293986da19323991e6b9b2ee5ca406bc8b250c52a72beeb12504c1b6a02015
6
+ metadata.gz: 41f3e80b4578c8709df95962e541c56c0a47792662a63a4686cb8e3253dc4bf06fe40b61545da092c17754e4571735d8b806be7732b79ef6f653de8e38c2c612
7
+ data.tar.gz: 7bf1d31a1372156e1ef825ffa3b1fb26ba053f6bf49c6e15b3d90c46e8bcf2c66dcb8943147a08df2edadbe32ac1e1e7846435a6a57336163fd3790c7f616498
@@ -75,18 +75,32 @@ module TxtTmImporter
75
75
  wordfast_lines.each_with_index do |line, index|
76
76
  next if line.empty? || line.gsub(/\s+/, '').empty?
77
77
  line_array = line.split("\t")
78
- @doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
79
- @doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
78
+ @doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '')
79
+ @doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '')
80
80
  next if index.eql?(0)
81
81
  timestamp = create_timestamp(line.split("\t")[0])
82
82
  @doc[:tu][:creation_date] = timestamp unless timestamp.nil?
83
83
  generate_unique_id
84
84
  write_tu
85
- write_seg(remove_wordfast_tags(line_array[4]), 'source', line_array[3]) unless line_array[4].nil?
86
- write_seg(remove_wordfast_tags(line_array[6]), 'target', line_array[5]) unless line_array[6].nil?
85
+ write_seg(remove_wordfast_tags(line_array[4]), 'source', @doc[:source_language]) unless line_array[4].nil?
86
+ write_seg(remove_wordfast_tags(line_array[6]), 'target', @doc[:target_language]) unless line_array[6].nil?
87
87
  end
88
88
  end
89
89
 
90
+ def wordfast_stats
91
+ lines = wordfast_lines
92
+ lines.each_with_index do |line, index|
93
+ next if line.empty? || line.gsub(/\s+/, '').empty?
94
+ next if index.eql?(0)
95
+ @doc[:tu][:counter] += 1
96
+ @doc[:source_language] = line.split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
97
+ @doc[:target_language] = line.split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
98
+ @doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
99
+ @doc[:language_pairs] = @doc[:language_pairs].uniq
100
+ end
101
+ @doc[:seg][:counter] = @doc[:tu][:counter] * 2
102
+ end
103
+
90
104
  def import_twb_file
91
105
  role_counter = 0
92
106
  tu_tracker = 0
@@ -110,19 +124,6 @@ module TxtTmImporter
110
124
  end
111
125
  end
112
126
 
113
- def wordfast_stats
114
- lines = wordfast_lines
115
- lines.each_with_index do |line, index|
116
- next if line.empty? || line.gsub(/\s+/, '').empty?
117
- next if index.eql?(0)
118
- @doc[:tu][:counter] += 1
119
- end
120
- @doc[:seg][:counter] = @doc[:tu][:counter] * 2
121
- @doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
122
- @doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
123
- @doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
124
- end
125
-
126
127
  def twb_export_file_stats
127
128
  @doc[:tu][:counter] = @text.scan(/<\/TrU>/).count
128
129
  @doc[:seg][:counter] = @text.scan(/<Seg/).count
@@ -1,3 +1,3 @@
1
1
  module TxtTmImporter
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -29,6 +29,12 @@ describe TxtTmImporter do
29
29
  txt = TxtTmImporter::Tm.new(file_path: file_path).stats
30
30
  expect(txt).to eq({:tu_count=>2, :seg_count=>4, :language_pairs=>[['ES-EM', 'EN-US']]})
31
31
  end
32
+
33
+ it 'reports the stats of a wordfast txt file 5' do
34
+ file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
35
+ txt = TxtTmImporter::Tm.new(file_path: file_path).stats
36
+ expect(txt).to eq({:tu_count=>407, :seg_count=>814, :language_pairs=>[["PT-BR", "EN-US"], ["IT-IT", "EN-US"], ["PT-BR", "EN-GB"]]})
37
+ end
32
38
  end
33
39
 
34
40
  describe '#import' do
@@ -151,5 +157,17 @@ describe TxtTmImporter do
151
157
  txt = TxtTmImporter::Tm.new(file_path: file_path).import
152
158
  expect(txt[1][2][4]).to eq("La renovación de procesos con nuevos equipamientos beneficiará directamente a clientes y pacientes que utilizan medicamentos y alimentación parenteral suministrados por el grupo")
153
159
  end
160
+
161
+ it 'imports a txt file 21' do
162
+ file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
163
+ txt = TxtTmImporter::Tm.new(file_path: file_path).import
164
+ expect(txt[1][2][4]).to eq("Mundo Físico VS Contratual")
165
+ end
166
+
167
+ it 'imports a txt file 22' do
168
+ file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
169
+ txt = TxtTmImporter::Tm.new(file_path: file_path).import
170
+ expect(txt[0][-1][0]).to eq(txt[1][-1][0])
171
+ end
154
172
  end
155
173
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: txt_tm_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
@@ -100,6 +100,7 @@ files:
100
100
  - spec/sample_files/wordfast_1(utf-8).txt
101
101
  - spec/sample_files/wordfast_1.txt
102
102
  - spec/sample_files/wordfast_2.txt
103
+ - spec/sample_files/wordfast_multiple.txt
103
104
  - spec/spec_helper.rb
104
105
  - spec/txt_tm_importer_spec.rb
105
106
  - txt_tm_importer.gemspec
@@ -131,5 +132,6 @@ test_files:
131
132
  - spec/sample_files/wordfast_1(utf-8).txt
132
133
  - spec/sample_files/wordfast_1.txt
133
134
  - spec/sample_files/wordfast_2.txt
135
+ - spec/sample_files/wordfast_multiple.txt
134
136
  - spec/spec_helper.rb
135
137
  - spec/txt_tm_importer_spec.rb