txt_tm_importer 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/txt_tm_importer.rb +18 -17
- data/lib/txt_tm_importer/version.rb +1 -1
- data/spec/sample_files/wordfast_multiple.txt +0 -0
- data/spec/txt_tm_importer_spec.rb +18 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fd3e21f0db8fdc8d66a5d30a7af0d3adcbab92df
|
4
|
+
data.tar.gz: 05e34ce94dd18bb7339bb623822330cc611ade04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41f3e80b4578c8709df95962e541c56c0a47792662a63a4686cb8e3253dc4bf06fe40b61545da092c17754e4571735d8b806be7732b79ef6f653de8e38c2c612
|
7
|
+
data.tar.gz: 7bf1d31a1372156e1ef825ffa3b1fb26ba053f6bf49c6e15b3d90c46e8bcf2c66dcb8943147a08df2edadbe32ac1e1e7846435a6a57336163fd3790c7f616498
|
data/lib/txt_tm_importer.rb
CHANGED
@@ -75,18 +75,32 @@ module TxtTmImporter
|
|
75
75
|
wordfast_lines.each_with_index do |line, index|
|
76
76
|
next if line.empty? || line.gsub(/\s+/, '').empty?
|
77
77
|
line_array = line.split("\t")
|
78
|
-
@doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '')
|
79
|
-
@doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '')
|
78
|
+
@doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '')
|
79
|
+
@doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '')
|
80
80
|
next if index.eql?(0)
|
81
81
|
timestamp = create_timestamp(line.split("\t")[0])
|
82
82
|
@doc[:tu][:creation_date] = timestamp unless timestamp.nil?
|
83
83
|
generate_unique_id
|
84
84
|
write_tu
|
85
|
-
write_seg(remove_wordfast_tags(line_array[4]), 'source',
|
86
|
-
write_seg(remove_wordfast_tags(line_array[6]), 'target',
|
85
|
+
write_seg(remove_wordfast_tags(line_array[4]), 'source', @doc[:source_language]) unless line_array[4].nil?
|
86
|
+
write_seg(remove_wordfast_tags(line_array[6]), 'target', @doc[:target_language]) unless line_array[6].nil?
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
90
|
+
def wordfast_stats
|
91
|
+
lines = wordfast_lines
|
92
|
+
lines.each_with_index do |line, index|
|
93
|
+
next if line.empty? || line.gsub(/\s+/, '').empty?
|
94
|
+
next if index.eql?(0)
|
95
|
+
@doc[:tu][:counter] += 1
|
96
|
+
@doc[:source_language] = line.split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
|
97
|
+
@doc[:target_language] = line.split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
|
98
|
+
@doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
|
99
|
+
@doc[:language_pairs] = @doc[:language_pairs].uniq
|
100
|
+
end
|
101
|
+
@doc[:seg][:counter] = @doc[:tu][:counter] * 2
|
102
|
+
end
|
103
|
+
|
90
104
|
def import_twb_file
|
91
105
|
role_counter = 0
|
92
106
|
tu_tracker = 0
|
@@ -110,19 +124,6 @@ module TxtTmImporter
|
|
110
124
|
end
|
111
125
|
end
|
112
126
|
|
113
|
-
def wordfast_stats
|
114
|
-
lines = wordfast_lines
|
115
|
-
lines.each_with_index do |line, index|
|
116
|
-
next if line.empty? || line.gsub(/\s+/, '').empty?
|
117
|
-
next if index.eql?(0)
|
118
|
-
@doc[:tu][:counter] += 1
|
119
|
-
end
|
120
|
-
@doc[:seg][:counter] = @doc[:tu][:counter] * 2
|
121
|
-
@doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
|
122
|
-
@doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
|
123
|
-
@doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
|
124
|
-
end
|
125
|
-
|
126
127
|
def twb_export_file_stats
|
127
128
|
@doc[:tu][:counter] = @text.scan(/<\/TrU>/).count
|
128
129
|
@doc[:seg][:counter] = @text.scan(/<Seg/).count
|
Binary file
|
@@ -29,6 +29,12 @@ describe TxtTmImporter do
|
|
29
29
|
txt = TxtTmImporter::Tm.new(file_path: file_path).stats
|
30
30
|
expect(txt).to eq({:tu_count=>2, :seg_count=>4, :language_pairs=>[['ES-EM', 'EN-US']]})
|
31
31
|
end
|
32
|
+
|
33
|
+
it 'reports the stats of a wordfast txt file 5' do
|
34
|
+
file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
|
35
|
+
txt = TxtTmImporter::Tm.new(file_path: file_path).stats
|
36
|
+
expect(txt).to eq({:tu_count=>407, :seg_count=>814, :language_pairs=>[["PT-BR", "EN-US"], ["IT-IT", "EN-US"], ["PT-BR", "EN-GB"]]})
|
37
|
+
end
|
32
38
|
end
|
33
39
|
|
34
40
|
describe '#import' do
|
@@ -151,5 +157,17 @@ describe TxtTmImporter do
|
|
151
157
|
txt = TxtTmImporter::Tm.new(file_path: file_path).import
|
152
158
|
expect(txt[1][2][4]).to eq("La renovación de procesos con nuevos equipamientos beneficiará directamente a clientes y pacientes que utilizan medicamentos y alimentación parenteral suministrados por el grupo")
|
153
159
|
end
|
160
|
+
|
161
|
+
it 'imports a txt file 21' do
|
162
|
+
file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
|
163
|
+
txt = TxtTmImporter::Tm.new(file_path: file_path).import
|
164
|
+
expect(txt[1][2][4]).to eq("Mundo Físico VS Contratual")
|
165
|
+
end
|
166
|
+
|
167
|
+
it 'imports a txt file 22' do
|
168
|
+
file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
|
169
|
+
txt = TxtTmImporter::Tm.new(file_path: file_path).import
|
170
|
+
expect(txt[0][-1][0]).to eq(txt[1][-1][0])
|
171
|
+
end
|
154
172
|
end
|
155
173
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: txt_tm_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
@@ -100,6 +100,7 @@ files:
|
|
100
100
|
- spec/sample_files/wordfast_1(utf-8).txt
|
101
101
|
- spec/sample_files/wordfast_1.txt
|
102
102
|
- spec/sample_files/wordfast_2.txt
|
103
|
+
- spec/sample_files/wordfast_multiple.txt
|
103
104
|
- spec/spec_helper.rb
|
104
105
|
- spec/txt_tm_importer_spec.rb
|
105
106
|
- txt_tm_importer.gemspec
|
@@ -131,5 +132,6 @@ test_files:
|
|
131
132
|
- spec/sample_files/wordfast_1(utf-8).txt
|
132
133
|
- spec/sample_files/wordfast_1.txt
|
133
134
|
- spec/sample_files/wordfast_2.txt
|
135
|
+
- spec/sample_files/wordfast_multiple.txt
|
134
136
|
- spec/spec_helper.rb
|
135
137
|
- spec/txt_tm_importer_spec.rb
|