txt_tm_importer 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/txt_tm_importer.rb +18 -17
- data/lib/txt_tm_importer/version.rb +1 -1
- data/spec/sample_files/wordfast_multiple.txt +0 -0
- data/spec/txt_tm_importer_spec.rb +18 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fd3e21f0db8fdc8d66a5d30a7af0d3adcbab92df
|
|
4
|
+
data.tar.gz: 05e34ce94dd18bb7339bb623822330cc611ade04
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 41f3e80b4578c8709df95962e541c56c0a47792662a63a4686cb8e3253dc4bf06fe40b61545da092c17754e4571735d8b806be7732b79ef6f653de8e38c2c612
|
|
7
|
+
data.tar.gz: 7bf1d31a1372156e1ef825ffa3b1fb26ba053f6bf49c6e15b3d90c46e8bcf2c66dcb8943147a08df2edadbe32ac1e1e7846435a6a57336163fd3790c7f616498
|
data/lib/txt_tm_importer.rb
CHANGED
|
@@ -75,18 +75,32 @@ module TxtTmImporter
|
|
|
75
75
|
wordfast_lines.each_with_index do |line, index|
|
|
76
76
|
next if line.empty? || line.gsub(/\s+/, '').empty?
|
|
77
77
|
line_array = line.split("\t")
|
|
78
|
-
@doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '')
|
|
79
|
-
@doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '')
|
|
78
|
+
@doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '')
|
|
79
|
+
@doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '')
|
|
80
80
|
next if index.eql?(0)
|
|
81
81
|
timestamp = create_timestamp(line.split("\t")[0])
|
|
82
82
|
@doc[:tu][:creation_date] = timestamp unless timestamp.nil?
|
|
83
83
|
generate_unique_id
|
|
84
84
|
write_tu
|
|
85
|
-
write_seg(remove_wordfast_tags(line_array[4]), 'source',
|
|
86
|
-
write_seg(remove_wordfast_tags(line_array[6]), 'target',
|
|
85
|
+
write_seg(remove_wordfast_tags(line_array[4]), 'source', @doc[:source_language]) unless line_array[4].nil?
|
|
86
|
+
write_seg(remove_wordfast_tags(line_array[6]), 'target', @doc[:target_language]) unless line_array[6].nil?
|
|
87
87
|
end
|
|
88
88
|
end
|
|
89
89
|
|
|
90
|
+
def wordfast_stats
|
|
91
|
+
lines = wordfast_lines
|
|
92
|
+
lines.each_with_index do |line, index|
|
|
93
|
+
next if line.empty? || line.gsub(/\s+/, '').empty?
|
|
94
|
+
next if index.eql?(0)
|
|
95
|
+
@doc[:tu][:counter] += 1
|
|
96
|
+
@doc[:source_language] = line.split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
|
|
97
|
+
@doc[:target_language] = line.split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
|
|
98
|
+
@doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
|
|
99
|
+
@doc[:language_pairs] = @doc[:language_pairs].uniq
|
|
100
|
+
end
|
|
101
|
+
@doc[:seg][:counter] = @doc[:tu][:counter] * 2
|
|
102
|
+
end
|
|
103
|
+
|
|
90
104
|
def import_twb_file
|
|
91
105
|
role_counter = 0
|
|
92
106
|
tu_tracker = 0
|
|
@@ -110,19 +124,6 @@ module TxtTmImporter
|
|
|
110
124
|
end
|
|
111
125
|
end
|
|
112
126
|
|
|
113
|
-
def wordfast_stats
|
|
114
|
-
lines = wordfast_lines
|
|
115
|
-
lines.each_with_index do |line, index|
|
|
116
|
-
next if line.empty? || line.gsub(/\s+/, '').empty?
|
|
117
|
-
next if index.eql?(0)
|
|
118
|
-
@doc[:tu][:counter] += 1
|
|
119
|
-
end
|
|
120
|
-
@doc[:seg][:counter] = @doc[:tu][:counter] * 2
|
|
121
|
-
@doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
|
|
122
|
-
@doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
|
|
123
|
-
@doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]]
|
|
124
|
-
end
|
|
125
|
-
|
|
126
127
|
def twb_export_file_stats
|
|
127
128
|
@doc[:tu][:counter] = @text.scan(/<\/TrU>/).count
|
|
128
129
|
@doc[:seg][:counter] = @text.scan(/<Seg/).count
|
|
Binary file
|
|
@@ -29,6 +29,12 @@ describe TxtTmImporter do
|
|
|
29
29
|
txt = TxtTmImporter::Tm.new(file_path: file_path).stats
|
|
30
30
|
expect(txt).to eq({:tu_count=>2, :seg_count=>4, :language_pairs=>[['ES-EM', 'EN-US']]})
|
|
31
31
|
end
|
|
32
|
+
|
|
33
|
+
it 'reports the stats of a wordfast txt file 5' do
|
|
34
|
+
file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
|
|
35
|
+
txt = TxtTmImporter::Tm.new(file_path: file_path).stats
|
|
36
|
+
expect(txt).to eq({:tu_count=>407, :seg_count=>814, :language_pairs=>[["PT-BR", "EN-US"], ["IT-IT", "EN-US"], ["PT-BR", "EN-GB"]]})
|
|
37
|
+
end
|
|
32
38
|
end
|
|
33
39
|
|
|
34
40
|
describe '#import' do
|
|
@@ -151,5 +157,17 @@ describe TxtTmImporter do
|
|
|
151
157
|
txt = TxtTmImporter::Tm.new(file_path: file_path).import
|
|
152
158
|
expect(txt[1][2][4]).to eq("La renovación de procesos con nuevos equipamientos beneficiará directamente a clientes y pacientes que utilizan medicamentos y alimentación parenteral suministrados por el grupo")
|
|
153
159
|
end
|
|
160
|
+
|
|
161
|
+
it 'imports a txt file 21' do
|
|
162
|
+
file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
|
|
163
|
+
txt = TxtTmImporter::Tm.new(file_path: file_path).import
|
|
164
|
+
expect(txt[1][2][4]).to eq("Mundo Físico VS Contratual")
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
it 'imports a txt file 22' do
|
|
168
|
+
file_path = File.expand_path('../txt_tm_importer/spec/sample_files/wordfast_multiple.txt')
|
|
169
|
+
txt = TxtTmImporter::Tm.new(file_path: file_path).import
|
|
170
|
+
expect(txt[0][-1][0]).to eq(txt[1][-1][0])
|
|
171
|
+
end
|
|
154
172
|
end
|
|
155
173
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: txt_tm_importer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.5.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kevin S. Dias
|
|
@@ -100,6 +100,7 @@ files:
|
|
|
100
100
|
- spec/sample_files/wordfast_1(utf-8).txt
|
|
101
101
|
- spec/sample_files/wordfast_1.txt
|
|
102
102
|
- spec/sample_files/wordfast_2.txt
|
|
103
|
+
- spec/sample_files/wordfast_multiple.txt
|
|
103
104
|
- spec/spec_helper.rb
|
|
104
105
|
- spec/txt_tm_importer_spec.rb
|
|
105
106
|
- txt_tm_importer.gemspec
|
|
@@ -131,5 +132,6 @@ test_files:
|
|
|
131
132
|
- spec/sample_files/wordfast_1(utf-8).txt
|
|
132
133
|
- spec/sample_files/wordfast_1.txt
|
|
133
134
|
- spec/sample_files/wordfast_2.txt
|
|
135
|
+
- spec/sample_files/wordfast_multiple.txt
|
|
134
136
|
- spec/spec_helper.rb
|
|
135
137
|
- spec/txt_tm_importer_spec.rb
|