txt_tm_importer 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/txt_tm_importer.rb +9 -4
- data/lib/txt_tm_importer/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a437d5112648108747b10abbaf3b2253ba2bf78
|
4
|
+
data.tar.gz: 86f0997da5a86514c9c3b11ef1a9a251795f769e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 975b00b591daa6f1561be0e7541dae0a92d1dbc485419c5981a594903ebfc61bb561936484d52499f776a2d9a01b1ceec2f28a595f4e01d4fec0aad0739b0a6b
|
7
|
+
data.tar.gz: c399dbc772ab1764988718efda2c3d6fb291b79dedadf4d468ec68411cbb66ac18293986da19323991e6b9b2ee5ca406bc8b250c52a72beeb12504c1b6a02015
|
data/lib/txt_tm_importer.rb
CHANGED
@@ -73,6 +73,7 @@ module TxtTmImporter
|
|
73
73
|
|
74
74
|
def import_wordfast_file
|
75
75
|
wordfast_lines.each_with_index do |line, index|
|
76
|
+
next if line.empty? || line.gsub(/\s+/, '').empty?
|
76
77
|
line_array = line.split("\t")
|
77
78
|
@doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
|
78
79
|
@doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
|
@@ -96,7 +97,7 @@ module TxtTmImporter
|
|
96
97
|
if line.include?('<Seg')
|
97
98
|
write_tu if tu_tracker.eql?(1)
|
98
99
|
tu_tracker = 0 if tu_tracker > 0
|
99
|
-
language = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty?
|
100
|
+
language = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)[^>]+(?=>)/).empty?
|
100
101
|
if role_counter.eql?(0)
|
101
102
|
write_seg(line.scan(/(?<=>).+/)[0], 'source', language)
|
102
103
|
role_counter += 1
|
@@ -111,7 +112,11 @@ module TxtTmImporter
|
|
111
112
|
|
112
113
|
def wordfast_stats
|
113
114
|
lines = wordfast_lines
|
114
|
-
|
115
|
+
lines.each_with_index do |line, index|
|
116
|
+
next if line.empty? || line.gsub(/\s+/, '').empty?
|
117
|
+
next if index.eql?(0)
|
118
|
+
@doc[:tu][:counter] += 1
|
119
|
+
end
|
115
120
|
@doc[:seg][:counter] = @doc[:tu][:counter] * 2
|
116
121
|
@doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
|
117
122
|
@doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
|
@@ -124,8 +129,8 @@ module TxtTmImporter
|
|
124
129
|
role_counter = 0
|
125
130
|
@text.each_line do |line|
|
126
131
|
if line.include?('<Seg L=')
|
127
|
-
@doc[:source_language] = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
|
128
|
-
@doc[:target_language] = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
|
132
|
+
@doc[:source_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
|
133
|
+
@doc[:target_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
|
129
134
|
role_counter += 1 if role_counter.eql?(0)
|
130
135
|
end
|
131
136
|
@doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]] if !@doc[:source_language].nil? && !@doc[:target_language].nil? && role_counter > 0
|