txt_tm_importer 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/txt_tm_importer.rb +9 -4
- data/lib/txt_tm_importer/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8a437d5112648108747b10abbaf3b2253ba2bf78
|
|
4
|
+
data.tar.gz: 86f0997da5a86514c9c3b11ef1a9a251795f769e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 975b00b591daa6f1561be0e7541dae0a92d1dbc485419c5981a594903ebfc61bb561936484d52499f776a2d9a01b1ceec2f28a595f4e01d4fec0aad0739b0a6b
|
|
7
|
+
data.tar.gz: c399dbc772ab1764988718efda2c3d6fb291b79dedadf4d468ec68411cbb66ac18293986da19323991e6b9b2ee5ca406bc8b250c52a72beeb12504c1b6a02015
|
data/lib/txt_tm_importer.rb
CHANGED
|
@@ -73,6 +73,7 @@ module TxtTmImporter
|
|
|
73
73
|
|
|
74
74
|
def import_wordfast_file
|
|
75
75
|
wordfast_lines.each_with_index do |line, index|
|
|
76
|
+
next if line.empty? || line.gsub(/\s+/, '').empty?
|
|
76
77
|
line_array = line.split("\t")
|
|
77
78
|
@doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
|
|
78
79
|
@doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
|
|
@@ -96,7 +97,7 @@ module TxtTmImporter
|
|
|
96
97
|
if line.include?('<Seg')
|
|
97
98
|
write_tu if tu_tracker.eql?(1)
|
|
98
99
|
tu_tracker = 0 if tu_tracker > 0
|
|
99
|
-
language = line.scan(/(?<=<Seg L=)
|
|
100
|
+
language = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)[^>]+(?=>)/).empty?
|
|
100
101
|
if role_counter.eql?(0)
|
|
101
102
|
write_seg(line.scan(/(?<=>).+/)[0], 'source', language)
|
|
102
103
|
role_counter += 1
|
|
@@ -111,7 +112,11 @@ module TxtTmImporter
|
|
|
111
112
|
|
|
112
113
|
def wordfast_stats
|
|
113
114
|
lines = wordfast_lines
|
|
114
|
-
|
|
115
|
+
lines.each_with_index do |line, index|
|
|
116
|
+
next if line.empty? || line.gsub(/\s+/, '').empty?
|
|
117
|
+
next if index.eql?(0)
|
|
118
|
+
@doc[:tu][:counter] += 1
|
|
119
|
+
end
|
|
115
120
|
@doc[:seg][:counter] = @doc[:tu][:counter] * 2
|
|
116
121
|
@doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
|
|
117
122
|
@doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
|
|
@@ -124,8 +129,8 @@ module TxtTmImporter
|
|
|
124
129
|
role_counter = 0
|
|
125
130
|
@text.each_line do |line|
|
|
126
131
|
if line.include?('<Seg L=')
|
|
127
|
-
@doc[:source_language] = line.scan(/(?<=<Seg L=)
|
|
128
|
-
@doc[:target_language] = line.scan(/(?<=<Seg L=)
|
|
132
|
+
@doc[:source_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
|
|
133
|
+
@doc[:target_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
|
|
129
134
|
role_counter += 1 if role_counter.eql?(0)
|
|
130
135
|
end
|
|
131
136
|
@doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]] if !@doc[:source_language].nil? && !@doc[:target_language].nil? && role_counter > 0
|