txt_tm_importer 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7f16238c6b6ad668720bc31949b5ea75e4cc9079
4
- data.tar.gz: ea4f891cbf848221e16cb34ce76200b14934b727
3
+ metadata.gz: 8a437d5112648108747b10abbaf3b2253ba2bf78
4
+ data.tar.gz: 86f0997da5a86514c9c3b11ef1a9a251795f769e
5
5
  SHA512:
6
- metadata.gz: 20b8130a49d629c8404b8ed8ac89e36d7084be82a7c509efc0f3e8d951745c8c90d30b843861d385e482671f62a04ff9de1c375903738c99445bd3d258d2b8ba
7
- data.tar.gz: 03143efba5a48ebe19104d197a47f96b29ed8b2aa92d6c4c125d06f4e65f5a47e99cf7a3e48e72b76db485a6eadb0e5a65043101aab0e0f1dc6d8e8a935f93dc
6
+ metadata.gz: 975b00b591daa6f1561be0e7541dae0a92d1dbc485419c5981a594903ebfc61bb561936484d52499f776a2d9a01b1ceec2f28a595f4e01d4fec0aad0739b0a6b
7
+ data.tar.gz: c399dbc772ab1764988718efda2c3d6fb291b79dedadf4d468ec68411cbb66ac18293986da19323991e6b9b2ee5ca406bc8b250c52a72beeb12504c1b6a02015
@@ -73,6 +73,7 @@ module TxtTmImporter
73
73
 
74
74
  def import_wordfast_file
75
75
  wordfast_lines.each_with_index do |line, index|
76
+ next if line.empty? || line.gsub(/\s+/, '').empty?
76
77
  line_array = line.split("\t")
77
78
  @doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
78
79
  @doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
@@ -96,7 +97,7 @@ module TxtTmImporter
96
97
  if line.include?('<Seg')
97
98
  write_tu if tu_tracker.eql?(1)
98
99
  tu_tracker = 0 if tu_tracker > 0
99
- language = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty?
100
+ language = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)[^>]+(?=>)/).empty?
100
101
  if role_counter.eql?(0)
101
102
  write_seg(line.scan(/(?<=>).+/)[0], 'source', language)
102
103
  role_counter += 1
@@ -111,7 +112,11 @@ module TxtTmImporter
111
112
 
112
113
  def wordfast_stats
113
114
  lines = wordfast_lines
114
- @doc[:tu][:counter] = lines.size - 1
115
+ lines.each_with_index do |line, index|
116
+ next if line.empty? || line.gsub(/\s+/, '').empty?
117
+ next if index.eql?(0)
118
+ @doc[:tu][:counter] += 1
119
+ end
115
120
  @doc[:seg][:counter] = @doc[:tu][:counter] * 2
116
121
  @doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
117
122
  @doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
@@ -124,8 +129,8 @@ module TxtTmImporter
124
129
  role_counter = 0
125
130
  @text.each_line do |line|
126
131
  if line.include?('<Seg L=')
127
- @doc[:source_language] = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
128
- @doc[:target_language] = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
132
+ @doc[:source_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
133
+ @doc[:target_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
129
134
  role_counter += 1 if role_counter.eql?(0)
130
135
  end
131
136
  @doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]] if !@doc[:source_language].nil? && !@doc[:target_language].nil? && role_counter > 0
@@ -1,3 +1,3 @@
1
1
  module TxtTmImporter
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: txt_tm_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias