txt_tm_importer 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7f16238c6b6ad668720bc31949b5ea75e4cc9079
4
- data.tar.gz: ea4f891cbf848221e16cb34ce76200b14934b727
3
+ metadata.gz: 8a437d5112648108747b10abbaf3b2253ba2bf78
4
+ data.tar.gz: 86f0997da5a86514c9c3b11ef1a9a251795f769e
5
5
  SHA512:
6
- metadata.gz: 20b8130a49d629c8404b8ed8ac89e36d7084be82a7c509efc0f3e8d951745c8c90d30b843861d385e482671f62a04ff9de1c375903738c99445bd3d258d2b8ba
7
- data.tar.gz: 03143efba5a48ebe19104d197a47f96b29ed8b2aa92d6c4c125d06f4e65f5a47e99cf7a3e48e72b76db485a6eadb0e5a65043101aab0e0f1dc6d8e8a935f93dc
6
+ metadata.gz: 975b00b591daa6f1561be0e7541dae0a92d1dbc485419c5981a594903ebfc61bb561936484d52499f776a2d9a01b1ceec2f28a595f4e01d4fec0aad0739b0a6b
7
+ data.tar.gz: c399dbc772ab1764988718efda2c3d6fb291b79dedadf4d468ec68411cbb66ac18293986da19323991e6b9b2ee5ca406bc8b250c52a72beeb12504c1b6a02015
@@ -73,6 +73,7 @@ module TxtTmImporter
73
73
 
74
74
  def import_wordfast_file
75
75
  wordfast_lines.each_with_index do |line, index|
76
+ next if line.empty? || line.gsub(/\s+/, '').empty?
76
77
  line_array = line.split("\t")
77
78
  @doc[:source_language] = line_array[3].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
78
79
  @doc[:target_language] = line_array[5].gsub(/%/, '').gsub(/\s/, '') if index.eql?(0)
@@ -96,7 +97,7 @@ module TxtTmImporter
96
97
  if line.include?('<Seg')
97
98
  write_tu if tu_tracker.eql?(1)
98
99
  tu_tracker = 0 if tu_tracker > 0
99
- language = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty?
100
+ language = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)[^>]+(?=>)/).empty?
100
101
  if role_counter.eql?(0)
101
102
  write_seg(line.scan(/(?<=>).+/)[0], 'source', language)
102
103
  role_counter += 1
@@ -111,7 +112,11 @@ module TxtTmImporter
111
112
 
112
113
  def wordfast_stats
113
114
  lines = wordfast_lines
114
- @doc[:tu][:counter] = lines.size - 1
115
+ lines.each_with_index do |line, index|
116
+ next if line.empty? || line.gsub(/\s+/, '').empty?
117
+ next if index.eql?(0)
118
+ @doc[:tu][:counter] += 1
119
+ end
115
120
  @doc[:seg][:counter] = @doc[:tu][:counter] * 2
116
121
  @doc[:source_language] = lines[0].split("\t")[3].gsub(/%/, '').gsub(/\s/, '')
117
122
  @doc[:target_language] = lines[0].split("\t")[5].gsub(/%/, '').gsub(/\s/, '')
@@ -124,8 +129,8 @@ module TxtTmImporter
124
129
  role_counter = 0
125
130
  @text.each_line do |line|
126
131
  if line.include?('<Seg L=')
127
- @doc[:source_language] = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
128
- @doc[:target_language] = line.scan(/(?<=<Seg L=)\S+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
132
+ @doc[:source_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(0)
133
+ @doc[:target_language] = line.scan(/(?<=<Seg L=)[^>]+(?=>)/)[0] if !line.scan(/(?<=<Seg L=)\S+(?=>)/).empty? && role_counter.eql?(1)
129
134
  role_counter += 1 if role_counter.eql?(0)
130
135
  end
131
136
  @doc[:language_pairs] << [@doc[:source_language], @doc[:target_language]] if !@doc[:source_language].nil? && !@doc[:target_language].nil? && role_counter > 0
@@ -1,3 +1,3 @@
1
1
  module TxtTmImporter
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: txt_tm_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias