konjak 0.0.17 → 0.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5fbbfd98e759c97649e0296aeeed743df368383a
4
- data.tar.gz: 45a1b8046c4a4db79712623cf97c57a57f050205
3
+ metadata.gz: c4ffc2d0548951d62d07f5612bd4399e8a8f6519
4
+ data.tar.gz: 77a83feb0b806e2e6a87b56246c7062fa175f364
5
5
  SHA512:
6
- metadata.gz: 8831f0b9af534fb01182de0d11d3e96a62c62ce255dfabc843ed3e3452f3052a7fbb0e22b4937275889fa8fefaf75b2f63ee4623e1548a60d005a9a33cfd9c5f
7
- data.tar.gz: 1a82b13769650fba60bfdbc08450cd5855923714e3025552e22fb9f7f0ef48add62251b797c9edca13ead13b79abd1823077c2f544aa7324b39e064e57568c33
6
+ metadata.gz: c92f4f5d4328b6fe29e5a92451b7e648796c37ec7c044a58ecfbb70e7620cf67494fce6a8052bbce6901caa85a529a5d35001ccafe236c7dd0b97efcd7e399d3
7
+ data.tar.gz: 995318581a91f8fbc41ddf97258777ba3942e6e7c69ab1b303208be8d7f1aecca44c2b0561c1a10adc70f5055e583223f97ad8fcca7787a8165f85ff37f8a8e8
@@ -17,7 +17,7 @@ module Konjak
17
17
 
18
18
  def segmentize(text)
19
19
  segments = [text]
20
- translation_units.each do |translation_unit|
20
+ translation_units(text).each do |translation_unit|
21
21
  segment = translation_unit.variant(@lang).segment
22
22
 
23
23
  pat = compile_pattern(segment)
@@ -35,13 +35,20 @@ module Konjak
35
35
  private
36
36
 
37
37
  def default_options
38
- {min_segment_length: 10}
38
+ {
39
+ min_segment_length: 10,
40
+ max_segment_length: nil
41
+ }
39
42
  end
40
43
 
41
44
  def min_segment_length
42
45
  @options[:min_segment_length]
43
46
  end
44
47
 
48
+ def max_segment_length
49
+ @options[:max_segment_length]
50
+ end
51
+
45
52
  def split(pat, segment, text)
46
53
  texts = []
47
54
  while true
@@ -62,26 +69,53 @@ module Konjak
62
69
  texts << text
63
70
  end
64
71
 
65
- def translation_units
66
- @tmx.body.translation_units.sort_by {|tu|
72
+ def translation_units(text)
73
+ tus = @tmx.body.translation_units
74
+
75
+ tus.select! {|tu|
76
+ segment = tu.variant(@lang).segment
77
+ segment_length = segment.text.length
78
+
79
+ next false if segment_length < min_segment_length
80
+ next false if max_segment_length && max_segment_length < segment_length
81
+
82
+ text =~ compile_pattern(tu.variant(@lang).segment)
83
+ }
84
+
85
+ simular_translation_units_map = {}
86
+
87
+ tus.sort_by! {|tu|
88
+ tu_segment = tu.variant(@lang).segment
89
+ segment_text = tu_segment.text
90
+
91
+ unless simular_translation_units_map[segment_text]
92
+ simular_translation_units = tus.select {|tu2|
93
+ tu2.variant(@lang).segment.text.include?(segment_text)
94
+ }.sort_by! {|tu2| tu2.variant(@lang).segment.text.size }
95
+
96
+ simular_translation_units.each do |tu2|
97
+ simular_translation_units_map[tu2.variant(@lang).segment.text] = simular_translation_units
98
+ end
99
+ end
100
+
101
+ rank = simular_translation_units_map[segment_text].index {|tu2|
102
+ tu2.variant(@lang).segment.text == segment_text
103
+ }
104
+
67
105
  # GTTの場合
68
106
  translation_timestamp = nil
69
-
70
107
  if tm_entry = tu.at('entry_metadata').try(:at, 'tm_entry')
71
108
  source_info = tm_entry.at('source_info')
72
- if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) == tu.variant(@lang).segment.text
109
+ if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) == segment_text
73
110
  translation_timestamp = tm_entry.at('translation').try(:attr, 'translation_timestamp').to_i
74
111
  end
75
112
  end
76
-
77
113
  translation_timestamp ||= 0
78
114
 
79
- segment_length = tu.variant(@lang).segment.text.length
80
-
81
- [-translation_timestamp, -segment_length]
82
- }.reject {|tu|
83
- tu.variant(@lang).segment.text.length < min_segment_length
115
+ [-rank, -translation_timestamp, -segment_text.length]
84
116
  }
117
+
118
+ tus
85
119
  end
86
120
  end
87
121
  end
@@ -1,3 +1,3 @@
1
1
  module Konjak
2
- VERSION = "0.0.17"
2
+ VERSION = "0.0.18"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: konjak
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.17
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Seiei Higa
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-17 00:00:00.000000000 Z
11
+ date: 2015-09-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
216
216
  version: '0'
217
217
  requirements: []
218
218
  rubyforge_project:
219
- rubygems_version: 2.4.5
219
+ rubygems_version: 2.4.5.1
220
220
  signing_key:
221
221
  specification_version: 4
222
222
  summary: TMX(Translation Memory exChange) tools for ruby