konjak 0.0.17 → 0.0.18
This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/konjak/tmx_segmentor/strategy.rb +46 -12
- data/lib/konjak/version.rb +1 -1
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: c4ffc2d0548951d62d07f5612bd4399e8a8f6519
         | 
| 4 | 
            +
              data.tar.gz: 77a83feb0b806e2e6a87b56246c7062fa175f364
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: c92f4f5d4328b6fe29e5a92451b7e648796c37ec7c044a58ecfbb70e7620cf67494fce6a8052bbce6901caa85a529a5d35001ccafe236c7dd0b97efcd7e399d3
         | 
| 7 | 
            +
              data.tar.gz: 995318581a91f8fbc41ddf97258777ba3942e6e7c69ab1b303208be8d7f1aecca44c2b0561c1a10adc70f5055e583223f97ad8fcca7787a8165f85ff37f8a8e8
         | 
| @@ -17,7 +17,7 @@ module Konjak | |
| 17 17 |  | 
| 18 18 | 
             
                  def segmentize(text)
         | 
| 19 19 | 
             
                    segments = [text]
         | 
| 20 | 
            -
                    translation_units.each do |translation_unit|
         | 
| 20 | 
            +
                    translation_units(text).each do |translation_unit|
         | 
| 21 21 | 
             
                      segment = translation_unit.variant(@lang).segment
         | 
| 22 22 |  | 
| 23 23 | 
             
                      pat = compile_pattern(segment)
         | 
| @@ -35,13 +35,20 @@ module Konjak | |
| 35 35 | 
             
                  private
         | 
| 36 36 |  | 
| 37 37 | 
             
                  def default_options
         | 
| 38 | 
            -
                    { | 
| 38 | 
            +
                    {
         | 
| 39 | 
            +
                      min_segment_length: 10,
         | 
| 40 | 
            +
                      max_segment_length: nil
         | 
| 41 | 
            +
                    }
         | 
| 39 42 | 
             
                  end
         | 
| 40 43 |  | 
| 41 44 | 
             
                  def min_segment_length
         | 
| 42 45 | 
             
                    @options[:min_segment_length]
         | 
| 43 46 | 
             
                  end
         | 
| 44 47 |  | 
| 48 | 
            +
                  def max_segment_length
         | 
| 49 | 
            +
                    @options[:max_segment_length]
         | 
| 50 | 
            +
                  end
         | 
| 51 | 
            +
             | 
| 45 52 | 
             
                  def split(pat, segment, text)
         | 
| 46 53 | 
             
                    texts = []
         | 
| 47 54 | 
             
                    while true
         | 
| @@ -62,26 +69,53 @@ module Konjak | |
| 62 69 | 
             
                    texts << text
         | 
| 63 70 | 
             
                  end
         | 
| 64 71 |  | 
| 65 | 
            -
                  def translation_units
         | 
| 66 | 
            -
                    @tmx.body.translation_units | 
| 72 | 
            +
                  def translation_units(text)
         | 
| 73 | 
            +
                    tus = @tmx.body.translation_units
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                    tus.select! {|tu|
         | 
| 76 | 
            +
                      segment = tu.variant(@lang).segment
         | 
| 77 | 
            +
                      segment_length = segment.text.length
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                      next false if segment_length < min_segment_length
         | 
| 80 | 
            +
                      next false if max_segment_length && max_segment_length < segment_length
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                      text =~ compile_pattern(tu.variant(@lang).segment)
         | 
| 83 | 
            +
                    }
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                    simular_translation_units_map = {}
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                    tus.sort_by! {|tu|
         | 
| 88 | 
            +
                      tu_segment = tu.variant(@lang).segment
         | 
| 89 | 
            +
                      segment_text = tu_segment.text
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                      unless simular_translation_units_map[segment_text]
         | 
| 92 | 
            +
                        simular_translation_units = tus.select {|tu2|
         | 
| 93 | 
            +
                          tu2.variant(@lang).segment.text.include?(segment_text)
         | 
| 94 | 
            +
                        }.sort_by! {|tu2| tu2.variant(@lang).segment.text.size }
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                        simular_translation_units.each do |tu2|
         | 
| 97 | 
            +
                          simular_translation_units_map[tu2.variant(@lang).segment.text] = simular_translation_units
         | 
| 98 | 
            +
                        end
         | 
| 99 | 
            +
                      end
         | 
| 100 | 
            +
             | 
| 101 | 
            +
                      rank = simular_translation_units_map[segment_text].index {|tu2|
         | 
| 102 | 
            +
                        tu2.variant(@lang).segment.text == segment_text
         | 
| 103 | 
            +
                      }
         | 
| 104 | 
            +
             | 
| 67 105 | 
             
                      # GTTの場合
         | 
| 68 106 | 
             
                      translation_timestamp = nil
         | 
| 69 | 
            -
             | 
| 70 107 | 
             
                      if tm_entry = tu.at('entry_metadata').try(:at, 'tm_entry')
         | 
| 71 108 | 
             
                        source_info = tm_entry.at('source_info')
         | 
| 72 | 
            -
                        if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) ==  | 
| 109 | 
            +
                        if source_info.try(:at, 'source_lang').try(:text) == @lang && source_info.try(:at, 'source').try(:text) == segment_text
         | 
| 73 110 | 
             
                          translation_timestamp = tm_entry.at('translation').try(:attr, 'translation_timestamp').to_i
         | 
| 74 111 | 
             
                        end
         | 
| 75 112 | 
             
                      end
         | 
| 76 | 
            -
             | 
| 77 113 | 
             
                      translation_timestamp ||= 0
         | 
| 78 114 |  | 
| 79 | 
            -
                       | 
| 80 | 
            -
             | 
| 81 | 
            -
                      [-translation_timestamp, -segment_length]
         | 
| 82 | 
            -
                    }.reject {|tu|
         | 
| 83 | 
            -
                      tu.variant(@lang).segment.text.length < min_segment_length
         | 
| 115 | 
            +
                      [-rank, -translation_timestamp, -segment_text.length]
         | 
| 84 116 | 
             
                    }
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                    tus
         | 
| 85 119 | 
             
                  end
         | 
| 86 120 | 
             
                end
         | 
| 87 121 | 
             
              end
         | 
    
        data/lib/konjak/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: konjak
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0. | 
| 4 | 
            +
              version: 0.0.18
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Seiei Higa
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015-09- | 
| 11 | 
            +
            date: 2015-09-18 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: activesupport
         | 
| @@ -216,7 +216,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 216 216 | 
             
                  version: '0'
         | 
| 217 217 | 
             
            requirements: []
         | 
| 218 218 | 
             
            rubyforge_project: 
         | 
| 219 | 
            -
            rubygems_version: 2.4.5
         | 
| 219 | 
            +
            rubygems_version: 2.4.5.1
         | 
| 220 220 | 
             
            signing_key: 
         | 
| 221 221 | 
             
            specification_version: 4
         | 
| 222 222 | 
             
            summary: TMX(Translation Memory exChange) tools for ruby
         |