translatomatic 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.translatomatic/config.yml +18 -0
- data/.travis.yml +33 -33
- data/Gemfile +6 -4
- data/README.de.md +53 -18
- data/README.es.md +55 -20
- data/README.fr.md +54 -19
- data/README.it.md +58 -23
- data/README.ja.md +54 -19
- data/README.ko.md +58 -23
- data/README.md +167 -141
- data/README.ms.md +51 -16
- data/README.pt.md +58 -23
- data/README.ru.md +53 -18
- data/README.sv.md +53 -18
- data/README.zh.md +53 -18
- data/bin/translatomatic +6 -6
- data/bin/travis +24 -26
- data/config/locales/translatomatic/de.yml +22 -11
- data/config/locales/translatomatic/en.yml +21 -12
- data/config/locales/translatomatic/es.yml +22 -11
- data/config/locales/translatomatic/fr.yml +22 -12
- data/config/locales/translatomatic/it.yml +22 -11
- data/config/locales/translatomatic/ja.yml +22 -11
- data/config/locales/translatomatic/ko.yml +22 -11
- data/config/locales/translatomatic/ms.yml +22 -11
- data/config/locales/translatomatic/pt.yml +22 -11
- data/config/locales/translatomatic/ru.yml +22 -11
- data/config/locales/translatomatic/sv.yml +22 -11
- data/config/locales/translatomatic/zh.yml +22 -11
- data/db/migrate/201712170000_initial.rb +25 -25
- data/lib/translatomatic/cli/base.rb +81 -73
- data/lib/translatomatic/cli/config.rb +110 -81
- data/lib/translatomatic/cli/main.rb +85 -72
- data/lib/translatomatic/cli/translate.rb +141 -106
- data/lib/translatomatic/cli.rb +8 -8
- data/lib/translatomatic/config.rb +302 -155
- data/lib/translatomatic/converter.rb +28 -260
- data/lib/translatomatic/database.rb +134 -134
- data/lib/translatomatic/define_options.rb +22 -0
- data/lib/translatomatic/escaped_unicode.rb +0 -0
- data/lib/translatomatic/extractor/base.rb +16 -16
- data/lib/translatomatic/extractor/ruby.rb +6 -6
- data/lib/translatomatic/extractor.rb +5 -5
- data/lib/translatomatic/file_translator.rb +269 -0
- data/lib/translatomatic/http_request.rb +162 -162
- data/lib/translatomatic/locale.rb +76 -76
- data/lib/translatomatic/logger.rb +23 -23
- data/lib/translatomatic/model/locale.rb +25 -25
- data/lib/translatomatic/model/text.rb +19 -19
- data/lib/translatomatic/model.rb +1 -1
- data/lib/translatomatic/option.rb +37 -41
- data/lib/translatomatic/progress_updater.rb +13 -13
- data/lib/translatomatic/resource_file/base.rb +269 -192
- data/lib/translatomatic/resource_file/csv.rb +37 -0
- data/lib/translatomatic/resource_file/html.rb +54 -47
- data/lib/translatomatic/resource_file/markdown.rb +50 -55
- data/lib/translatomatic/resource_file/plist.rb +153 -19
- data/lib/translatomatic/resource_file/po.rb +107 -0
- data/lib/translatomatic/resource_file/properties.rb +91 -90
- data/lib/translatomatic/resource_file/resw.rb +50 -30
- data/lib/translatomatic/resource_file/subtitle.rb +75 -0
- data/lib/translatomatic/resource_file/text.rb +24 -30
- data/lib/translatomatic/resource_file/xcode_strings.rb +75 -80
- data/lib/translatomatic/resource_file/xml.rb +98 -91
- data/lib/translatomatic/resource_file/yaml.rb +94 -116
- data/lib/translatomatic/resource_file.rb +87 -78
- data/lib/translatomatic/string.rb +188 -188
- data/lib/translatomatic/tmx/document.rb +99 -99
- data/lib/translatomatic/translation_result.rb +63 -63
- data/lib/translatomatic/{converter_stats.rb → translation_stats.rb} +17 -17
- data/lib/translatomatic/translator/base.rb +1 -1
- data/lib/translatomatic/translator/google.rb +2 -0
- data/lib/translatomatic/translator.rb +10 -2
- data/lib/translatomatic/util.rb +45 -45
- data/lib/translatomatic/version.rb +7 -7
- data/lib/translatomatic.rb +52 -49
- data/translatomatic.gemspec +3 -2
- metadata +25 -5
| @@ -1,188 +1,188 @@ | |
| 1 | 
            -
            module Translatomatic
         | 
| 2 | 
            -
              # A string object with an associated locale.
         | 
| 3 | 
            -
              class String
         | 
| 4 | 
            -
             | 
| 5 | 
            -
                # @return [String] The string
         | 
| 6 | 
            -
                attr_reader :value
         | 
| 7 | 
            -
             | 
| 8 | 
            -
                # @return [Translatomatic::Locale] The string's locale
         | 
| 9 | 
            -
                attr_reader :locale
         | 
| 10 | 
            -
             | 
| 11 | 
            -
                # @return [Translatomatic::String] If this string is a substring of
         | 
| 12 | 
            -
                #   another string, returns the original string. Otherwise, returns nil.
         | 
| 13 | 
            -
                attr_reader :parent
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                # @return [Number] If this string is a substring of another string,
         | 
| 16 | 
            -
                #   returns the starting offset of this string in the original.
         | 
| 17 | 
            -
                attr_reader :offset
         | 
| 18 | 
            -
             | 
| 19 | 
            -
                def initialize(value, locale, options = {})
         | 
| 20 | 
            -
                  @value = value.to_s || ''
         | 
| 21 | 
            -
                  @locale = Translatomatic::Locale.parse(locale)
         | 
| 22 | 
            -
                  @offset = options[:offset] || 0
         | 
| 23 | 
            -
                  @parent = options[:parent]
         | 
| 24 | 
            -
                end
         | 
| 25 | 
            -
             | 
| 26 | 
            -
                # @return [String] The value of the string
         | 
| 27 | 
            -
                def to_s
         | 
| 28 | 
            -
                  @value
         | 
| 29 | 
            -
                end
         | 
| 30 | 
            -
             | 
| 31 | 
            -
                # @return [Number] The length of the string
         | 
| 32 | 
            -
                def length
         | 
| 33 | 
            -
                  @value.length
         | 
| 34 | 
            -
                end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                # @return [boolean] True if the string is empty
         | 
| 37 | 
            -
                def empty?
         | 
| 38 | 
            -
                  @value.empty?
         | 
| 39 | 
            -
                end
         | 
| 40 | 
            -
             | 
| 41 | 
            -
                # Invokes value.match
         | 
| 42 | 
            -
                # @param pattern [Regexp,String] The regex pattern to match
         | 
| 43 | 
            -
                # @return [MatchData] Object describing the match, or nil if no match
         | 
| 44 | 
            -
                def match(pattern)
         | 
| 45 | 
            -
                  @value.match(pattern)
         | 
| 46 | 
            -
                end
         | 
| 47 | 
            -
             | 
| 48 | 
            -
                # @return [boolean] true if this string is a substring of another string
         | 
| 49 | 
            -
                def substring?
         | 
| 50 | 
            -
                  @parent ? true : false
         | 
| 51 | 
            -
                end
         | 
| 52 | 
            -
             | 
| 53 | 
            -
                # @return [Symbol] The type of string, corresponding to TMX segtype.
         | 
| 54 | 
            -
                # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
         | 
| 55 | 
            -
                def type
         | 
| 56 | 
            -
                  if sentences.length >= 2
         | 
| 57 | 
            -
                    :paragraph
         | 
| 58 | 
            -
                  else
         | 
| 59 | 
            -
                    script = script_data
         | 
| 60 | 
            -
                    @value.strip.match(/#{script.delimiter}\s*$/) ? :sentence : :phrase
         | 
| 61 | 
            -
                  end
         | 
| 62 | 
            -
                end
         | 
| 63 | 
            -
             | 
| 64 | 
            -
                # Find all sentences in the string
         | 
| 65 | 
            -
                # @return [Array<Translatomatic::String] List of sentences
         | 
| 66 | 
            -
                def sentences
         | 
| 67 | 
            -
                  substrings(sentence_regex)
         | 
| 68 | 
            -
                end
         | 
| 69 | 
            -
             | 
| 70 | 
            -
                # Find all substrings matching the given regex
         | 
| 71 | 
            -
                # @return [Array<Translatomatic::String] List of substrings
         | 
| 72 | 
            -
                def substrings(regex)
         | 
| 73 | 
            -
                  matches = matches(@value, regex)
         | 
| 74 | 
            -
                  strings = []
         | 
| 75 | 
            -
                  matches.each do |match|
         | 
| 76 | 
            -
                    substring = match.to_s
         | 
| 77 | 
            -
                    # find leading and trailing whitespace
         | 
| 78 | 
            -
                    next if substring.length == 0
         | 
| 79 | 
            -
             | 
| 80 | 
            -
                    parts = substring.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
         | 
| 81 | 
            -
                    value = parts[2]
         | 
| 82 | 
            -
                    offset = match.offset(0)[0]
         | 
| 83 | 
            -
                    offset += parts[1].length  # leading whitespace
         | 
| 84 | 
            -
                    strings << self.class.new(value, locale, offset: offset, parent: self)
         | 
| 85 | 
            -
                  end
         | 
| 86 | 
            -
             | 
| 87 | 
            -
                  # return [self] if there's only one substring and it's equal to self
         | 
| 88 | 
            -
                  strings.length == 1 && strings[0].eql?(self) ? [self] : strings
         | 
| 89 | 
            -
                end
         | 
| 90 | 
            -
             | 
| 91 | 
            -
                # @return [boolean] true if other is a {Translatomatic::String} with
         | 
| 92 | 
            -
                #   the same value and locale.
         | 
| 93 | 
            -
                def eql?(other)
         | 
| 94 | 
            -
                  other.kind_of?(Translatomatic::String) && other.hash == hash
         | 
| 95 | 
            -
                end
         | 
| 96 | 
            -
             | 
| 97 | 
            -
                # (see #eql?)
         | 
| 98 | 
            -
                def ==(other)
         | 
| 99 | 
            -
                  eql?(other)
         | 
| 100 | 
            -
                end
         | 
| 101 | 
            -
             | 
| 102 | 
            -
                # @!visibility private
         | 
| 103 | 
            -
                def hash
         | 
| 104 | 
            -
                  [value, locale].hash
         | 
| 105 | 
            -
                end
         | 
| 106 | 
            -
             | 
| 107 | 
            -
                private
         | 
| 108 | 
            -
             | 
| 109 | 
            -
                # @!visibility private
         | 
| 110 | 
            -
                class Script
         | 
| 111 | 
            -
                  attr_reader :language
         | 
| 112 | 
            -
                  attr_reader :delimiter      # sentence delimiter
         | 
| 113 | 
            -
                  attr_reader :trailing_space # delimiter requires trailing space or eol
         | 
| 114 | 
            -
                  attr_reader :left_to_right  # script direction
         | 
| 115 | 
            -
             | 
| 116 | 
            -
                  def initialize(language:, delimiter:, trailing_space:, direction:)
         | 
| 117 | 
            -
                    @language = language
         | 
| 118 | 
            -
                    @delimiter = delimiter
         | 
| 119 | 
            -
                    @trailing_space = trailing_space
         | 
| 120 | 
            -
                    @left_to_right = direction == :ltr
         | 
| 121 | 
            -
                    raise "invalid direction" unless [:ltr, :rtl].include?(direction)
         | 
| 122 | 
            -
                  end
         | 
| 123 | 
            -
                end
         | 
| 124 | 
            -
             | 
| 125 | 
            -
                SCRIPT_DATA = [
         | 
| 126 | 
            -
                  # [language, delimiter, trailing space, direction]
         | 
| 127 | 
            -
                  # japanese, no space after
         | 
| 128 | 
            -
                  ["ja", "\u3002", false, :ltr],
         | 
| 129 | 
            -
                  # chinese, no space after
         | 
| 130 | 
            -
                  ["zh", "\u3002", false, :ltr],  # can be written any direction
         | 
| 131 | 
            -
                   # armenian, space after
         | 
| 132 | 
            -
                  ["hy", ":", true, :ltr],
         | 
| 133 | 
            -
                  # hindi, space after
         | 
| 134 | 
            -
                  ["hi", "।", true, :ltr],
         | 
| 135 | 
            -
                  # urdu, space after, right to left
         | 
| 136 | 
            -
                  ["ur", "\u06d4", true, :rtl],
         | 
| 137 | 
            -
                  # thai, spaces used to separate sentences
         | 
| 138 | 
            -
                  ["th", "\\s", false, :ltr],
         | 
| 139 | 
            -
                  # arabic, right to left
         | 
| 140 | 
            -
                  ["ar", "\\.", true, :rtl],
         | 
| 141 | 
            -
                  # hebrew, right to left
         | 
| 142 | 
            -
                  ["he", "\\.", true, :rtl],
         | 
| 143 | 
            -
                  # all other languages
         | 
| 144 | 
            -
                  ["default", "\\.", true, :ltr],
         | 
| 145 | 
            -
                ]
         | 
| 146 | 
            -
             | 
| 147 | 
            -
                class << self
         | 
| 148 | 
            -
                  attr_reader :script_data
         | 
| 149 | 
            -
                end
         | 
| 150 | 
            -
             | 
| 151 | 
            -
                begin
         | 
| 152 | 
            -
                  script_data = {}
         | 
| 153 | 
            -
                  SCRIPT_DATA.each do |lang, delimiter, trailing, ltr|
         | 
| 154 | 
            -
                    script = Script.new(language: lang, delimiter: delimiter,
         | 
| 155 | 
            -
                      trailing_space: trailing, direction: ltr)
         | 
| 156 | 
            -
                    script_data[lang] = script
         | 
| 157 | 
            -
                  end
         | 
| 158 | 
            -
                  @script_data = script_data
         | 
| 159 | 
            -
                end
         | 
| 160 | 
            -
             | 
| 161 | 
            -
                def matches(s, re)
         | 
| 162 | 
            -
                  start_at = 0
         | 
| 163 | 
            -
                  matches = []
         | 
| 164 | 
            -
                  while(m = s.match(re, start_at))
         | 
| 165 | 
            -
                    break if m.to_s.empty?
         | 
| 166 | 
            -
                    matches.push(m)
         | 
| 167 | 
            -
                    start_at = m.end(0)
         | 
| 168 | 
            -
                  end
         | 
| 169 | 
            -
                  matches
         | 
| 170 | 
            -
                end
         | 
| 171 | 
            -
             | 
| 172 | 
            -
                def sentence_regex
         | 
| 173 | 
            -
                  script = script_data
         | 
| 174 | 
            -
                  if script.trailing_space
         | 
| 175 | 
            -
                    regex = /.*?(?:#{script.delimiter}\s+|\z)/m
         | 
| 176 | 
            -
                  else
         | 
| 177 | 
            -
                    # no trailing space after delimiter
         | 
| 178 | 
            -
                    regex = /.*?(?:#{script.delimiter}|\z)/m
         | 
| 179 | 
            -
                  end
         | 
| 180 | 
            -
                end
         | 
| 181 | 
            -
             | 
| 182 | 
            -
                def script_data
         | 
| 183 | 
            -
                  data = self.class.script_data
         | 
| 184 | 
            -
                  data[locale.language] || data["default"]
         | 
| 185 | 
            -
                end
         | 
| 186 | 
            -
             | 
| 187 | 
            -
              end
         | 
| 188 | 
            -
            end
         | 
module Translatomatic
  # A string object with an associated locale.
  #
  # A string may be a substring of another string (see {#sentences} and
  # {#substrings}), in which case it remembers its parent and its offset
  # within the parent.
  class String
    # @return [String] The string
    attr_reader :value

    # @return [Translatomatic::Locale] The string's locale
    attr_reader :locale

    # @return [Translatomatic::String] If this string is a substring of
    #   another string, returns the original string. Otherwise, returns nil.
    attr_reader :parent

    # @return [Number] If this string is a substring of another string,
    #   returns the starting offset of this string in the original.
    attr_reader :offset

    # Create a new string.
    # @param value [#to_s] The text; it is converted with +to_s+
    # @param locale [Object] Passed to Translatomatic::Locale.parse
    # @param options [Hash] Substring bookkeeping
    # @option options [Number] :offset (0) Offset within the parent string
    # @option options [Translatomatic::String] :parent (nil) Parent string
    def initialize(value, locale, options = {})
      # to_s never returns nil (nil.to_s is ''), so no fallback is needed
      @value = value.to_s
      @locale = Translatomatic::Locale.parse(locale)
      @offset = options[:offset] || 0
      @parent = options[:parent]
    end

    # @return [String] The value of the string
    def to_s
      @value
    end

    # @return [Number] The length of the string
    def length
      @value.length
    end

    # @return [boolean] True if the string is empty
    def empty?
      @value.empty?
    end

    # Invokes value.match
    # @param pattern [Regexp,String] The regex pattern to match
    # @return [MatchData] Object describing the match, or nil if no match
    def match(pattern)
      @value.match(pattern)
    end

    # @return [boolean] true if this string is a substring of another string
    def substring?
      @parent ? true : false
    end

    # @return [Symbol] The type of string, corresponding to TMX segtype:
    #   :paragraph for two or more sentences, :sentence when the text ends
    #   with the locale's sentence delimiter, :phrase otherwise.
    # @see http://xml.coverpages.org/tmxSpec971212.html#SEGTYPE
    def type
      return :paragraph if sentences.length >= 2

      delimiter = script_data.delimiter
      @value.strip.match(/#{delimiter}\s*$/) ? :sentence : :phrase
    end

    # Find all sentences in the string
    # @return [Array<Translatomatic::String>] List of sentences
    def sentences
      substrings(sentence_regex)
    end

    # Find all substrings matching the given regex. Leading and trailing
    # whitespace is removed from each substring, and segments that consist
    # only of whitespace are skipped.
    # @param regex [Regexp] Pattern describing one substring
    # @return [Array<Translatomatic::String>] List of substrings. If the
    #   only substring is equal to this string, returns [self].
    def substrings(regex)
      strings = []
      matches(@value, regex).each do |segment|
        # split the segment into leading whitespace, text, trailing whitespace
        parts = segment.to_s.match(/\A(\s*)(.*?)(\s*)\z/m).to_a
        text = parts[2]
        # A whitespace-only segment (e.g. a newline after the final
        # delimiter) has no text; emitting it would create an empty
        # substring and inflate the sentence count.
        next if text.empty?

        start = segment.offset(0)[0] + parts[1].length # skip leading whitespace
        strings << self.class.new(text, locale, offset: start, parent: self)
      end

      # return [self] if there's only one substring and it's equal to self
      strings.length == 1 && strings[0].eql?(self) ? [self] : strings
    end

    # @return [boolean] true if other is a {Translatomatic::String} with
    #   the same value and locale.
    def eql?(other)
      return false unless other.kind_of?(Translatomatic::String)

      # Compare the text directly so that a hash collision between two
      # different values cannot be reported as equality. Locales are still
      # compared through their hash codes, as #hash does.
      other.value == value && other.locale.hash == locale.hash
    end

    # (see #eql?)
    def ==(other)
      eql?(other)
    end

    # @!visibility private
    def hash
      [value, locale].hash
    end

    private

    # Sentence-splitting properties of one language's script.
    # @!visibility private
    class Script
      attr_reader :language
      attr_reader :delimiter      # sentence delimiter
      attr_reader :trailing_space # delimiter requires trailing space or eol
      attr_reader :left_to_right  # script direction

      # @param language [String] Language code, or "default"
      # @param delimiter [String] Regex source matching the sentence delimiter
      # @param trailing_space [boolean] Whether whitespace follows the delimiter
      # @param direction [Symbol] :ltr or :rtl
      # @raise [RuntimeError] If direction is neither :ltr nor :rtl
      def initialize(language:, delimiter:, trailing_space:, direction:)
        # validate before any state is assigned
        raise "invalid direction" unless [:ltr, :rtl].include?(direction)

        @language = language
        @delimiter = delimiter
        @trailing_space = trailing_space
        @left_to_right = direction == :ltr
      end
    end

    SCRIPT_DATA = [
      # [language, delimiter, trailing space, direction]
      # japanese, no space after
      ["ja", "\u3002", false, :ltr],
      # chinese, no space after
      ["zh", "\u3002", false, :ltr],  # can be written any direction
      # armenian, space after
      ["hy", ":", true, :ltr],
      # hindi, space after
      ["hi", "।", true, :ltr],
      # urdu, space after, right to left
      ["ur", "\u06d4", true, :rtl],
      # thai, spaces used to separate sentences
      ["th", "\\s", false, :ltr],
      # arabic, right to left
      ["ar", "\\.", true, :rtl],
      # hebrew, right to left
      ["he", "\\.", true, :rtl],
      # all other languages
      ["default", "\\.", true, :ltr],
    ].freeze

    class << self
      # @return [Hash<String,Script>] Script data keyed by language code
      attr_reader :script_data
    end

    # Build the language => Script lookup table once, when the class loads.
    @script_data = SCRIPT_DATA.each_with_object({}) do |row, table|
      lang, delimiter, trailing, direction = row
      table[lang] = Script.new(language: lang, delimiter: delimiter,
                               trailing_space: trailing, direction: direction)
    end

    # Collect successive non-empty matches of re in s, each search starting
    # where the previous match ended.
    # @param s [String] The text to scan
    # @param re [Regexp] The pattern to match
    # @return [Array<MatchData>] The matches, in order
    def matches(s, re)
      found = []
      position = 0
      while (m = s.match(re, position))
        # an empty match cannot advance the position; stop here
        break if m.to_s.empty?

        found << m
        position = m.end(0)
      end
      found
    end

    # @return [Regexp] Pattern matching one sentence: the shortest run of
    #   text up to the locale's delimiter or the end of the string.
    def sentence_regex
      script = script_data
      if script.trailing_space
        /.*?(?:#{script.delimiter}\s+|\z)/m
      else
        # no trailing space after delimiter
        /.*?(?:#{script.delimiter}|\z)/m
      end
    end

    # @return [Script] Script data for this string's language, falling back
    #   to the "default" entry.
    def script_data
      data = self.class.script_data
      data[locale.language] || data["default"]
    end
  end
end
| @@ -1,99 +1,99 @@ | |
| 1 | 
            -
            module Translatomatic::TMX
         | 
| 2 | 
            -
              # Translation Memory Exchange document
         | 
| 3 | 
            -
              class Document
         | 
| 4 | 
            -
             | 
| 5 | 
            -
                # Create a new instance
         | 
| 6 | 
            -
                # @param units [Array<TranslationUnit>] A list of translation units
         | 
| 7 | 
            -
                # @param source_locale [Locale] Source locale
         | 
| 8 | 
            -
                # @return [Translatomatic::TMX::Document] a new TMX object
         | 
| 9 | 
            -
                def initialize(units, source_locale)
         | 
| 10 | 
            -
                  units = [units] unless units.kind_of?(Array)
         | 
| 11 | 
            -
                  @units = units
         | 
| 12 | 
            -
                  @source_locale = source_locale
         | 
| 13 | 
            -
                end
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                # @return [String] An XML string
         | 
| 16 | 
            -
                def to_xml(options = {})
         | 
| 17 | 
            -
                  builder = Nokogiri::XML::Builder.new do |xml|
         | 
| 18 | 
            -
                    dtd = options[:dtd] || TMX_DTD
         | 
| 19 | 
            -
                    xml.doc.create_internal_subset('tmx', nil, dtd)
         | 
| 20 | 
            -
                    xml.tmx(version: "1.4") do
         | 
| 21 | 
            -
                      xml.header(creationtool: "Translatomatic",
         | 
| 22 | 
            -
                        creationtoolversion: Translatomatic::VERSION,
         | 
| 23 | 
            -
                        datatype: "PlainText",
         | 
| 24 | 
            -
                        segtype: "phrase",  # default segtype
         | 
| 25 | 
            -
                        adminlang: @source_locale.to_s,
         | 
| 26 | 
            -
                        srclang: @source_locale.to_s,
         | 
| 27 | 
            -
                        "o-tmf": DEFAULT_OTMF
         | 
| 28 | 
            -
                      )
         | 
| 29 | 
            -
                      xml.body { tmx_body(xml) }
         | 
| 30 | 
            -
                    end
         | 
| 31 | 
            -
                  end
         | 
| 32 | 
            -
                  builder.to_xml
         | 
| 33 | 
            -
                end
         | 
| 34 | 
            -
             | 
| 35 | 
            -
                # Create a TMX document from the given converter
         | 
| 36 | 
            -
                # @param texts [Array<Translatomatic::Model::Text>] List of texts
         | 
| 37 | 
            -
                # @return [Translatomatic::TMX::Document] TMX document
         | 
| 38 | 
            -
                def self.from_texts(texts)
         | 
| 39 | 
            -
                  # group texts by from_text_id to create units
         | 
| 40 | 
            -
                  # source_locale: use from_text.locale
         | 
| 41 | 
            -
                  # origin: use text.translator
         | 
| 42 | 
            -
                  sources = texts.select { |i| i.from_text.nil? }
         | 
| 43 | 
            -
                  source_locales = sources.collect { |i| i.locale }.uniq
         | 
| 44 | 
            -
                  raise t("tmx.multiple_locales") if source_locales.length > 1
         | 
| 45 | 
            -
                  units = units_from_texts(texts)
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                  return new(units, source_locales[0])
         | 
| 48 | 
            -
                end
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                def self.valid?(xml)
         | 
| 51 | 
            -
                  options = Nokogiri::XML::ParseOptions::DTDVALID
         | 
| 52 | 
            -
                  doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
         | 
| 53 | 
            -
                  doc.internal_subset.validate(doc)
         | 
| 54 | 
            -
                end
         | 
| 55 | 
            -
             | 
| 56 | 
            -
                private
         | 
| 57 | 
            -
             | 
| 58 | 
            -
                class << self
         | 
| 59 | 
            -
                  include Translatomatic::Util
         | 
| 60 | 
            -
                end
         | 
| 61 | 
            -
             | 
| 62 | 
            -
                TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
         | 
| 63 | 
            -
                DEFAULT_OTMF = "Translatomatic"
         | 
| 64 | 
            -
             | 
| 65 | 
            -
                def tmx_body(xml)
         | 
| 66 | 
            -
                  @units.each do |unit|
         | 
| 67 | 
            -
                    xml.tu("segtype": unit.strings[0].type) do
         | 
| 68 | 
            -
                      unit.strings.each do |string|
         | 
| 69 | 
            -
                        xml.tuv("xml:lang": string.locale.to_s) do
         | 
| 70 | 
            -
                          xml.seg string.value
         | 
| 71 | 
            -
                        end
         | 
| 72 | 
            -
                      end
         | 
| 73 | 
            -
                    end
         | 
| 74 | 
            -
                  end
         | 
| 75 | 
            -
                end
         | 
| 76 | 
            -
             | 
| 77 | 
            -
                # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
         | 
| 78 | 
            -
                def self.units_from_texts(texts)
         | 
| 79 | 
            -
                  # group texts by from_text_id
         | 
| 80 | 
            -
                  texts_by_from_id = {}
         | 
| 81 | 
            -
                  texts.each do |text|
         | 
| 82 | 
            -
                    id = text.from_text_id || text.id
         | 
| 83 | 
            -
                    list = (texts_by_from_id[id] ||= [])
         | 
| 84 | 
            -
                    list << text
         | 
| 85 | 
            -
                  end
         | 
| 86 | 
            -
             | 
| 87 | 
            -
                  # create list of Translation Units
         | 
| 88 | 
            -
                  texts_by_from_id.values.collect do |list|
         | 
| 89 | 
            -
                    strings = list.uniq.collect { |i| string(i.value, i.locale) }
         | 
| 90 | 
            -
                    tmx_unit(strings)
         | 
| 91 | 
            -
                  end
         | 
| 92 | 
            -
                end
         | 
| 93 | 
            -
             | 
| 94 | 
            -
                def self.tmx_unit(strings)
         | 
| 95 | 
            -
                  Translatomatic::TMX::TranslationUnit.new(strings)
         | 
| 96 | 
            -
                end
         | 
| 97 | 
            -
             | 
| 98 | 
            -
              end # class
         | 
| 99 | 
            -
            end   # module
         | 
| 1 | 
            +
            module Translatomatic::TMX
         | 
| 2 | 
            +
              # Translation Memory Exchange document
         | 
| 3 | 
            +
              class Document
         | 
| 4 | 
            +
             | 
| 5 | 
            +
                # Create a new instance
         | 
| 6 | 
            +
                # @param units [Array<TranslationUnit>] A list of translation units
         | 
| 7 | 
            +
                # @param source_locale [Locale] Source locale
         | 
| 8 | 
            +
                # @return [Translatomatic::TMX::Document] a new TMX object
         | 
| 9 | 
            +
                def initialize(units, source_locale)
         | 
| 10 | 
            +
                  units = [units] unless units.kind_of?(Array)
         | 
| 11 | 
            +
                  @units = units
         | 
| 12 | 
            +
                  @source_locale = source_locale
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                # @return [String] An XML string
         | 
| 16 | 
            +
                def to_xml(options = {})
         | 
| 17 | 
            +
                  builder = Nokogiri::XML::Builder.new do |xml|
         | 
| 18 | 
            +
                    dtd = options[:dtd] || TMX_DTD
         | 
| 19 | 
            +
                    xml.doc.create_internal_subset('tmx', nil, dtd)
         | 
| 20 | 
            +
                    xml.tmx(version: "1.4") do
         | 
| 21 | 
            +
                      xml.header(creationtool: "Translatomatic",
         | 
| 22 | 
            +
                        creationtoolversion: Translatomatic::VERSION,
         | 
| 23 | 
            +
                        datatype: "PlainText",
         | 
| 24 | 
            +
                        segtype: "phrase",  # default segtype
         | 
| 25 | 
            +
                        adminlang: @source_locale.to_s,
         | 
| 26 | 
            +
                        srclang: @source_locale.to_s,
         | 
| 27 | 
            +
                        "o-tmf": DEFAULT_OTMF
         | 
| 28 | 
            +
                      )
         | 
| 29 | 
            +
                      xml.body { tmx_body(xml) }
         | 
| 30 | 
            +
                    end
         | 
| 31 | 
            +
                  end
         | 
| 32 | 
            +
                  builder.to_xml
         | 
| 33 | 
            +
                end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                # Create a TMX document from the given converter
         | 
| 36 | 
            +
                # @param texts [Array<Translatomatic::Model::Text>] List of texts
         | 
| 37 | 
            +
                # @return [Translatomatic::TMX::Document] TMX document
         | 
| 38 | 
            +
                def self.from_texts(texts)
         | 
| 39 | 
            +
                  # group texts by from_text_id to create units
         | 
| 40 | 
            +
                  # source_locale: use from_text.locale
         | 
| 41 | 
            +
                  # origin: use text.translator
         | 
| 42 | 
            +
                  sources = texts.select { |i| i.from_text.nil? }
         | 
| 43 | 
            +
                  source_locales = sources.collect { |i| i.locale }.uniq
         | 
| 44 | 
            +
                  raise t("tmx.multiple_locales") if source_locales.length > 1
         | 
| 45 | 
            +
                  units = units_from_texts(texts)
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                  return new(units, source_locales[0])
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                def self.valid?(xml)
         | 
| 51 | 
            +
                  options = Nokogiri::XML::ParseOptions::DTDVALID
         | 
| 52 | 
            +
                  doc = Nokogiri::XML::Document.parse(xml, nil, nil, options)
         | 
| 53 | 
            +
                  doc.internal_subset.validate(doc)
         | 
| 54 | 
            +
                end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                private
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                class << self
         | 
| 59 | 
            +
                  include Translatomatic::Util
         | 
| 60 | 
            +
                end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                TMX_DTD = "http://www.ttt.org/oscarstandards/tmx/tmx14.dtd"
         | 
| 63 | 
            +
                DEFAULT_OTMF = "Translatomatic"
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                def tmx_body(xml)
         | 
| 66 | 
            +
                  @units.each do |unit|
         | 
| 67 | 
            +
                    xml.tu("segtype": unit.strings[0].type) do
         | 
| 68 | 
            +
                      unit.strings.each do |string|
         | 
| 69 | 
            +
                        xml.tuv("xml:lang": string.locale.to_s) do
         | 
| 70 | 
            +
                          xml.seg string.value
         | 
| 71 | 
            +
                        end
         | 
| 72 | 
            +
                      end
         | 
| 73 | 
            +
                    end
         | 
| 74 | 
            +
                  end
         | 
| 75 | 
            +
                end
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                # @return [Array<Translatomatic::TMX::TranslationUnit] translation unit list
         | 
| 78 | 
            +
                def self.units_from_texts(texts)
         | 
| 79 | 
            +
                  # group texts by from_text_id
         | 
| 80 | 
            +
                  texts_by_from_id = {}
         | 
| 81 | 
            +
                  texts.each do |text|
         | 
| 82 | 
            +
                    id = text.from_text_id || text.id
         | 
| 83 | 
            +
                    list = (texts_by_from_id[id] ||= [])
         | 
| 84 | 
            +
                    list << text
         | 
| 85 | 
            +
                  end
         | 
| 86 | 
            +
             | 
| 87 | 
            +
                  # create list of Translation Units
         | 
| 88 | 
            +
                  texts_by_from_id.values.collect do |list|
         | 
| 89 | 
            +
                    strings = list.uniq.collect { |i| string(i.value, i.locale) }
         | 
| 90 | 
            +
                    tmx_unit(strings)
         | 
| 91 | 
            +
                  end
         | 
| 92 | 
            +
                end
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                def self.tmx_unit(strings)
         | 
| 95 | 
            +
                  Translatomatic::TMX::TranslationUnit.new(strings)
         | 
| 96 | 
            +
                end
         | 
| 97 | 
            +
             | 
| 98 | 
            +
              end # class
         | 
| 99 | 
            +
            end   # module
         |