subtitle 0.1.8 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
 - data/lib/allfather.rb +83 -0
 - data/lib/dfxp.rb +30 -0
 - data/lib/engines/aws.rb +102 -0
 - data/lib/engines/gcp.rb +0 -0
 - data/lib/engines/translator.rb +58 -0
 - data/lib/scc.rb +43 -23
 - data/lib/srt.rb +86 -61
 - data/lib/subtitle.rb +72 -27
 - data/lib/ttml.rb +180 -0
 - data/lib/vtt.rb +95 -61
 - metadata +21 -26
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: f603ac76acbb145807944c0f948d6550eee197cc
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 7d06b0e8ee047ab1790237fca1c478da03725541
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 204c3af4231e25e6caaa198e9a8b7d46b4f917afcc8abdbce27bccbb94908d28d21c1b0318aa36a1ba63c83c064963e24c7398a99a0cd183c3ca10568fb6fe34
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: fb867912d76f039abf21fd1495c0f9b9139a594cde028f1d50184bd15d007a59113366507b7a85f1039ce0c638d423fbd7cff99d22792f83af59ccede48167dc
         
     | 
    
        data/lib/allfather.rb
    ADDED
    
    | 
         @@ -0,0 +1,83 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # 
         
     | 
| 
      
 2 
     | 
    
         
            +
            # A Module that kind of acts as an interface where the generic methods
         
     | 
| 
      
 3 
     | 
    
         
            +
            # that applies to each caption type can be defined
         
     | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # To use for a new caption type, simply include this module and provide
         
     | 
| 
      
 6 
     | 
    
         
            +
            # caption specific implementations
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            module AllFather
         
     | 
| 
      
 9 
     | 
    
         
            +
              
         
     | 
| 
      
 10 
     | 
    
         
            +
              # 
         
     | 
| 
      
 11 
     | 
    
         
            +
              # Valid file extensions that we support; Keep expanding as we grow
         
     | 
| 
      
 12 
     | 
    
         
            +
              #
         
     | 
| 
      
 13 
     | 
    
         
            +
              VALID_FILES = [".scc", ".srt", ".vtt", ".ttml", ".dfxp"]
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
              # 
         
     | 
| 
      
 16 
     | 
    
         
            +
              # Generic exception class that is raised for validation errors
         
     | 
| 
      
 17 
     | 
    
         
            +
              #
         
     | 
| 
      
 18 
     | 
    
         
            +
              class InvalidInputException < StandardError; end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              #
         
     | 
| 
      
 21 
     | 
    
         
            +
              # Lang inference failure exception
         
     | 
| 
      
 22 
     | 
    
         
            +
              #
         
     | 
| 
      
 23 
     | 
    
         
            +
              class LangDetectionFailureException < StandardError; end
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
              #
         
     | 
| 
      
 26 
     | 
    
         
            +
              # Method to do basic validations like is this a valid file to even
         
     | 
| 
      
 27 
     | 
    
         
            +
              # accept for any future transactions 
         
     | 
| 
      
 28 
     | 
    
         
            +
              #
         
     | 
| 
      
 29 
     | 
    
         
            +
              # ==== Returns:
         
     | 
| 
      
 30 
     | 
    
         
            +
              # true if the file is valid and false otherwise
         
     | 
| 
      
 31 
     | 
    
         
            +
              #
         
     | 
| 
      
 32 
     | 
    
         
            +
              def is_valid?
         
     | 
| 
      
 33 
     | 
    
         
            +
                raise "Not Implemented. Class #{self.class.name} doesn't implement is_valid?"
         
     | 
| 
      
 34 
     | 
    
         
            +
              end
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
              #
         
     | 
| 
      
 37 
     | 
    
         
            +
              # Method to infer the language(s) of the caption by inspecting the file
         
     | 
| 
      
 38 
     | 
    
         
            +
              # depending on the type of the caption file
         
     | 
| 
      
 39 
     | 
    
         
            +
              #
         
     | 
| 
      
 40 
     | 
    
         
            +
              # ==== Returns
         
     | 
| 
      
 41 
     | 
    
         
            +
              #
         
     | 
| 
      
 42 
     | 
    
         
            +
              # * The ISO 639-1 Letter Language codes
         
     | 
| 
      
 43 
     | 
    
         
            +
              # 
         
     | 
| 
      
 44 
     | 
    
         
            +
              def infer_languages
         
     | 
| 
      
 45 
     | 
    
         
            +
                raise "Not Implemented. Class #{self.class.name} doesn't implement infer_languages"
         
     | 
| 
      
 46 
     | 
    
         
            +
              end
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
              #
         
     | 
| 
      
 49 
     | 
    
         
            +
              # Method to translate the caption from one language to another
         
     | 
| 
      
 50 
     | 
    
         
            +
              #
         
     | 
| 
      
 51 
     | 
    
         
            +
              # :args: src_lang, target_lang, output_file
         
     | 
| 
      
 52 
     | 
    
         
            +
              #
         
     | 
| 
      
 53 
     | 
    
         
            +
              # * +input_caption+   - A Valid input caption file. Refer to #is_valid?
         
     | 
| 
      
 54 
     | 
    
         
            +
              # * +src_lang+        - can be inferred using #infer_languages method
         
     | 
| 
      
 55 
     | 
    
         
            +
              # * +target_lang+     - Target 2 letter ISO language code to which the source needs to be translated in to.
         
     | 
| 
      
 56 
     | 
    
         
            +
              # * +output_file+     - Output file. Can be a fully qualified path or just file name
         
     | 
| 
      
 57 
     | 
    
         
            +
              #
         
     | 
| 
      
 58 
     | 
    
         
            +
              # ==== Raises
         
     | 
| 
      
 59 
     | 
    
         
            +
              # 
         
     | 
| 
      
 60 
     | 
    
         
            +
              # InvalidInputException shall be raised if
         
     | 
| 
      
 61 
     | 
    
         
            +
              # 1. The input file doesn't exist or is unreadable or is invalid caption
         
     | 
| 
      
 62 
     | 
    
         
            +
              # 2. The output file can't be written
         
     | 
| 
      
 63 
     | 
    
         
            +
              # 3. The target_lang is not a valid ISO 639-1 Letter Language code
         
     | 
| 
      
 64 
     | 
    
         
            +
              #
         
     | 
| 
      
 65 
     | 
    
         
            +
              def translate(src_lang, target_lang, output_file)
         
     | 
| 
      
 66 
     | 
    
         
            +
                # Check if a non empty output file is present and error out to avoid
         
     | 
| 
      
 67 
     | 
    
         
            +
                # the danger of overwriting some important file !!
         
     | 
| 
      
 68 
     | 
    
         
            +
                if File.exists?(output_file) && File.size(output_file) > 0
         
     | 
| 
      
 69 
     | 
    
         
            +
                  raise InvalidInputException.new("Output file #{output_file} is not empty.")
         
     | 
| 
      
 70 
     | 
    
         
            +
                else
         
     | 
| 
      
 71 
     | 
    
         
            +
                  # Just open the file in writable mode and close it just to ensure that
         
     | 
| 
      
 72 
     | 
    
         
            +
                  # we can write the output file
         
     | 
| 
      
 73 
     | 
    
         
            +
                  File.open(output_file, "w") {|f|
         
     | 
| 
      
 74 
     | 
    
         
            +
                  }
         
     | 
| 
      
 75 
     | 
    
         
            +
                end
         
     | 
| 
      
 76 
     | 
    
         
            +
                # Check if the file is writable ?
         
     | 
| 
      
 77 
     | 
    
         
            +
                unless File.writable?(output_file)
         
     | 
| 
      
 78 
     | 
    
         
            +
                  raise InvalidInputException.new("Output file #{output_file} not writable.")
         
     | 
| 
      
 79 
     | 
    
         
            +
                end
         
     | 
| 
      
 80 
     | 
    
         
            +
                # Further checks can be done only in caption specific implementations
         
     | 
| 
      
 81 
     | 
    
         
            +
                # or translation engine specific implementation
         
     | 
| 
      
 82 
     | 
    
         
            +
              end
         
     | 
| 
      
 83 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/dfxp.rb
    ADDED
    
    | 
         @@ -0,0 +1,30 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require_relative "engines/translator"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "allfather"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative "ttml"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            # Library to handle DFXP Files
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            # Uses the translator available to do the necessary language operations
         
     | 
| 
      
 9 
     | 
    
         
            +
            # as defined by the AllFather
         
     | 
| 
      
 10 
     | 
    
         
            +
            #
         
     | 
| 
      
 11 
     | 
    
         
            +
            class DFXP < TTML
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            	def initialize(cc_file, translator, opts={})
         
     | 
| 
      
 14 
     | 
    
         
            +
                @cc_file = cc_file
         
     | 
| 
      
 15 
     | 
    
         
            +
                @translator = translator
         
     | 
| 
      
 16 
     | 
    
         
            +
                @force_detect = opts[:force_detect] || false
         
     | 
| 
      
 17 
     | 
    
         
            +
                raise "Invalid TTML file provided" unless is_valid?
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              def is_valid?
         
     | 
| 
      
 21 
     | 
    
         
            +
                # Do any DFXP specific validations here
         
     | 
| 
      
 22 
     | 
    
         
            +
                if @cc_file =~ /^.*\.(dfxp)$/
         
     | 
| 
      
 23 
     | 
    
         
            +
                  return true
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                # TODO: Check if it's required to do a File read to see if this
         
     | 
| 
      
 26 
     | 
    
         
            +
                # a well-formed XML. Another is to see if lang is available in each div
         
     | 
| 
      
 27 
     | 
    
         
            +
                return false
         
     | 
| 
      
 28 
     | 
    
         
            +
              end
         
     | 
| 
      
 29 
     | 
    
         
            +
              
         
     | 
| 
      
 30 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/engines/aws.rb
    ADDED
    
    | 
         @@ -0,0 +1,102 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'aws-sdk'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'aws-sdk'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative 'translator'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            # 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # Provides Language services using Amazon Translate
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            # The engine can be initialized using multiple options
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # == Credential Referencing Order
         
     | 
| 
      
 11 
     | 
    
         
            +
            #
         
     | 
| 
      
 12 
     | 
    
         
            +
            # * [Arguments]             - Pass the credentials access_key_id and secret_access_key as arguments
         
     | 
| 
      
 13 
     | 
    
         
            +
            # * [Environment route]     - AWS_ACCESS_KEY_ID & AWS_SECRET_ACCESS_KEY can be exposed as 
         
     | 
| 
      
 14 
     | 
    
         
            +
            # environment variables
         
     | 
| 
      
 15 
     | 
    
         
            +
            # * [Profile Name]          - The application uses the credentials of the system and picks the
         
     | 
| 
      
 16 
     | 
    
         
            +
            # credentials referred to by the profile
         
     | 
| 
      
 17 
     | 
    
         
            +
            #
         
     | 
| 
      
 18 
     | 
    
         
            +
            class AwsEngine
         
     | 
| 
      
 19 
     | 
    
         
            +
              include Translator
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              DEFAULT_REGION = ENV["AWS_DEFAULT_REGION"] || "us-east-1"
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
              # 
         
     | 
| 
      
 24 
     | 
    
         
            +
              # :args: options
         
     | 
| 
      
 25 
     | 
    
         
            +
              #
         
     | 
| 
      
 26 
     | 
    
         
            +
              # ==== Arguments
         
     | 
| 
      
 27 
     | 
    
         
            +
              # options can carry the following details
         
     | 
| 
      
 28 
     | 
    
         
            +
              #
         
     | 
| 
      
 29 
     | 
    
         
            +
              # * [:access_key_id]      - access key id
         
     | 
| 
      
 30 
     | 
    
         
            +
              # * [:secret_access_key]  - Secret access key
         
     | 
| 
      
 31 
     | 
    
         
            +
              # * [:env]                - true for using credentials from environment variables
         
     | 
| 
      
 32 
     | 
    
         
            +
              # * [:profile]            - profile name for using shared credentials setup
         
     | 
| 
      
 33 
     | 
    
         
            +
              # * [:region]             - If not provided defaults to us-east-1
         
     | 
| 
      
 34 
     | 
    
         
            +
              #
         
     | 
| 
      
 35 
     | 
    
         
            +
              # ==== raises                 
         
     | 
| 
      
 36 
     | 
    
         
            +
              #
         
     | 
| 
      
 37 
     | 
    
         
            +
              # * EngineInitializationException if credentials cannot be setup due to lack of details
         
     | 
| 
      
 38 
     | 
    
         
            +
              # * Aws Exceptions if profile name is invalid or invalid credentials are passed
         
     | 
| 
      
 39 
     | 
    
         
            +
              # 
         
     | 
| 
      
 40 
     | 
    
         
            +
              def initialize(options)
         
     | 
| 
      
 41 
     | 
    
         
            +
                access_key_id = nil
         
     | 
| 
      
 42 
     | 
    
         
            +
                secret_access_key = nil
         
     | 
| 
      
 43 
     | 
    
         
            +
                @region = options[:region] || DEFAULT_REGION
         
     | 
| 
      
 44 
     | 
    
         
            +
                if options[:env]
         
     | 
| 
      
 45 
     | 
    
         
            +
                  access_key_id = ENV["AWS_ACCESS_KEY_ID"]
         
     | 
| 
      
 46 
     | 
    
         
            +
                  secret_access_key = ENV["AWS_SECRET_ACCESS_KEY"]
         
     | 
| 
      
 47 
     | 
    
         
            +
                elsif options[:access_key_id] && options[:secret_access_key]
         
     | 
| 
      
 48 
     | 
    
         
            +
                  access_key_id = options[:access_key_id]
         
     | 
| 
      
 49 
     | 
    
         
            +
                  secret_access_key = options[:secret_access_key]
         
     | 
| 
      
 50 
     | 
    
         
            +
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
                if access_key_id && secret_access_key
         
     | 
| 
      
 52 
     | 
    
         
            +
                  Aws.config.update({
         
     | 
| 
      
 53 
     | 
    
         
            +
                    region: options[:region] || DEFAULT_REGION,
         
     | 
| 
      
 54 
     | 
    
         
            +
                    credentials: Aws::Credentials.new(access_key_id, secret_access_key)
         
     | 
| 
      
 55 
     | 
    
         
            +
                  })
         
     | 
| 
      
 56 
     | 
    
         
            +
                elsif options[:profile]
         
     | 
| 
      
 57 
     | 
    
         
            +
                  credentials = Aws::SharedCredentials.new(profile_name: options[:profile])
         
     | 
| 
      
 58 
     | 
    
         
            +
                  Aws.config.update({
         
     | 
| 
      
 59 
     | 
    
         
            +
                    region: @region,
         
     | 
| 
      
 60 
     | 
    
         
            +
                    credentials: credentials.credentials
         
     | 
| 
      
 61 
     | 
    
         
            +
                  })
         
     | 
| 
      
 62 
     | 
    
         
            +
                else
         
     | 
| 
      
 63 
     | 
    
         
            +
                  raise Translator::EngineInitializationException.new(
         
     | 
| 
      
 64 
     | 
    
         
            +
                    "Failed to initialize Aws Engine. Credentials are missing / not provided")
         
     | 
| 
      
 65 
     | 
    
         
            +
                end
         
     | 
| 
      
 66 
     | 
    
         
            +
                @translate_service  = Aws::Translate::Client.new(region: @region)
         
     | 
| 
      
 67 
     | 
    
         
            +
                @comprehend_service = Aws::Comprehend::Client.new(region: @region)
         
     | 
| 
      
 68 
     | 
    
         
            +
              end
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
              # 
         
     | 
| 
      
 71 
     | 
    
         
            +
              # Invokes the language detection API of AWS and returns only the language
         
     | 
| 
      
 72 
     | 
    
         
            +
              # of the highest score and returns the ISO 639-1 code
         
     | 
| 
      
 73 
     | 
    
         
            +
              #
         
     | 
| 
      
 74 
     | 
    
         
            +
              # :args: text
         
     | 
| 
      
 75 
     | 
    
         
            +
              # 
         
     | 
| 
      
 76 
     | 
    
         
            +
              # ===== Arguments 
         
     | 
| 
      
 77 
     | 
    
         
            +
              # * +text+ - The text for which the language is to be inferred
         
     | 
| 
      
 78 
     | 
    
         
            +
              # 
         
     | 
| 
      
 79 
     | 
    
         
            +
              def infer_language(text)
         
     | 
| 
      
 80 
     | 
    
         
            +
                response = @comprehend_service.detect_dominant_language({ text: "#{text}" })
         
     | 
| 
      
 81 
     | 
    
         
            +
                response[:languages][0][:language_code]
         
     | 
| 
      
 82 
     | 
    
         
            +
              end
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
              # 
         
     | 
| 
      
 85 
     | 
    
         
            +
              # Invokes the translation API of AWS and returns the translated text
         
     | 
| 
      
 86 
     | 
    
         
            +
              # as per the arguments provided
         
     | 
| 
      
 87 
     | 
    
         
            +
              # Will Raise exception if a translation cannot be made between the source
         
     | 
| 
      
 88 
     | 
    
         
            +
              # and target language codes or if the lang code is invalid
         
     | 
| 
      
 89 
     | 
    
         
            +
              #
         
     | 
| 
      
 90 
     | 
    
         
            +
              # :args: input_text, src_lang, target_lang
         
     | 
| 
      
 91 
     | 
    
         
            +
              #
         
     | 
| 
      
 92 
     | 
    
         
            +
              # * +input_text+      - The text that needs to be translated
         
     | 
| 
      
 93 
     | 
    
         
            +
              # * +src_lang+        - The source language of the text
         
     | 
| 
      
 94 
     | 
    
         
            +
              # * +target_lang+     - The target language to which the input_text needs to be translated to
         
     | 
| 
      
 95 
     | 
    
         
            +
              #
         
     | 
| 
      
 96 
     | 
    
         
            +
              def translate(input_text, src_lang, target_lang)
         
     | 
| 
      
 97 
     | 
    
         
            +
                response = @translate_service.translate_text({ :text => "#{input_text}" , 
         
     | 
| 
      
 98 
     | 
    
         
            +
                  :source_language_code => "#{src_lang}", :target_language_code => "#{target_lang}"})
         
     | 
| 
      
 99 
     | 
    
         
            +
                response.translated_text
         
     | 
| 
      
 100 
     | 
    
         
            +
              end
         
     | 
| 
      
 101 
     | 
    
         
            +
            end
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
    
        data/lib/engines/gcp.rb
    ADDED
    
    | 
         
            File without changes
         
     | 
| 
         @@ -0,0 +1,58 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # 
         
     | 
| 
      
 2 
     | 
    
         
            +
            # A Module that kind of acts as an interface where the methods
         
     | 
| 
      
 3 
     | 
    
         
            +
            # expected out of each vendor is encapsulated into
         
     | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # To use for a new vendor, simply include this module and provide
         
     | 
| 
      
 6 
     | 
    
         
            +
            # vendor specific implementations
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            module Translator
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
              # 
         
     | 
| 
      
 11 
     | 
    
         
            +
              # Constants For Engines
         
     | 
| 
      
 12 
     | 
    
         
            +
              ENGINE_AWS = 1
         
     | 
| 
      
 13 
     | 
    
         
            +
              ENGINE_GCP = 2
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
              #
         
     | 
| 
      
 16 
     | 
    
         
            +
              # Keys for each Engine
         
     | 
| 
      
 17 
     | 
    
         
            +
              AWS_KEYS = [:access_key_id, :secret_access_key, :profile]
         
     | 
| 
      
 18 
     | 
    
         
            +
              GCP_KEYS = [:api_key, :project_id, :creds_path]
         
     | 
| 
      
 19 
     | 
    
         
            +
              
         
     | 
| 
      
 20 
     | 
    
         
            +
              ENGINE_KEYS = {ENGINE_AWS => AWS_KEYS, ENGINE_GCP => GCP_KEYS}
         
     | 
| 
      
 21 
     | 
    
         
            +
              #
         
     | 
| 
      
 22 
     | 
    
         
            +
              # This exception shall be raised when we fail to initialize an
         
     | 
| 
      
 23 
     | 
    
         
            +
              # engine for the purposes of language detection / translation
         
     | 
| 
      
 24 
     | 
    
         
            +
              #
         
     | 
| 
      
 25 
     | 
    
         
            +
              # ==== Example
         
     | 
| 
      
 26 
     | 
    
         
            +
              # * When credentials are not passed
         
     | 
| 
      
 27 
     | 
    
         
            +
              #
         
     | 
| 
      
 28 
     | 
    
         
            +
              class EngineInitializationException < StandardError; end
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
              #
         
     | 
| 
      
 31 
     | 
    
         
            +
              # Method to infer the language by inspecting the text
         
     | 
| 
      
 32 
     | 
    
         
            +
              # passed as argument
         
     | 
| 
      
 33 
     | 
    
         
            +
              #
         
     | 
| 
      
 34 
     | 
    
         
            +
              # :args: text
         
     | 
| 
      
 35 
     | 
    
         
            +
              #
         
     | 
| 
      
 36 
     | 
    
         
            +
              # * +text+ - String whose language needs to be inferred
         
     | 
| 
      
 37 
     | 
    
         
            +
              #
         
     | 
| 
      
 38 
     | 
    
         
            +
              # ==== Returns
         
     | 
| 
      
 39 
     | 
    
         
            +
              #
         
     | 
| 
      
 40 
     | 
    
         
            +
              # * The ISO 639-1 Letter Language code
         
     | 
| 
      
 41 
     | 
    
         
            +
              # 
         
     | 
| 
      
 42 
     | 
    
         
            +
              def infer_language(text)
         
     | 
| 
      
 43 
     | 
    
         
            +
                raise "Not Implemented. Class #{self.class.name} doesn't implement infer_language"
         
     | 
| 
      
 44 
     | 
    
         
            +
              end
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
              #
         
     | 
| 
      
 47 
     | 
    
         
            +
              # Method to translate from given language to another
         
     | 
| 
      
 48 
     | 
    
         
            +
              #
         
     | 
| 
      
 49 
     | 
    
         
            +
              # :args: input_text, src_lang, target_lang
         
     | 
| 
      
 50 
     | 
    
         
            +
              #
         
     | 
| 
      
 51 
     | 
    
         
            +
              # * +input_text+      - Text which needs to be translated
         
     | 
| 
      
 52 
     | 
    
         
            +
              # * +src_lang+        - can be inferred using #infer_language method
         
     | 
| 
      
 53 
     | 
    
         
            +
              # * +target_lang+     - Target 2-letter ISO language code into which the source needs to be translated.
         
     | 
| 
      
 54 
     | 
    
         
            +
              # 
         
     | 
| 
      
 55 
     | 
    
         
            +
              def translate(input_text, src_lang, target_lang)
         
     | 
| 
      
 56 
     | 
    
         
            +
                raise "Not Implemented. Class #{self.class.name} doesn't implement translate"
         
     | 
| 
      
 57 
     | 
    
         
            +
              end
         
     | 
| 
      
 58 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/scc.rb
    CHANGED
    
    | 
         @@ -1,13 +1,47 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require_relative "engines/translator"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "allfather"
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Library to handle SCC Files
         
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # Uses the translator available to do the necessary language operations
         
     | 
| 
      
 8 
     | 
    
         
            +
            # as defined by the AllFather
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
       4 
10 
     | 
    
         
             
            class SCC
         
     | 
| 
       5 
11 
     | 
    
         | 
| 
       6 
     | 
    
         
            -
               
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
      
 12 
     | 
    
         
            +
              include AllFather
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              def initialize(cc_file, translator)
         
     | 
| 
      
 15 
     | 
    
         
            +
                @cc_file = cc_file
         
     | 
| 
      
 16 
     | 
    
         
            +
                @translator = translator
         
     | 
| 
      
 17 
     | 
    
         
            +
                raise "Invalid SCC file provided" unless is_valid?
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              def is_valid?
         
     | 
| 
      
 21 
     | 
    
         
            +
                # Do any SCC specific validations here
         
     | 
| 
      
 22 
     | 
    
         
            +
                if @cc_file =~ /^.*\.(scc)$/
         
     | 
| 
      
 23 
     | 
    
         
            +
                  return true
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                return false
         
     | 
| 
      
 26 
     | 
    
         
            +
              end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
              def infer_languages
         
     | 
| 
      
 29 
     | 
    
         
            +
                lang = nil
         
     | 
| 
      
 30 
     | 
    
         
            +
                begin
         
     | 
| 
      
 31 
     | 
    
         
            +
                  sample_text = get_text(@cc_file, 100)
         
     | 
| 
      
 32 
     | 
    
         
            +
                  lang = @translator.infer_language(sample_text)
         
     | 
| 
      
 33 
     | 
    
         
            +
                rescue StandardError => e
         
     | 
| 
      
 34 
     | 
    
         
            +
                  puts "Error while detecting the language due to #{e.message}"
         
     | 
| 
      
 35 
     | 
    
         
            +
                end
         
     | 
| 
      
 36 
     | 
    
         
            +
                lang
         
     | 
| 
       9 
37 
     | 
    
         
             
              end
         
     | 
| 
       10 
38 
     | 
    
         | 
| 
      
 39 
     | 
    
         
            +
              def translate(src_lang, dest_lang, out_file)
         
     | 
| 
      
 40 
     | 
    
         
            +
                raise "Not Implemented. Class #{self.class.name} doesn't implement translate yet !!"
         
     | 
| 
      
 41 
     | 
    
         
            +
              end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
              private
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
       11 
45 
     | 
    
         
             
              def get_text(srt_file, num_chars)
         
     | 
| 
       12 
46 
     | 
    
         
             
                ccfile = File.open(srt_file, 'r:UTF-8', &:read)
         
     | 
| 
       13 
47 
     | 
    
         
             
                text_sample = ""
         
     | 
| 
         @@ -15,12 +49,12 @@ class SCC 
     | 
|
| 
       15 
49 
     | 
    
         
             
                  if line =~ /^\d\d:\d\d:\d\d:\d\d\s/
         
     | 
| 
       16 
50 
     | 
    
         
             
                    scc_text_code = line.gsub(/^\d\d:\d\d:\d\d:\d\d\s/, '')
         
     | 
| 
       17 
51 
     | 
    
         
             
                    text_sample << decode(scc_text_code)
         
     | 
| 
       18 
     | 
    
         
            -
                    if text_sample.length > (num_chars+1)
         
     | 
| 
      
 52 
     | 
    
         
            +
                    if text_sample.length > (num_chars + 1)
         
     | 
| 
       19 
53 
     | 
    
         
             
                      break
         
     | 
| 
       20 
54 
     | 
    
         
             
                    end
         
     | 
| 
       21 
55 
     | 
    
         
             
                  end
         
     | 
| 
       22 
56 
     | 
    
         
             
                end
         
     | 
| 
       23 
     | 
    
         
            -
                return text_sample[0,num_chars]
         
     | 
| 
      
 57 
     | 
    
         
            +
                return text_sample[0, num_chars]
         
     | 
| 
       24 
58 
     | 
    
         
             
              end
         
     | 
| 
       25 
59 
     | 
    
         | 
| 
       26 
60 
     | 
    
         
             
              def decode(scc_code_text)
         
     | 
| 
         @@ -31,7 +65,7 @@ class SCC 
     | 
|
| 
       31 
65 
     | 
    
         
             
                hex_codes.each do | code |
         
     | 
| 
       32 
66 
     | 
    
         
             
                  if ["94", "91", "92", "97", "15", "16", "10", "13"].include?(code)
         
     | 
| 
       33 
67 
     | 
    
         
             
                    skip_next = true
         
     | 
| 
       34 
     | 
    
         
            -
                    skip_count = skip_count +1
         
     | 
| 
      
 68 
     | 
    
         
            +
                    skip_count = skip_count + 1
         
     | 
| 
       35 
69 
     | 
    
         
             
                    next
         
     | 
| 
       36 
70 
     | 
    
         
             
                  end
         
     | 
| 
       37 
71 
     | 
    
         
             
                  if skip_count == 1 && skip_next
         
     | 
| 
         @@ -60,18 +94,4 @@ class SCC 
     | 
|
| 
       60 
94 
     | 
    
         
             
                end
         
     | 
| 
       61 
95 
     | 
    
         
             
                encoded_str
         
     | 
| 
       62 
96 
     | 
    
         
             
              end
         
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
              def detect_lang(scc_file)
         
     | 
| 
       65 
     | 
    
         
            -
                lang = nil
         
     | 
| 
       66 
     | 
    
         
            -
                begin
         
     | 
| 
       67 
     | 
    
         
            -
                  sample_text = get_text(scc_file, 100)
         
     | 
| 
       68 
     | 
    
         
            -
                  response = @comp.detect_dominant_language( {
         
     | 
| 
       69 
     | 
    
         
            -
                                                                 text: "#{sample_text}"
         
     | 
| 
       70 
     | 
    
         
            -
                                                             })
         
     | 
| 
       71 
     | 
    
         
            -
                  lang = response[:languages][0][:language_code] rescue nil
         
     | 
| 
       72 
     | 
    
         
            -
                rescue => error
         
     | 
| 
       73 
     | 
    
         
            -
                  puts "Error while detecting the language!!"
         
     | 
| 
       74 
     | 
    
         
            -
                end
         
     | 
| 
       75 
     | 
    
         
            -
                lang
         
     | 
| 
       76 
     | 
    
         
            -
              end
         
     | 
| 
       77 
     | 
    
         
            -
            end
         
     | 
| 
      
 97 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/srt.rb
    CHANGED
    
    | 
         @@ -1,81 +1,106 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require_relative "engines/translator"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "allfather"
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Library to handle SRT Files
         
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # Uses the translator available to do the necessary language operations
         
     | 
| 
      
 8 
     | 
    
         
            +
            # as defined by the AllFather
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
       4 
10 
     | 
    
         
             
            class SRT
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
              include AllFather
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              def initialize(cc_file, translator)
         
     | 
| 
      
 15 
     | 
    
         
            +
                @cc_file = cc_file
         
     | 
| 
      
 16 
     | 
    
         
            +
                @translator = translator
         
     | 
| 
      
 17 
     | 
    
         
            +
                raise "Invalid SRT file provided" unless is_valid?
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              def is_valid?
         
     | 
| 
      
 21 
     | 
    
         
            +
                # Do any SRT specific validations here
         
     | 
| 
      
 22 
     | 
    
         
            +
                if @cc_file =~ /^.*\.(srt)$/
         
     | 
| 
      
 23 
     | 
    
         
            +
                  return true
         
     | 
| 
      
 24 
     | 
    
         
            +
                end
         
     | 
| 
      
 25 
     | 
    
         
            +
                return false
         
     | 
| 
       8 
26 
     | 
    
         
             
              end
         
     | 
| 
       9 
27 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
              def  
     | 
| 
       11 
     | 
    
         
            -
                 
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                   
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                     
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
                     
     | 
| 
       22 
     | 
    
         
            -
                       
     | 
| 
       23 
     | 
    
         
            -
                       
     | 
| 
      
 28 
     | 
    
         
            +
              def translate(src_lang, dest_lang, out_file)
         
     | 
| 
      
 29 
     | 
    
         
            +
                super(src_lang, dest_lang, out_file)
         
     | 
| 
      
 30 
     | 
    
         
            +
                begin
         
     | 
| 
      
 31 
     | 
    
         
            +
                  ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
         
     | 
| 
      
 32 
     | 
    
         
            +
                  outfile = File.open(out_file, "w")
         
     | 
| 
      
 33 
     | 
    
         
            +
                  text_collection = false
         
     | 
| 
      
 34 
     | 
    
         
            +
                  text_sample = ""
         
     | 
| 
      
 35 
     | 
    
         
            +
                  ccfile.each_line do | line |
         
     | 
| 
      
 36 
     | 
    
         
            +
                    if line =~ /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/
         
     | 
| 
      
 37 
     | 
    
         
            +
                      text_collection = true
         
     | 
| 
      
 38 
     | 
    
         
            +
                      outfile.puts line
         
     | 
| 
      
 39 
     | 
    
         
            +
                    elsif line.strip.empty? && !text_sample.empty?
         
     | 
| 
      
 40 
     | 
    
         
            +
                      json_text = JSON.parse(text_sample) rescue nil
         
     | 
| 
      
 41 
     | 
    
         
            +
                      if json_text.nil?
         
     | 
| 
      
 42 
     | 
    
         
            +
                        trans_resp = @translator.translate(text_sample, src_lang, dest_lang)
         
     | 
| 
      
 43 
     | 
    
         
            +
                        outfile.puts trans_resp
         
     | 
| 
      
 44 
     | 
    
         
            +
                      else
         
     | 
| 
      
 45 
     | 
    
         
            +
                        outfile.puts text_sample
         
     | 
| 
      
 46 
     | 
    
         
            +
                      end
         
     | 
| 
       24 
47 
     | 
    
         
             
                      outfile.puts
         
     | 
| 
      
 48 
     | 
    
         
            +
                      text_sample = ""
         
     | 
| 
      
 49 
     | 
    
         
            +
                      text_collection = false
         
     | 
| 
      
 50 
     | 
    
         
            +
                    elsif text_collection
         
     | 
| 
      
 51 
     | 
    
         
            +
                      text_sample << line
         
     | 
| 
       25 
52 
     | 
    
         
             
                    else
         
     | 
| 
       26 
     | 
    
         
            -
                      outfile.puts  
     | 
| 
       27 
     | 
    
         
            -
                      outfile.puts
         
     | 
| 
      
 53 
     | 
    
         
            +
                      outfile.puts line
         
     | 
| 
       28 
54 
     | 
    
         
             
                    end
         
     | 
| 
       29 
     | 
    
         
            -
                    text_sample = ""
         
     | 
| 
       30 
     | 
    
         
            -
                    text_collection = false
         
     | 
| 
       31 
     | 
    
         
            -
                  elsif text_collection
         
     | 
| 
       32 
     | 
    
         
            -
                    text_sample << line
         
     | 
| 
       33 
     | 
    
         
            -
                  else
         
     | 
| 
       34 
     | 
    
         
            -
                    outfile.puts line
         
     | 
| 
       35 
55 
     | 
    
         
             
                  end
         
     | 
| 
       36 
     | 
    
         
            -
                  next
         
     | 
| 
       37 
     | 
    
         
            -
                end
         
     | 
| 
       38 
56 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
      
 57 
     | 
    
         
            +
                  if !text_sample.empty?
         
     | 
| 
      
 58 
     | 
    
         
            +
                    trans_resp = @translator.translate(text_sample, src_lang, dest_lang)
         
     | 
| 
      
 59 
     | 
    
         
            +
                    outfile.puts trans_resp
         
     | 
| 
      
 60 
     | 
    
         
            +
                    outfile.puts
         
     | 
| 
      
 61 
     | 
    
         
            +
                  end
         
     | 
| 
      
 62 
     | 
    
         
            +
                ensure
         
     | 
| 
      
 63 
     | 
    
         
            +
                  ccfile.close rescue nil
         
     | 
| 
       43 
64 
     | 
    
         
             
                  outfile.close
         
     | 
| 
       44 
65 
     | 
    
         
             
                end
         
     | 
| 
       45 
66 
     | 
    
         
             
              end
         
     | 
| 
       46 
67 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
                 
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                 
     | 
| 
       53 
     | 
    
         
            -
                   
     | 
| 
       54 
     | 
    
         
            -
                  if line =~ /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/
         
     | 
| 
       55 
     | 
    
         
            -
                    text_collection = true
         
     | 
| 
       56 
     | 
    
         
            -
                  elsif line.strip.empty?
         
     | 
| 
       57 
     | 
    
         
            -
                    text_collection = false
         
     | 
| 
       58 
     | 
    
         
            -
                  elsif text_collection && text_sample.length < (num_chars+1)
         
     | 
| 
       59 
     | 
    
         
            -
                    text_sample << line
         
     | 
| 
       60 
     | 
    
         
            -
                  end
         
     | 
| 
       61 
     | 
    
         
            -
                  break if text_sample.length > (num_chars+1)
         
     | 
| 
       62 
     | 
    
         
            -
                  next
         
     | 
| 
      
 68 
     | 
    
         
            +
              def infer_languages
         
     | 
| 
      
 69 
     | 
    
         
            +
                lang = nil
         
     | 
| 
      
 70 
     | 
    
         
            +
                begin
         
     | 
| 
      
 71 
     | 
    
         
            +
                  sample_text = get_text(@cc_file, 100)
         
     | 
| 
      
 72 
     | 
    
         
            +
                  lang = @translator.infer_language(sample_text)
         
     | 
| 
      
 73 
     | 
    
         
            +
                rescue StandardError => e
         
     | 
| 
      
 74 
     | 
    
         
            +
                  puts "Error while detecting the language due to #{e.message}"
         
     | 
| 
       63 
75 
     | 
    
         
             
                end
         
     | 
| 
       64 
     | 
    
         
            -
                 
     | 
| 
      
 76 
     | 
    
         
            +
                [lang]
         
     | 
| 
       65 
77 
     | 
    
         
             
              end
         
     | 
| 
       66 
78 
     | 
    
         | 
| 
       67 
     | 
    
         
            -
               
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
      
 79 
     | 
    
         
            +
              private 
         
     | 
| 
      
 80 
     | 
    
         
            +
             
     | 
| 
      
 81 
     | 
    
         
            +
              # 
         
     | 
| 
      
 82 
     | 
    
         
            +
              # Method to get a minimal amount of key text that excludes any tags
         
     | 
| 
      
 83 
     | 
    
         
            +
              # or control information for the engine to meaningfully and 
         
     | 
| 
      
 84 
     | 
    
         
            +
              # correctly infer the language being referred to in the SRT
         
     | 
| 
      
 85 
     | 
    
         
            +
              #
         
     | 
| 
      
 86 
     | 
    
         
            +
              def get_text(srt_file, num_chars)
         
     | 
| 
       69 
87 
     | 
    
         
             
                begin
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
      
 88 
     | 
    
         
            +
                  ccfile = File.open(srt_file, 'r:UTF-8', &:read)
         
     | 
| 
      
 89 
     | 
    
         
            +
                  text_collection = false
         
     | 
| 
      
 90 
     | 
    
         
            +
                  text_sample = ""
         
     | 
| 
      
 91 
     | 
    
         
            +
                  ccfile.each_line do |line|
         
     | 
| 
      
 92 
     | 
    
         
            +
                    if line =~ /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/
         
     | 
| 
      
 93 
     | 
    
         
            +
                      text_collection = true
         
     | 
| 
      
 94 
     | 
    
         
            +
                    elsif line.strip.empty?
         
     | 
| 
      
 95 
     | 
    
         
            +
                      text_collection = false
         
     | 
| 
      
 96 
     | 
    
         
            +
                    elsif text_collection && text_sample.length < (num_chars + 1)
         
     | 
| 
      
 97 
     | 
    
         
            +
                      text_sample << line
         
     | 
| 
      
 98 
     | 
    
         
            +
                    end
         
     | 
| 
      
 99 
     | 
    
         
            +
                    break if text_sample.length > (num_chars + 1)
         
     | 
| 
      
 100 
     | 
    
         
            +
                  end
         
     | 
| 
      
 101 
     | 
    
         
            +
                ensure
         
     | 
| 
      
 102 
     | 
    
         
            +
                  ccfile.close rescue nil
         
     | 
| 
       77 
103 
     | 
    
         
             
                end
         
     | 
| 
       78 
     | 
    
         
            -
                 
     | 
| 
      
 104 
     | 
    
         
            +
                return text_sample[0, num_chars]
         
     | 
| 
       79 
105 
     | 
    
         
             
              end
         
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
106 
     | 
    
         
             
            end
         
     | 
    
        data/lib/subtitle.rb
    CHANGED
    
    | 
         @@ -1,43 +1,88 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require_relative "srt"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "vtt"
         
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative "scc"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative "ttml"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_relative "dfxp"
         
     | 
| 
      
 6 
     | 
    
         
            +
            require_relative "allfather"
         
     | 
| 
      
 7 
     | 
    
         
            +
            require_relative "engines/translator"
         
     | 
| 
      
 8 
     | 
    
         
            +
            require_relative "engines/aws"
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
       2 
10 
     | 
    
         | 
| 
       3 
11 
     | 
    
         
             
            class Subtitle
         
     | 
| 
       4 
     | 
    
         
            -
              def initialize( 
     | 
| 
       5 
     | 
    
         
            -
                 
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                 
     | 
| 
       8 
     | 
    
         
            -
                 
     | 
| 
       9 
     | 
    
         
            -
                 
     | 
| 
       10 
     | 
    
         
            -
                  raise "Incorrect File extension"
         
     | 
| 
       11 
     | 
    
         
            -
                end
         
     | 
| 
       12 
     | 
    
         
            -
                begin
         
     | 
| 
       13 
     | 
    
         
            -
                  @srt_parser = SRT.new(awskey, awssecret)
         
     | 
| 
       14 
     | 
    
         
            -
                rescue
         
     | 
| 
       15 
     | 
    
         
            -
                  raise "Could not initialize Parser!!. Check the Keys supplied."
         
     | 
| 
       16 
     | 
    
         
            -
                end
         
     | 
| 
      
 12 
     | 
    
         
            +
              def initialize(options={})
         
     | 
| 
      
 13 
     | 
    
         
            +
                # Infer the caption handler from the extension
         
     | 
| 
      
 14 
     | 
    
         
            +
                @cc_file = options[:cc_file]
         
     | 
| 
      
 15 
     | 
    
         
            +
                raise "Input caption not provided. Please provide the same in :cc_file option" if @cc_file.nil?
         
     | 
| 
      
 16 
     | 
    
         
            +
                translator = get_translator(options)
         
     | 
| 
      
 17 
     | 
    
         
            +
                @handler = get_caption_handler(options, translator)
         
     | 
| 
       17 
18 
     | 
    
         
             
              end
         
     | 
| 
       18 
19 
     | 
    
         | 
| 
       19 
20 
     | 
    
         
             
              def detect_language
         
     | 
| 
       20 
     | 
    
         
            -
                 
     | 
| 
       21 
     | 
    
         
            -
                detected_lang
         
     | 
| 
      
 21 
     | 
    
         
            +
                @handler.infer_languages
         
     | 
| 
       22 
22 
     | 
    
         
             
              end
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
              def  
     | 
| 
      
 24 
     | 
    
         
            +
              # Translates the caption file into dest_lang through the caption handler.
              #
              # dest_lang - target language code.
              # src_lang  - source language code; when nil, the first entry returned by
              #             detect_language is used.
              # outfile   - output path; defaults to "<cc_file>_<dest_lang>".
              #
              # Returns the path of the output file.
              # Raises RuntimeError when the source language cannot be determined.
              def translate(dest_lang, src_lang = nil, outfile = nil)
                outfile = "#{@cc_file}_#{dest_lang}" if outfile.nil?
                if src_lang.nil?
                  # Any detection error is treated the same as "nothing detected".
                  src_lang = begin
                    detect_language[0]
                  rescue StandardError
                    nil
                  end
                  raise "Could not detect Source Language!!" if src_lang.nil?
                end
                @handler.translate(src_lang, dest_lang, outfile)
                outfile
              end
         
     | 
| 
       35 
35 
     | 
    
         | 
| 
       36 
     | 
    
         
            -
               
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
      
 36 
     | 
    
         
            +
              private
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
              # Builds the translation engine described by options.
              #
              # options[:engine] is used when supplied. Otherwise the first entry of
              # Translator::ENGINE_KEYS that has at least one of its keys present in
              # options is selected.
              #
              # Returns an AwsEngine built from options.
              # Raises RuntimeError for the GCP engine (not implemented) and when no
              # engine can be determined.
              def get_translator(options)
                engine = options[:engine]
                unless engine
                  supplied = options.keys
                  Translator::ENGINE_KEYS.each do |name, credential_keys|
                    # Nothing removed by the subtraction means none of this engine's
                    # keys were passed in.
                    next if (credential_keys - supplied).size >= credential_keys.size
                    engine = name
                    break
                  end
                end

                case engine
                when Translator::ENGINE_AWS then AwsEngine.new(options)
                when Translator::ENGINE_GCP then raise "GCP is yet to be implemented"
                else raise "Unable to infer the Translation Engine. Options missing key credential params"
                end
              end
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
              # Picks the caption handler class from the extension of options[:cc_file].
              #
              # options    - Hash; :cc_file is the caption path, :force_detect is passed
              #              on to the TTML and DFXP handlers.
              # translator - engine instance handed to the handler.
              #
              # Returns the constructed handler (SCC, SRT, VTT, TTML or DFXP).
              # Raises RuntimeError for extensions that are not supported.
              def get_caption_handler(options, translator)
                caption_file = options[:cc_file]
                # Normalise once so the VALID_FILES check and the dispatch below agree
                # on mixed-case extensions such as ".SRT".
                extension = File.extname(caption_file).downcase
                unless AllFather::VALID_FILES.include?(extension)
                  raise "Caption support for #{caption_file} of type #{extension} is not supported yet"
                end

                case extension
                when ".scc"
                  SCC.new(caption_file, translator)
                when ".srt"
                  SRT.new(caption_file, translator)
                when ".vtt"
                  VTT.new(caption_file, translator)
                when ".ttml"
                  TTML.new(caption_file, translator, {:force_detect => options[:force_detect]})
                when ".dfxp"
                  DFXP.new(caption_file, translator, {:force_detect => options[:force_detect]})
                else
                  # extension already carries its leading dot
                  raise "Cannot handle file type #{extension}"
                end
              end
         
     | 
| 
       43 
     | 
    
         
            -
            end
         
     | 
| 
      
 88 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/ttml.rb
    ADDED
    
    | 
         @@ -0,0 +1,180 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require_relative "engines/translator"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "allfather"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            require "nokogiri"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # Library to handle TTML Files
         
     | 
| 
      
 8 
     | 
    
         
            +
            #
         
     | 
| 
      
 9 
     | 
    
         
            +
            # Uses the translator available to do the necessary language operations
         
     | 
| 
      
 10 
     | 
    
         
            +
            # as defined by the AllFather
         
     | 
| 
      
 11 
     | 
    
         
            +
            #
         
     | 
| 
      
 12 
     | 
    
         
            +
            class TTML
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              include AllFather
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
              # cc_file    - path of the TTML caption file.
              # translator - engine used for language detection and translation.
              # opts       - Hash; :force_detect (default false) is stored for use by
              #              infer_languages.
              #
              # Raises RuntimeError when is_valid? rejects the file.
              def initialize(cc_file, translator, opts={})
                @cc_file, @translator = cc_file, translator
                @force_detect = opts[:force_detect] ? opts[:force_detect] : false
                return if is_valid?
                raise "Invalid TTML file provided"
              end
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
              # Checks that the caption file name ends in ".ttml" (any letter case).
              #
              # Returns true or false; a nil @cc_file yields false.
              def is_valid?
                # \z anchors at the real end of the string; the previous ^...$ form
                # only anchored per line, so "x.ttml\nanything" was accepted.
                # TODO: Check if it's required to do a File read to see if this is
                # a well-formed XML. Another is to see if lang is available in each div
                !!(@cc_file =~ /\.ttml\z/i)
              end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
              # Determines the language of every div matched by "/tt/body/div".
              #
              # A div's "lang" attribute is used as-is unless force detection was
              # requested or the attribute is missing; in those cases a 100-character
              # text sample is sent to the translator for detection.
              #
              # Any StandardError raised during the scan is printed to stdout and
              # ends the scan; languages collected before the error are still returned.
              # NOTE(review): this also covers the LangDetectionFailureException raised
              # below, provided it descends from StandardError - confirm in AllFather.
              #
              # Returns an Array with one language per processed div, or nil when
              # nothing was collected.
              def infer_languages
                lang = []
                begin
                  # Block form closes the file even if parsing raises.
                  xml_doc = File.open(@cc_file) { |xml_file| Nokogiri::XML(xml_file) }
                  xml_doc.css("/tt/body/div").each_with_index do |div, index|
                    lang_attr = div.attributes['lang']
                    inferred_lang = lang_attr && lang_attr.value
                    # Decide per div. @force_detect is deliberately not modified, so a
                    # div without a lang attribute no longer switches detection on for
                    # the remaining divs or for later calls.
                    if @force_detect || inferred_lang.nil?
                      sample_text = get_text(div, 100)
                      inferred_lang = begin
                        @translator.infer_language(sample_text)
                      rescue StandardError
                        nil
                      end
                      if inferred_lang.nil?
                        err_msg = "Failed to detect lang for div block number #{index + 1}"
                        err_msg += "; Detected languages before failure are #{lang}" unless lang.empty?
                        raise AllFather::LangDetectionFailureException.new(err_msg)
                      end
                    end
                    lang << inferred_lang
                  end
                rescue StandardError => e
                  puts "Error while detecting the language due to #{e.message}"
                end
                return nil if lang.empty?
                lang
              end
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
              # Translates the div whose detected language equals src_lang and writes
              # the resulting document to out_file.
              #
              # src_lang  - language code the source div must be detected as.
              # dest_lang - target language code; also written to the div's lang.
              # out_file  - path of the file to write.
              #
              # Returns out_file.
              # Raises AllFather::InvalidInputException when no div is detected as
              # src_lang.
              def translate(src_lang, dest_lang, out_file)
                super(src_lang, dest_lang, out_file)
                xml_content = File.open(@cc_file, 'r:UTF-8', &:read)
                xml_doc = Nokogiri::XML(xml_content)

                # Irrespective of what lang the div xml:lang says, infer the lang and
                # then check to see if it matches src_lang
                matched_div = xml_doc.css("/tt/body/div").find do |div|
                  sample_text = get_text(div, 100)
                  inferred_lang = begin
                    @translator.infer_language(sample_text)
                  rescue StandardError
                    nil # a failed detection only means this div is not a candidate
                  end
                  !inferred_lang.nil? && inferred_lang.eql?(src_lang)
                end

                if matched_div.nil?
                  # Remove any file already sitting at out_file (presumably left by
                  # super - confirm). The existence guard keeps a missing file from
                  # masking the real error below.
                  File.delete(out_file) if File.exist?(out_file)
                  raise AllFather::InvalidInputException.new("Unable to find #{src_lang} language section in TTML")
                end

                # Update the Lang in the Div
                matched_div.lang = dest_lang

                matched_div.css("p").each do |block|
                  # Collapse whitespace runs to a single space; removing them outright
                  # glued together words that were separated by a line break + indent.
                  text = block.inner_html.strip.gsub(/\s{2,}/, ' ')
                  translated_text = ""
                  get_block_text(text).each do |text_block|
                    if text_block.start_with?('<') || text_block.strip.empty?
                      # Markup and blank fragments are copied through untranslated.
                      translated_text << text_block
                    else
                      translated_text << @translator.translate(text_block, src_lang, dest_lang)
                    end
                  end
                  block.inner_html = translated_text
                end

                File.write(out_file, xml_doc)
                out_file
              end
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
              private
         
     | 
| 
      
 116 
     | 
    
         
            +
             
     | 
| 
      
 117 
     | 
    
         
            +
              #
         
     | 
| 
      
 118 
     | 
    
         
            +
              # Method to segregate the data from markups as markups don't need
         
     | 
| 
      
 119 
     | 
    
         
            +
              # translations.
         
     | 
| 
      
 120 
     | 
    
         
            +
              # For example, if the cue block is of the form
         
     | 
| 
      
 121 
     | 
    
         
            +
              # This is a test caption with <span id="1">a test span </span> within a block
         
     | 
| 
      
 122 
     | 
    
         
            +
              # This method returns
         
     | 
| 
      
 123 
     | 
    
         
            +
              # ["This is a test caption with ", "<span id=\"1\">", "a test span ", "</span>", " within a block"]
         
     | 
| 
      
 124 
     | 
    
         
            +
              # as we can infer the markups can be retained as is to avoid translation
         
     | 
| 
      
 125 
     | 
    
         
            +
              #
         
     | 
| 
      
 126 
     | 
    
         
            +
              def get_block_text(text)
                # A single regex pass replaces the hand-rolled character state machine.
                # Alternatives, tried in this order at each position:
                #   <[^>]*>  a complete tag, kept verbatim
                #   [^<]+    a run of text up to the next '<' (a stray '>' stays in place)
                #   <        a '<' that is never closed, emitted as its own token so a
                #            start_with?('<') check on the token treats it as markup
                # Every character lands in exactly one token, so joining the result
                # reproduces the input, and no empty strings are emitted.
                text.scan(/<[^>]*>|[^<]+|</)
              end
         
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
              # 
         
     | 
| 
      
 161 
     | 
    
         
            +
              # Method to get a minimal amount of key text that excludes any tags
         
     | 
| 
      
 162 
     | 
    
         
            +
              # or control information for the engine to meaningfully and 
         
     | 
| 
      
 163 
     | 
    
         
            +
              # correctly infer the language being referred to in this TTML
         
     | 
| 
      
 164 
     | 
    
         
            +
              #
         
     | 
| 
      
 165 
     | 
    
         
            +
              def get_text(div, num_chars)
                # div       - node whose "p" descendants supply the text.
                # num_chars - maximum length of the returned sample.
                # Returns a String of at most num_chars characters ("" when the div
                # has no usable text).
                text_sample = ""
                div.css("p").each do |block|
                  # Replace tags (including ones that span lines) with a space, then
                  # collapse whitespace runs to one space so neighbouring words stay
                  # separate instead of being glued together.
                  text = block.inner_html.gsub(/<[^>]*>/, ' ').gsub(/\s+/, ' ').strip
                  next if text.empty?
                  # Keep a word boundary between consecutive paragraphs.
                  text_sample << ' ' unless text_sample.empty?
                  text_sample << text
                  break if text_sample.length >= num_chars
                end
                text_sample[0, num_chars]
              end
         
     | 
| 
      
 180 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/vtt.rb
    CHANGED
    
    | 
         @@ -1,81 +1,115 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            require_relative "engines/translator"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require_relative "allfather"
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Library to handle VTT Files
         
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # Uses the translator available to do the necessary language operations
         
     | 
| 
      
 8 
     | 
    
         
            +
            # as defined by the AllFather
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
       4 
10 
     | 
    
         
             
            class VTT
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
              include AllFather
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
              def initialize(cc_file, translator)
         
     | 
| 
      
 15 
     | 
    
         
            +
                @cc_file = cc_file
         
     | 
| 
      
 16 
     | 
    
         
            +
                @translator = translator
         
     | 
| 
      
 17 
     | 
    
         
            +
                raise "Invalid VTT file provided" unless is_valid?
         
     | 
| 
       8 
18 
     | 
    
         
             
              end
         
     | 
| 
       9 
19 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
              def  
     | 
| 
       11 
     | 
    
         
            -
                 
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                   
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                     
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
                     
     | 
| 
       22 
     | 
    
         
            -
                       
     | 
| 
       23 
     | 
    
         
            -
                       
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
      
 20 
     | 
    
         
            +
              def translate(src_lang, dest_lang, out_file)
         
     | 
| 
      
 21 
     | 
    
         
            +
                super(src_lang, dest_lang, out_file)
         
     | 
| 
      
 22 
     | 
    
         
            +
                begin
         
     | 
| 
      
 23 
     | 
    
         
            +
                  ccfile = File.open(@cc_file, 'r:UTF-8', &:read)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  outfile = File.open(out_file, "w")
         
     | 
| 
      
 25 
     | 
    
         
            +
                  text_collection = false
         
     | 
| 
      
 26 
     | 
    
         
            +
                  text_sample = ""
         
     | 
| 
      
 27 
     | 
    
         
            +
                  ccfile.each_line do | line |
         
     | 
| 
      
 28 
     | 
    
         
            +
                    if line =~ /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/
         
     | 
| 
      
 29 
     | 
    
         
            +
                      text_collection = true
         
     | 
| 
      
 30 
     | 
    
         
            +
                      outfile.puts line
         
     | 
| 
      
 31 
     | 
    
         
            +
                    elsif line.strip.empty? && !text_sample.empty?
         
     | 
| 
      
 32 
     | 
    
         
            +
                      json_text = JSON.parse(text_sample) rescue nil
         
     | 
| 
      
 33 
     | 
    
         
            +
                      if json_text.nil?
         
     | 
| 
      
 34 
     | 
    
         
            +
                        trans_resp = @translator.translate(text_sample, src_lang, dest_lang)
         
     | 
| 
      
 35 
     | 
    
         
            +
                        outfile.puts trans_resp
         
     | 
| 
      
 36 
     | 
    
         
            +
                        outfile.puts
         
     | 
| 
      
 37 
     | 
    
         
            +
                      else
         
     | 
| 
      
 38 
     | 
    
         
            +
                        outfile.puts text_sample
         
     | 
| 
      
 39 
     | 
    
         
            +
                        outfile.puts
         
     | 
| 
      
 40 
     | 
    
         
            +
                      end
         
     | 
| 
      
 41 
     | 
    
         
            +
                      text_sample = ""
         
     | 
| 
      
 42 
     | 
    
         
            +
                      text_collection = false
         
     | 
| 
      
 43 
     | 
    
         
            +
                    elsif text_collection
         
     | 
| 
      
 44 
     | 
    
         
            +
                      text_sample << line
         
     | 
| 
       25 
45 
     | 
    
         
             
                    else
         
     | 
| 
       26 
     | 
    
         
            -
                      outfile.puts  
     | 
| 
       27 
     | 
    
         
            -
                      outfile.puts
         
     | 
| 
      
 46 
     | 
    
         
            +
                      outfile.puts line
         
     | 
| 
       28 
47 
     | 
    
         
             
                    end
         
     | 
| 
       29 
     | 
    
         
            -
                    text_sample = ""
         
     | 
| 
       30 
     | 
    
         
            -
                    text_collection = false
         
     | 
| 
       31 
     | 
    
         
            -
                  elsif text_collection
         
     | 
| 
       32 
     | 
    
         
            -
                    text_sample << line
         
     | 
| 
       33 
     | 
    
         
            -
                  else
         
     | 
| 
       34 
     | 
    
         
            -
                    outfile.puts line
         
     | 
| 
       35 
48 
     | 
    
         
             
                  end
         
     | 
| 
       36 
     | 
    
         
            -
                  next
         
     | 
| 
       37 
     | 
    
         
            -
                end
         
     | 
| 
       38 
49 
     | 
    
         | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
      
 50 
     | 
    
         
            +
                  if !text_sample.empty?
         
     | 
| 
      
 51 
     | 
    
         
            +
                    trans_resp = @translator.translate(text_sample, src_lang, dest_lang)
         
     | 
| 
      
 52 
     | 
    
         
            +
                    outfile.puts trans_resp
         
     | 
| 
      
 53 
     | 
    
         
            +
                    outfile.puts
         
     | 
| 
      
 54 
     | 
    
         
            +
                  end
         
     | 
| 
      
 55 
     | 
    
         
            +
                ensure
         
     | 
| 
      
 56 
     | 
    
         
            +
                  ccfile.close rescue nil
         
     | 
| 
       43 
57 
     | 
    
         
             
                  outfile.close
         
     | 
| 
       44 
58 
     | 
    
         
             
                end
         
     | 
| 
       45 
59 
     | 
    
         
             
              end
         
     | 
| 
       46 
60 
     | 
    
         | 
| 
      
 61 
     | 
    
         
            +
              #
         
     | 
| 
      
 62 
     | 
    
         
            +
              # Returns the inferred language in an array
         
     | 
| 
      
 63 
     | 
    
         
            +
              #
         
     | 
| 
      
 64 
     | 
    
         
            +
              def infer_languages
         
     | 
| 
      
 65 
     | 
    
         
            +
                lang = nil
         
     | 
| 
      
 66 
     | 
    
         
            +
                begin
         
     | 
| 
      
 67 
     | 
    
         
            +
                  sample_text = get_text(@cc_file, 100)
         
     | 
| 
      
 68 
     | 
    
         
            +
                  lang = @translator.infer_language(sample_text)
         
     | 
| 
      
 69 
     | 
    
         
            +
                rescue StandardError => e
         
     | 
| 
      
 70 
     | 
    
         
            +
                  puts "Error while detecting the language due to #{e.message}"
         
     | 
| 
      
 71 
     | 
    
         
            +
                end
         
     | 
| 
      
 72 
     | 
    
         
            +
                [lang]
         
     | 
| 
      
 73 
     | 
    
         
            +
              end
         
     | 
| 
       47 
74 
     | 
    
         | 
| 
       48 
     | 
    
         
            -
               
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                 
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
                   
     | 
| 
       55 
     | 
    
         
            -
                    text_collection = true
         
     | 
| 
       56 
     | 
    
         
            -
                  elsif line.strip.empty?
         
     | 
| 
       57 
     | 
    
         
            -
                    text_collection = false
         
     | 
| 
       58 
     | 
    
         
            -
                  elsif text_collection && text_sample.length < (num_chars+1)
         
     | 
| 
       59 
     | 
    
         
            -
                    text_sample << line
         
     | 
| 
       60 
     | 
    
         
            -
                  end
         
     | 
| 
       61 
     | 
    
         
            -
                  break if text_sample.length > (num_chars+1)
         
     | 
| 
       62 
     | 
    
         
            -
                  next
         
     | 
| 
      
 75 
     | 
    
         
            +
              # 
         
     | 
| 
      
 76 
     | 
    
         
            +
              # Method to add required set of validations specific to caption type
         
     | 
| 
      
 77 
     | 
    
         
            +
              #
         
     | 
| 
      
 78 
     | 
    
         
            +
              def is_valid?
         
     | 
| 
      
 79 
     | 
    
         
            +
                # Do any VTT specific validations here
         
     | 
| 
      
 80 
     | 
    
         
            +
                if @cc_file =~ /^.*\.(vtt)$/
         
     | 
| 
      
 81 
     | 
    
         
            +
                  return true
         
     | 
| 
       63 
82 
     | 
    
         
             
                end
         
     | 
| 
       64 
     | 
    
         
            -
                 
     | 
| 
      
 83 
     | 
    
         
            +
                # TODO: Check if it's required to do a File read to see if the 1st line is WEBVTT
         
     | 
| 
      
 84 
     | 
    
         
            +
                # to handle cases where invalid file is named with vtt extension
         
     | 
| 
      
 85 
     | 
    
         
            +
                return false
         
     | 
| 
       65 
86 
     | 
    
         
             
              end
         
     | 
| 
       66 
87 
     | 
    
         | 
| 
       67 
     | 
    
         
            -
               
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
      
 88 
     | 
    
         
            +
              private 
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
              # 
         
     | 
| 
      
 91 
     | 
    
         
            +
              # Method to get a minimal amount of key text that excludes any tags
         
     | 
| 
      
 92 
     | 
    
         
            +
              # or control information for the engine to meaninfully and 
         
     | 
| 
      
 93 
     | 
    
         
            +
              # correctly infer the language being referred to in ths VTT
         
     | 
| 
      
 94 
     | 
    
         
            +
              #
         
     | 
| 
      
 95 
     | 
    
         
            +
              def get_text(vtt_file, num_chars)
         
     | 
| 
       69 
96 
     | 
    
         
             
                begin
         
     | 
| 
       70 
     | 
    
         
            -
                   
     | 
| 
       71 
     | 
    
         
            -
                   
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
      
 97 
     | 
    
         
            +
                  ccfile = File.open(vtt_file, 'r:UTF-8', &:read)
         
     | 
| 
      
 98 
     | 
    
         
            +
                  text_collection = false
         
     | 
| 
      
 99 
     | 
    
         
            +
                  text_sample = ""
         
     | 
| 
      
 100 
     | 
    
         
            +
                  ccfile.each_line do |line|
         
     | 
| 
      
 101 
     | 
    
         
            +
                    if line =~ /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/
         
     | 
| 
      
 102 
     | 
    
         
            +
                      text_collection = true
         
     | 
| 
      
 103 
     | 
    
         
            +
                    elsif line.strip.empty?
         
     | 
| 
      
 104 
     | 
    
         
            +
                      text_collection = false
         
     | 
| 
      
 105 
     | 
    
         
            +
                    elsif text_collection && text_sample.length < (num_chars + 1)
         
     | 
| 
      
 106 
     | 
    
         
            +
                      text_sample << line
         
     | 
| 
      
 107 
     | 
    
         
            +
                    end
         
     | 
| 
      
 108 
     | 
    
         
            +
                    break if text_sample.length > (num_chars + 1)
         
     | 
| 
      
 109 
     | 
    
         
            +
                  end
         
     | 
| 
      
 110 
     | 
    
         
            +
                ensure
         
     | 
| 
      
 111 
     | 
    
         
            +
                  ccfile.close rescue nil
         
     | 
| 
       77 
112 
     | 
    
         
             
                end
         
     | 
| 
       78 
     | 
    
         
            -
                 
     | 
| 
      
 113 
     | 
    
         
            +
                return text_sample[0, num_chars]
         
     | 
| 
       79 
114 
     | 
    
         
             
              end
         
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
115 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,15 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: subtitle
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.4
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Maheshwaran G
         
     | 
| 
      
 8 
     | 
    
         
            +
            - Arunjeyaprasad A J
         
     | 
| 
       8 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2019-10- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2019-10-31 00:00:00.000000000 Z
         
     | 
| 
       12 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
15 
     | 
    
         
             
              name: bundler
         
     | 
| 
         @@ -25,43 +26,37 @@ dependencies: 
     | 
|
| 
       25 
26 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       26 
27 
     | 
    
         
             
                    version: '2.0'
         
     | 
| 
       27 
28 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       28 
     | 
    
         
            -
              name: aws-sdk 
     | 
| 
      
 29 
     | 
    
         
            +
              name: aws-sdk
         
     | 
| 
       29 
30 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       30 
31 
     | 
    
         
             
                requirements:
         
     | 
| 
       31 
     | 
    
         
            -
                - - " 
     | 
| 
       32 
     | 
    
         
            -
                  - !ruby/object:Gem::Version
         
     | 
| 
       33 
     | 
    
         
            -
                    version: '0'
         
     | 
| 
       34 
     | 
    
         
            -
              type: :runtime
         
     | 
| 
       35 
     | 
    
         
            -
              prerelease: false
         
     | 
| 
       36 
     | 
    
         
            -
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       37 
     | 
    
         
            -
                requirements:
         
     | 
| 
       38 
     | 
    
         
            -
                - - ">="
         
     | 
| 
       39 
     | 
    
         
            -
                  - !ruby/object:Gem::Version
         
     | 
| 
       40 
     | 
    
         
            -
                    version: '0'
         
     | 
| 
       41 
     | 
    
         
            -
            - !ruby/object:Gem::Dependency
         
     | 
| 
       42 
     | 
    
         
            -
              name: aws-sdk-translate
         
     | 
| 
       43 
     | 
    
         
            -
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
       44 
     | 
    
         
            -
                requirements:
         
     | 
| 
       45 
     | 
    
         
            -
                - - ">="
         
     | 
| 
      
 32 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
       46 
33 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       47 
     | 
    
         
            -
                    version: ' 
     | 
| 
       48 
     | 
    
         
            -
              type: : 
     | 
| 
      
 34 
     | 
    
         
            +
                    version: '2.11'
         
     | 
| 
      
 35 
     | 
    
         
            +
              type: :development
         
     | 
| 
       49 
36 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       50 
37 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       51 
38 
     | 
    
         
             
                requirements:
         
     | 
| 
       52 
     | 
    
         
            -
                - - " 
     | 
| 
      
 39 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
       53 
40 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       54 
     | 
    
         
            -
                    version: ' 
     | 
| 
       55 
     | 
    
         
            -
            description:  
     | 
| 
      
 41 
     | 
    
         
            +
                    version: '2.11'
         
     | 
| 
      
 42 
     | 
    
         
            +
            description: Subtitle gem helps you to detect language and translate closed caption
         
     | 
| 
      
 43 
     | 
    
         
            +
              to required language.
         
     | 
| 
       56 
44 
     | 
    
         
             
            email:
         
     | 
| 
       57 
45 
     | 
    
         
             
            - pgmaheshwaran@gmail.com
         
     | 
| 
      
 46 
     | 
    
         
            +
            - arunjeyaprasad@gmail.com
         
     | 
| 
       58 
47 
     | 
    
         
             
            executables: []
         
     | 
| 
       59 
48 
     | 
    
         
             
            extensions: []
         
     | 
| 
       60 
49 
     | 
    
         
             
            extra_rdoc_files: []
         
     | 
| 
       61 
50 
     | 
    
         
             
            files:
         
     | 
| 
      
 51 
     | 
    
         
            +
            - lib/allfather.rb
         
     | 
| 
      
 52 
     | 
    
         
            +
            - lib/dfxp.rb
         
     | 
| 
      
 53 
     | 
    
         
            +
            - lib/engines/aws.rb
         
     | 
| 
      
 54 
     | 
    
         
            +
            - lib/engines/gcp.rb
         
     | 
| 
      
 55 
     | 
    
         
            +
            - lib/engines/translator.rb
         
     | 
| 
       62 
56 
     | 
    
         
             
            - lib/scc.rb
         
     | 
| 
       63 
57 
     | 
    
         
             
            - lib/srt.rb
         
     | 
| 
       64 
58 
     | 
    
         
             
            - lib/subtitle.rb
         
     | 
| 
      
 59 
     | 
    
         
            +
            - lib/ttml.rb
         
     | 
| 
       65 
60 
     | 
    
         
             
            - lib/vtt.rb
         
     | 
| 
       66 
61 
     | 
    
         
             
            homepage: https://github.com/cloudaffair/subtitle
         
     | 
| 
       67 
62 
     | 
    
         
             
            licenses:
         
     | 
| 
         @@ -85,9 +80,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       85 
80 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       86 
81 
     | 
    
         
             
            requirements: []
         
     | 
| 
       87 
82 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       88 
     | 
    
         
            -
            rubygems_version: 2. 
     | 
| 
      
 83 
     | 
    
         
            +
            rubygems_version: 2.5.1
         
     | 
| 
       89 
84 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       90 
85 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       91 
     | 
    
         
            -
            summary:  
     | 
| 
       92 
     | 
    
         
            -
              language
         
     | 
| 
      
 86 
     | 
    
         
            +
            summary: Subtitle gem helps you to detect language and translate closed caption to
         
     | 
| 
      
 87 
     | 
    
         
            +
              required language
         
     | 
| 
       93 
88 
     | 
    
         
             
            test_files: []
         
     |