subtitle 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/lib/srt.rb +81 -0
  3. data/lib/subtitle.rb +43 -0
  4. data/lib/vtt.rb +81 -0
  5. metadata +91 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e0908e0ca690672bf633eb1c212c5470aa7316f0e46fb5e224f60e1d17e3b6ee
4
+ data.tar.gz: 47aebdc5c91dd5ef4a93f3699f3c62042dbaeed01fb31b7ec23c386cc8dfe5ef
5
+ SHA512:
6
+ metadata.gz: 45fa0b25384d0547ae54bd84f3acd9cfb06d393616e23aea3a986532a141d94ab9f65c3d215b7688c5e964537fd632d5acca949f53e1ec91634ee9f70d9ca61e
7
+ data.tar.gz: 155b11f1511aeb00d91ba59ea78cf8a432a603ae0f90bcf624828f227eb8abd92e8f162da3b0aae0b5334e75b83d1a18e8bbea71a1abf051866815b7c99f1cdb
data/lib/srt.rb ADDED
@@ -0,0 +1,81 @@
1
+ require 'aws-sdk-translate'
2
+ require 'aws-sdk-comprehend'
3
+
4
# Translates SubRip (.srt) caption files with AWS Translate and detects
# their language with AWS Comprehend.
class SRT
  # A cue timing line such as "00:00:01,000 --> 00:00:04,000". Either a comma
  # or a dot is accepted as the millisecond separator.
  TIMING_LINE = /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/

  # @param awskey [#to_s] AWS access key id handed to both service clients
  # @param awssecret [#to_s] AWS secret access key handed to both service clients
  def initialize(awskey, awssecret)
    @translate = Aws::Translate::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
    @comp = Aws::Comprehend::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
  end

  # Copies +srt_file+ to +out_file+, replacing the text of every cue with its
  # translation. Timing lines and every line outside a cue are copied as-is.
  # Cue text that parses as JSON is copied untranslated.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param src_lang [#to_s] source language code sent to the translate client
  # @param dest_lang [#to_s] target language code sent to the translate client
  # @param out_file [String] path of the file to write
  # @return [nil]
  def translate_text(srt_file, src_lang, dest_lang, out_file)
    ccfile = File.read(srt_file, encoding: 'UTF-8')
    # The block form guarantees the output is flushed and closed on every
    # path, including when the input ends with a blank line or a call raises.
    File.open(out_file, "w") do |outfile|
      text_collection = false
      text_sample = +""
      ccfile.each_line do |line|
        if line =~ TIMING_LINE
          text_collection = true
          outfile.puts line
        elsif line.strip.empty? && !text_sample.empty?
          # A blank line terminates the cue collected so far.
          write_cue(outfile, text_sample, src_lang, dest_lang)
          text_sample = +""
          text_collection = false
        elsif text_collection
          text_sample << line
        else
          outfile.puts line
        end
      end
      # The last cue may not be followed by a blank line; it gets the same
      # treatment (including the JSON pass-through) as every other cue.
      write_cue(outfile, text_sample, src_lang, dest_lang) unless text_sample.empty?
    end
    nil
  end

  # Collects cue text from the start of the file until more than
  # +num_chars+ + 1 characters have been gathered.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param num_chars [Integer] maximum number of characters to return
  # @return [String] at most +num_chars+ characters of cue text
  def get_text(srt_file, num_chars)
    ccfile = File.read(srt_file, encoding: 'UTF-8')
    text_collection = false
    text_sample = +""
    ccfile.each_line do |line|
      if line =~ TIMING_LINE
        text_collection = true
      elsif line.strip.empty?
        text_collection = false
      elsif text_collection && text_sample.length < (num_chars + 1)
        text_sample << line
      end
      break if text_sample.length > (num_chars + 1)
    end
    text_sample[0, num_chars]
  end

  # Asks the Comprehend client for the dominant language of the first 100
  # characters of cue text.
  #
  # @param srt_file [String] path of the caption file to sample
  # @return [String, nil] language code of the first reported language, or
  #   nil when nothing was reported or any error occurred (a message is
  #   printed to stdout in the error case)
  def detect_lang(srt_file)
    sample_text = get_text(srt_file, 100)
    response = @comp.detect_dominant_language({ text: sample_text.to_s })
    top_language = Array(response[:languages]).first
    top_language && top_language[:language_code]
  rescue StandardError
    puts "Error while detecting the language!!"
    nil
  end

  private

  # Writes one finished cue followed by the blank separator line.
  def write_cue(outfile, cue_text, src_lang, dest_lang)
    outfile.puts(json_payload?(cue_text) ? cue_text : translated(cue_text, src_lang, dest_lang))
    outfile.puts
  end

  # Sends +text+ to the translate client and returns the translated string.
  def translated(text, src_lang, dest_lang)
    request = { :text => text.to_s, :source_language_code => src_lang.to_s, :target_language_code => dest_lang.to_s }
    @translate.translate_text(request).translated_text
  end

  # True when +text+ parses as JSON to a non-nil value. Any failure counts as
  # "not JSON", including JSON not being loaded, so the text gets translated.
  def json_payload?(text)
    !JSON.parse(text).nil?
  rescue StandardError
    false
  end
end
data/lib/subtitle.rb ADDED
@@ -0,0 +1,43 @@
1
+ require "srt"
2
+
3
# Entry point of the gem: wraps one caption file and offers language
# detection and translation for it.
#
# NOTE(review): the SRT parser is used for every accepted file, including
# ".vtt" ones — confirm whether VTT files were meant to use a VTT parser.
class Subtitle
  # @param awskey [String] AWS access key id, passed to the parser
  # @param awssecret [String] AWS secret access key, passed to the parser
  # @param ccfile [String] path of the caption file; must end in .srt or .vtt
  # @raise [RuntimeError] when an argument is nil, the extension is not
  #   accepted, or the parser cannot be constructed
  def initialize(awskey, awssecret, ccfile)
    raise "Invalid Arguments, please check" if [awskey, awssecret, ccfile].any?(&:nil?)

    @ccfile = ccfile
    raise "Incorrect File extension" unless file_valid

    begin
      @srt_parser = SRT.new(awskey, awssecret)
    rescue StandardError
      # The original exception stays reachable through Exception#cause.
      raise "Could not initialize Parser!!. Check the Keys supplied."
    end
  end

  # @return [String, nil] whatever the parser's detect_lang returns for the
  #   wrapped caption file
  def detect_language
    @srt_parser.detect_lang(@ccfile)
  end

  # Translates the wrapped caption file into +dest_lang+.
  #
  # @param dest_lang [String] target language code
  # @param src_lang [String, nil] source language code; detected when nil
  # @param outfile [String, nil] output path; defaults to
  #   "<ccfile>_<dest_lang>" when nil
  # @return [String] the path that was written
  # @raise [RuntimeError] when the source language is nil and cannot be detected
  def translate_cc(dest_lang, src_lang = nil, outfile = nil)
    outfile = "#{@ccfile}_#{dest_lang}" if outfile.nil?
    if src_lang.nil?
      src_lang = detect_language
      raise "could not detect Source Language!!" if src_lang.nil?
    end
    @srt_parser.translate_text(@ccfile, src_lang, dest_lang, outfile)
    outfile
  end

  # @return [Boolean] true when the file name ends in ".srt" or ".vtt"
  def file_valid
    # \z anchors at the very end of the string. The previous ^...$ anchors
    # match per line, so "x.srt\nevil.exe" was accepted as valid.
    @ccfile.to_s.match?(/\.(srt|vtt)\z/)
  end
end
data/lib/vtt.rb ADDED
@@ -0,0 +1,81 @@
1
+ require 'aws-sdk-translate'
2
+ require 'aws-sdk-comprehend'
3
+
4
# Translates WebVTT (.vtt) caption files with AWS Translate and detects
# their language with AWS Comprehend.
class VTT
  # A cue timing line such as "00:00:01.000 --> 00:00:04.000". The hours
  # field is optional because WebVTT also allows "mm:ss.ttt" timestamps;
  # either a comma or a dot is accepted as the millisecond separator.
  TIMING_LINE = /^(?:\d{2,}:)?\d\d:\d\d[,.]\d\d\d.*-->.*(?:\d{2,}:)?\d\d:\d\d[,.]\d\d\d/

  # @param awskey [#to_s] AWS access key id handed to both service clients
  # @param awssecret [#to_s] AWS secret access key handed to both service clients
  def initialize(awskey, awssecret)
    @translate = Aws::Translate::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
    @comp = Aws::Comprehend::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
  end

  # Copies +srt_file+ to +out_file+, replacing the text of every cue with its
  # translation. Timing lines and every line outside a cue are copied as-is.
  # Cue text that parses as JSON is copied untranslated.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param src_lang [#to_s] source language code sent to the translate client
  # @param dest_lang [#to_s] target language code sent to the translate client
  # @param out_file [String] path of the file to write
  # @return [nil]
  def translate_text(srt_file, src_lang, dest_lang, out_file)
    ccfile = File.read(srt_file, encoding: 'UTF-8')
    # The block form guarantees the output is flushed and closed on every
    # path, including when the input ends with a blank line or a call raises.
    File.open(out_file, "w") do |outfile|
      text_collection = false
      text_sample = +""
      ccfile.each_line do |line|
        if line =~ TIMING_LINE
          text_collection = true
          outfile.puts line
        elsif line.strip.empty? && !text_sample.empty?
          # A blank line terminates the cue collected so far.
          write_cue(outfile, text_sample, src_lang, dest_lang)
          text_sample = +""
          text_collection = false
        elsif text_collection
          text_sample << line
        else
          outfile.puts line
        end
      end
      # The last cue may not be followed by a blank line; it gets the same
      # treatment (including the JSON pass-through) as every other cue.
      write_cue(outfile, text_sample, src_lang, dest_lang) unless text_sample.empty?
    end
    nil
  end

  # Collects cue text from the start of the file until more than
  # +num_chars+ + 1 characters have been gathered.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param num_chars [Integer] maximum number of characters to return
  # @return [String] at most +num_chars+ characters of cue text
  def get_text(srt_file, num_chars)
    ccfile = File.read(srt_file, encoding: 'UTF-8')
    text_collection = false
    text_sample = +""
    ccfile.each_line do |line|
      if line =~ TIMING_LINE
        text_collection = true
      elsif line.strip.empty?
        text_collection = false
      elsif text_collection && text_sample.length < (num_chars + 1)
        text_sample << line
      end
      break if text_sample.length > (num_chars + 1)
    end
    text_sample[0, num_chars]
  end

  # Asks the Comprehend client for the dominant language of the first 100
  # characters of cue text.
  #
  # @param srt_file [String] path of the caption file to sample
  # @return [String, nil] language code of the first reported language, or
  #   nil when nothing was reported or any error occurred (a message is
  #   printed to stdout in the error case)
  def detect_lang(srt_file)
    sample_text = get_text(srt_file, 100)
    response = @comp.detect_dominant_language({ text: sample_text.to_s })
    top_language = Array(response[:languages]).first
    top_language && top_language[:language_code]
  rescue StandardError
    puts "Error while detecting the language!!"
    nil
  end

  private

  # Writes one finished cue followed by the blank separator line.
  def write_cue(outfile, cue_text, src_lang, dest_lang)
    outfile.puts(json_payload?(cue_text) ? cue_text : translated(cue_text, src_lang, dest_lang))
    outfile.puts
  end

  # Sends +text+ to the translate client and returns the translated string.
  def translated(text, src_lang, dest_lang)
    request = { :text => text.to_s, :source_language_code => src_lang.to_s, :target_language_code => dest_lang.to_s }
    @translate.translate_text(request).translated_text
  end

  # True when +text+ parses as JSON to a non-nil value. Any failure counts as
  # "not JSON", including JSON not being loaded, so the text gets translated.
  def json_payload?(text)
    !JSON.parse(text).nil?
  rescue StandardError
    false
  end
end
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: subtitle
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.7
5
+ platform: ruby
6
+ authors:
7
+ - Maheshwaran G
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-10-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk-comprehend
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: aws-sdk-translate
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: subtitle gem to detect and translate closed caption for SubRip and WebVTT
56
+ email:
57
+ - pgmaheshwaran@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - lib/srt.rb
63
+ - lib/subtitle.rb
64
+ - lib/vtt.rb
65
+ homepage: https://github.com/cloudaffair/subtitle
66
+ licenses:
67
+ - MIT
68
+ metadata:
69
+ homepage_uri: https://github.com/cloudaffair/subtitle
70
+ source_code_uri: https://github.com/cloudaffair/subtitle
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.3
88
+ signing_key:
89
+ specification_version: 4
90
+ summary: subtitle gem to detect and translate closed caption
91
+ test_files: []