subtitle 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/srt.rb +81 -0
- data/lib/subtitle.rb +43 -0
- data/lib/vtt.rb +81 -0
- metadata +91 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e0908e0ca690672bf633eb1c212c5470aa7316f0e46fb5e224f60e1d17e3b6ee
|
4
|
+
data.tar.gz: 47aebdc5c91dd5ef4a93f3699f3c62042dbaeed01fb31b7ec23c386cc8dfe5ef
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 45fa0b25384d0547ae54bd84f3acd9cfb06d393616e23aea3a986532a141d94ab9f65c3d215b7688c5e964537fd632d5acca949f53e1ec91634ee9f70d9ca61e
|
7
|
+
data.tar.gz: 155b11f1511aeb00d91ba59ea78cf8a432a603ae0f90bcf624828f227eb8abd92e8f162da3b0aae0b5334e75b83d1a18e8bbea71a1abf051866815b7c99f1cdb
|
data/lib/srt.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'aws-sdk-translate'
|
2
|
+
require 'aws-sdk-comprehend'
|
3
|
+
|
4
|
+
# Translates caption files cue by cue with AWS Translate and detects their
# dominant language with AWS Comprehend.
class SRT
  # Matches a cue timing line such as "00:00:01,000 --> 00:00:02,500".
  TIMING_LINE = /^(\d\d:)\d\d:\d\d[,.]\d\d\d.*-->.*(\d\d:)\d\d:\d\d[,.]\d\d\d/

  # @param awskey [#to_s] AWS access key id handed to both service clients
  # @param awssecret [#to_s] AWS secret access key handed to both service clients
  def initialize(awskey, awssecret)
    @translate = Aws::Translate::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
    @comp = Aws::Comprehend::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
  end

  # Copies +srt_file+ to +out_file+, replacing the text of every cue with its
  # translation. Lines outside a cue body (indexes, timing lines, headers) are
  # copied unchanged. A cue whose whole text parses as a JSON object or array
  # is copied through untranslated.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param src_lang [#to_s] source language code passed to AWS Translate
  # @param dest_lang [#to_s] target language code passed to AWS Translate
  # @param out_file [String] path of the file to write
  # @return [nil]
  def translate_text(srt_file, src_lang, dest_lang, out_file)
    # Read everything before opening the output so that out_file may be the
    # same path as srt_file.
    content = File.open(srt_file, 'r:UTF-8', &:read)

    # Block form guarantees the handle is flushed and closed on every path,
    # including when the translate call raises.
    File.open(out_file, 'w') do |outfile|
      collecting = false
      cue_text = ''.dup

      content.each_line do |line|
        if line =~ TIMING_LINE
          collecting = true
          outfile.puts line
        elsif line.strip.empty?
          if cue_text.empty?
            # Blank line with nothing buffered (separator, or a cue without
            # text): copy it and stop collecting so that the next cue index is
            # not mistaken for caption text.
            outfile.puts line
          else
            write_cue(outfile, cue_text, src_lang, dest_lang)
            cue_text = ''.dup
          end
          collecting = false
        elsif collecting
          cue_text << line
        else
          outfile.puts line
        end
      end

      # The last cue is not followed by a blank line when the file ends
      # directly after its text.
      write_cue(outfile, cue_text, src_lang, dest_lang) unless cue_text.empty?
    end
    nil
  end

  # Collects caption text (cue bodies only) from the start of the file.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param num_chars [Integer] maximum number of characters to return
  # @return [String] at most +num_chars+ characters of caption text
  def get_text(srt_file, num_chars)
    collecting = false
    sample = ''.dup

    # Stream the file so sampling stops as soon as enough text was gathered.
    File.foreach(srt_file, :encoding => 'UTF-8') do |line|
      if line =~ TIMING_LINE
        collecting = true
      elsif line.strip.empty?
        collecting = false
      elsif collecting
        sample << line
      end
      break if sample.length >= num_chars
    end

    sample[0, num_chars]
  end

  # Detects the dominant language of the first 100 characters of caption text.
  # Failures are reported on stderr instead of being raised.
  #
  # @param srt_file [String] path of the caption file to inspect
  # @return [String, nil] language code of the first reported language, or nil
  #   when there is no caption text or detection failed
  def detect_lang(srt_file)
    sample_text = get_text(srt_file, 100)
    # An empty sample cannot be classified; skip the service call.
    return nil if sample_text.nil? || sample_text.empty?

    response = @comp.detect_dominant_language(:text => sample_text)
    top = (response.languages || []).first
    top && top.language_code
  rescue StandardError => error
    warn "Error while detecting the language!! (#{error.class}: #{error.message})"
    nil
  end

  private

  # Writes one cue body followed by the blank separator line, translating the
  # text unless it is a JSON payload.
  def write_cue(outfile, cue_text, src_lang, dest_lang)
    if structured_json?(cue_text)
      outfile.puts cue_text
    else
      response = @translate.translate_text(
        :text => cue_text,
        :source_language_code => src_lang.to_s,
        :target_language_code => dest_lang.to_s
      )
      outfile.puts response.translated_text
    end
    outfile.puts
  end

  # True only for JSON objects and arrays. Scalars such as "quoted text" or
  # 1945 are ordinary captions and must still be translated.
  def structured_json?(text)
    parsed = JSON.parse(text)
    parsed.is_a?(Hash) || parsed.is_a?(Array)
  rescue StandardError
    # Covers parse errors and an unavailable JSON library alike.
    false
  end
end
|
data/lib/subtitle.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require "srt"
|
2
|
+
|
3
|
+
# Entry point of the gem: validates a caption file name and delegates
# language detection and translation to an SRT parser instance.
#
# NOTE(review): files ending in .vtt are also routed through SRT here; the VTT
# class is not referenced by this class — confirm this is intended.
class Subtitle
  # Whole-string match (\A/\z rather than ^/$) so that a name containing a
  # newline cannot pass by having just one line that ends in a valid extension.
  SUPPORTED_EXTENSION = /\A.*\.(srt|vtt)\z/

  # @param awskey [Object] AWS access key id, forwarded to SRT.new
  # @param awssecret [Object] AWS secret access key, forwarded to SRT.new
  # @param ccfile [#to_s] path of the caption file; must end in .srt or .vtt
  # @raise [RuntimeError] when an argument is nil, the extension is not
  #   supported, or the parser cannot be constructed
  def initialize(awskey, awssecret, ccfile)
    raise "Invalid Arguments, please check" if awskey.nil? || awssecret.nil? || ccfile.nil?

    @ccfile = ccfile
    raise "Incorrect File extension" unless file_valid

    begin
      @srt_parser = SRT.new(awskey, awssecret)
    rescue StandardError
      # The original exception stays reachable through Exception#cause.
      raise "Could not initialize Parser!!. Check the Keys supplied."
    end
  end

  # @return [String, nil] whatever the parser's detect_lang returns for the
  #   caption file
  def detect_language
    @srt_parser.detect_lang(@ccfile)
  end

  # Translates the caption file into +dest_lang+.
  #
  # @param dest_lang [#to_s] target language code
  # @param src_lang [#to_s, nil] source language code; detected when nil
  # @param outfile [String, nil] output path; defaults to
  #   "<ccfile>_<dest_lang>"
  # @return [String] the path that was written
  # @raise [RuntimeError] when +dest_lang+ is blank or the source language
  #   cannot be detected
  def translate_cc(dest_lang, src_lang = nil, outfile = nil)
    # Fail before the parser creates (or truncates) the output file.
    raise "Invalid Arguments, please check" if dest_lang.nil? || dest_lang.to_s.strip.empty?

    outfile = "#{@ccfile}_#{dest_lang}" if outfile.nil?
    if src_lang.nil?
      src_lang = detect_language
      raise "could not detect Source Language!!" if src_lang.nil?
    end
    @srt_parser.translate_text(@ccfile, src_lang, dest_lang, outfile)
    outfile
  end

  # @return [Boolean] true when the caption file name ends in .srt or .vtt
  def file_valid
    !(SUPPORTED_EXTENSION =~ @ccfile.to_s).nil?
  end
end
|
data/lib/vtt.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'aws-sdk-translate'
|
2
|
+
require 'aws-sdk-comprehend'
|
3
|
+
|
4
|
+
# Translates WebVTT caption files cue by cue with AWS Translate and detects
# their dominant language with AWS Comprehend.
class VTT
  # Matches a cue timing line such as "00:01.000 --> 00:00:02.500". The hours
  # field is optional because WebVTT timestamps may be written as mm:ss.ttt.
  TIMING_LINE = /^(?:\d{2,}:)?\d\d:\d\d[,.]\d\d\d.*-->.*(?:\d{2,}:)?\d\d:\d\d[,.]\d\d\d/

  # @param awskey [#to_s] AWS access key id handed to both service clients
  # @param awssecret [#to_s] AWS secret access key handed to both service clients
  def initialize(awskey, awssecret)
    @translate = Aws::Translate::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
    @comp = Aws::Comprehend::Client.new(:access_key_id => awskey.to_s, :secret_access_key => awssecret.to_s)
  end

  # Copies +srt_file+ to +out_file+, replacing the text of every cue with its
  # translation. Lines outside a cue body (header, identifiers, timing lines)
  # are copied unchanged. A cue whose whole text parses as a JSON object or
  # array is copied through untranslated.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param src_lang [#to_s] source language code passed to AWS Translate
  # @param dest_lang [#to_s] target language code passed to AWS Translate
  # @param out_file [String] path of the file to write
  # @return [nil]
  def translate_text(srt_file, src_lang, dest_lang, out_file)
    # Read everything before opening the output so that out_file may be the
    # same path as srt_file.
    content = File.open(srt_file, 'r:UTF-8', &:read)

    # Block form guarantees the handle is flushed and closed on every path,
    # including when the translate call raises.
    File.open(out_file, 'w') do |outfile|
      collecting = false
      cue_text = ''.dup

      content.each_line do |line|
        if line =~ TIMING_LINE
          collecting = true
          outfile.puts line
        elsif line.strip.empty?
          if cue_text.empty?
            # Blank line with nothing buffered (separator, or a cue without
            # text): copy it and stop collecting so that the following lines
            # are not mistaken for caption text.
            outfile.puts line
          else
            write_cue(outfile, cue_text, src_lang, dest_lang)
            cue_text = ''.dup
          end
          collecting = false
        elsif collecting
          cue_text << line
        else
          outfile.puts line
        end
      end

      # The last cue is not followed by a blank line when the file ends
      # directly after its text.
      write_cue(outfile, cue_text, src_lang, dest_lang) unless cue_text.empty?
    end
    nil
  end

  # Collects caption text (cue bodies only) from the start of the file.
  #
  # @param srt_file [String] path of the caption file to read (UTF-8)
  # @param num_chars [Integer] maximum number of characters to return
  # @return [String] at most +num_chars+ characters of caption text
  def get_text(srt_file, num_chars)
    collecting = false
    sample = ''.dup

    # Stream the file so sampling stops as soon as enough text was gathered.
    File.foreach(srt_file, :encoding => 'UTF-8') do |line|
      if line =~ TIMING_LINE
        collecting = true
      elsif line.strip.empty?
        collecting = false
      elsif collecting
        sample << line
      end
      break if sample.length >= num_chars
    end

    sample[0, num_chars]
  end

  # Detects the dominant language of the first 100 characters of caption text.
  # Failures are reported on stderr instead of being raised.
  #
  # @param srt_file [String] path of the caption file to inspect
  # @return [String, nil] language code of the first reported language, or nil
  #   when there is no caption text or detection failed
  def detect_lang(srt_file)
    sample_text = get_text(srt_file, 100)
    # An empty sample cannot be classified; skip the service call.
    return nil if sample_text.nil? || sample_text.empty?

    response = @comp.detect_dominant_language(:text => sample_text)
    top = (response.languages || []).first
    top && top.language_code
  rescue StandardError => error
    warn "Error while detecting the language!! (#{error.class}: #{error.message})"
    nil
  end

  private

  # Writes one cue body followed by the blank separator line, translating the
  # text unless it is a JSON payload.
  def write_cue(outfile, cue_text, src_lang, dest_lang)
    if structured_json?(cue_text)
      outfile.puts cue_text
    else
      response = @translate.translate_text(
        :text => cue_text,
        :source_language_code => src_lang.to_s,
        :target_language_code => dest_lang.to_s
      )
      outfile.puts response.translated_text
    end
    outfile.puts
  end

  # True only for JSON objects and arrays. Scalars such as "quoted text" or
  # 1945 are ordinary captions and must still be translated.
  def structured_json?(text)
    parsed = JSON.parse(text)
    parsed.is_a?(Hash) || parsed.is_a?(Array)
  rescue StandardError
    # Covers parse errors and an unavailable JSON library alike.
    false
  end
end
|
metadata
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: subtitle
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.7
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Maheshwaran G
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2019-10-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: aws-sdk-comprehend
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: aws-sdk-translate
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
description: subtitle gem to detect and translate closed caption for SubRip and WebVTT
|
56
|
+
email:
|
57
|
+
- pgmaheshwaran@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- lib/srt.rb
|
63
|
+
- lib/subtitle.rb
|
64
|
+
- lib/vtt.rb
|
65
|
+
homepage: https://github.com/cloudaffair/subtitle
|
66
|
+
licenses:
|
67
|
+
- MIT
|
68
|
+
metadata:
|
69
|
+
homepage_uri: https://github.com/cloudaffair/subtitle
|
70
|
+
source_code_uri: https://github.com/cloudaffair/subtitle
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubyforge_project:
|
87
|
+
rubygems_version: 2.7.3
|
88
|
+
signing_key:
|
89
|
+
specification_version: 4
|
90
|
+
summary: subtitle gem to detect and translate closed caption
|
91
|
+
test_files: []
|