youtube-captions 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/youtube-captions.rb +44 -0
  3. metadata +56 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 15c3c25edd3842b8c1e7ca12a867806c80ec2457a02a590deb67a43b3da80e39
4
+ data.tar.gz: 6e90375916db90e5e398e2c7ebbbe4b92b9ceed87e39fff268b20d5331ef4067
5
+ SHA512:
6
+ metadata.gz: 9f57ea785e6bc8300557bbba7e82dfa927940f81b511abf4f3ef8ca7aaa2a71f9664e8ce8f04ab67281c90fecd8d7cb174e94b1ada9be7afcf8a16053b978c18
7
+ data.tar.gz: 78dfd88bf33fd9e403347b7aacfcb800b4a7c225541e3772b1ce1530b9e01db323b2ae9a7084b9b672220112a36fdd641d5057bfce6b4db6e6795375fd367cea
data/lib/youtube-captions.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'cgi'
2
+ require 'httparty'
3
+
4
# Downloads the caption track of a YouTube video and returns it as plain text.
#
# The watch page HTML is fetched, the embedded "captionTracks" JSON is pulled
# out with TRANSLATABLE_REGEX, one track is chosen by its "vssId", and the
# track's "baseUrl" is fetched and stripped down to its text content.
#
# @example
#   YoutubeCaptions.new(id: "VIDEO_ID", lang: "en").call
class YoutubeCaptions
  include HTTParty

  # Captures the `{"captionTracks":[ ... ]` fragment of the watch page.
  # The quantifier is non-greedy so the capture stops at the first
  # `"isTranslatable":<bool>}]` instead of running to the last one on the line.
  TRANSLATABLE_REGEX = /({"captionTracks":.*?"isTranslatable":(?:true|false)}\])/

  # Language tried first when the caller does not ask for one.
  DEFAULT_LANG = "en"

  attr_reader :id, :lang

  # @param id [String] video id, interpolated into the watch URL
  # @param lang [String, nil] language code matched against each track's
  #   "vssId"; nil or blank means "use DEFAULT_LANG, else the first track"
  def initialize(id:, lang: nil)
    @id = id
    @lang = lang
  end

  # Fetches and decodes the captions.
  #
  # @return [String] caption fragments joined with single spaces
  # @raise [StandardError] when the page exposes no usable caption track
  # @raise [ArgumentError] when +lang+ was given but no matching track
  #   (with a "baseUrl") exists
  def call
    track = choose_track(fetch_caption_tracks)
    transcript_xml = self.class.get(track["baseUrl"]).to_s

    fragments = transcript_xml
                .gsub('<?xml version="1.0" encoding="utf-8" ?><transcript>', '')
                .gsub('</transcript>', '')
                .split('</text>')
                .reject { |fragment| fragment.strip.empty? } # drop blank leftovers

    fragments.map { |fragment| decode_fragment(fragment) }.join(" ")
  end

  private

  # Loads the watch page and parses the embedded caption track list.
  def fetch_caption_tracks
    youtube_html = self.class.get("https://www.youtube.com/watch?v=#{id}")
    match_data = youtube_html.match(TRANSLATABLE_REGEX)
    raise StandardError, "There are no captions" unless match_data

    # The capture ends at the array's `]`; append the `}` that closes the object.
    JSON.parse("#{match_data[1]}}")["captionTracks"]
  end

  # Picks the track for the requested language, or a default when none was requested.
  def choose_track(tracks)
    if lang_requested?
      track = find_track(tracks, lang)
      raise ArgumentError, "Lang no available" unless track && track["baseUrl"]
    else
      track = find_track(tracks, DEFAULT_LANG) || tracks.first
      # Guard against an empty track list or a track without a URL.
      raise StandardError, "There are no captions" unless track && track["baseUrl"]
    end
    track
  end

  # Looks for vssId ".<code>" first, then "a.<code>".
  # NOTE(review): the "a." prefix presumably marks auto-generated tracks — confirm.
  def find_track(tracks, code)
    tracks.find { |track| track["vssId"] == ".#{code}" } ||
      tracks.find { |track| track["vssId"] == "a.#{code}" }
  end

  # Plain-Ruby blank check, so the gem does not depend on ActiveSupport's `present?`.
  def lang_requested?
    lang && !lang.to_s.strip.empty?
  end

  # Turns one `<text ...>payload` fragment into decoded plain text.
  def decode_fragment(fragment)
    text = fragment
           .gsub(/<text[^>]*>/, '')          # opening tag only, not up to the last '>'
           .gsub(/&amp;/i, '&')              # undo the outer layer of escaping
           .gsub(%r{</?[^>]+(?:>|\z)}, '')   # strip any remaining markup
    # Handles named entities as well as numeric ones such as &#39;.
    CGI.unescapeHTML(text)
  end
end
metadata ADDED
@@ -0,0 +1,56 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: youtube-captions
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Kevin S.
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-07-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: httparty
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description:
28
+ email:
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/youtube-captions.rb
34
+ homepage:
35
+ licenses: []
36
+ metadata: {}
37
+ post_install_message:
38
+ rdoc_options: []
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubygems_version: 3.4.6
53
+ signing_key:
54
+ specification_version: 4
55
+ summary: A gem to get captions of a youtube video
56
+ test_files: []