youtube-captions 0.0.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: 15c3c25edd3842b8c1e7ca12a867806c80ec2457a02a590deb67a43b3da80e39
4
- data.tar.gz: 6e90375916db90e5e398e2c7ebbbe4b92b9ceed87e39fff268b20d5331ef4067
2
+ SHA1:
3
+ metadata.gz: b3a75ebb25b8d3a61d08a95226072c4f1dec17a7
4
+ data.tar.gz: c5256c04472b80bf77ac4e56e95c63358040b431
5
5
  SHA512:
6
- metadata.gz: 9f57ea785e6bc8300557bbba7e82dfa927940f81b511abf4f3ef8ca7aaa2a71f9664e8ce8f04ab67281c90fecd8d7cb174e94b1ada9be7afcf8a16053b978c18
7
- data.tar.gz: 78dfd88bf33fd9e403347b7aacfcb800b4a7c225541e3772b1ce1530b9e01db323b2ae9a7084b9b672220112a36fdd641d5057bfce6b4db6e6795375fd367cea
6
+ metadata.gz: 75eb49ab43023fe2f0d4ff291a5a53fc853d0ff567df929be831406230867913d6232249ce01e8bd98540b99ac86e0d6fc9c8f60299e0505f52ade2d57adb486
7
+ data.tar.gz: a3652d02e585ff86b0baf97a8979b7f1b0ef594b4f88c96fcf2a09032b551d8711f161f85e6df288837aa1f8521b46da9a19ccb762cbba59e5b882d9a39639be
@@ -0,0 +1,49 @@
1
+ require 'httparty'
2
+ require 'cgi'
3
+
4
+ module YoutubeCaptions
5
+ class Captions
6
+ include HTTParty
7
+
8
+ attr_reader :info, :lang
9
+ def initialize(info:, lang:)
10
+ @info = info
11
+ @lang = lang
12
+ end
13
+
14
+ def call
15
+ if lang.nil?
16
+ lang_info = default_lang_info
17
+ else
18
+ lang_info = search_lang_info
19
+ end
20
+
21
+ return raise LangNotAvailableError.new("Lang no available") unless lang_info_has_base_url?(lang_info)
22
+
23
+ response = self.class.get(lang_info["baseUrl"])
24
+ captions = response["transcript"]["text"]
25
+
26
+ clean_captions(captions)
27
+ end
28
+
29
+ private
30
+
31
+ def default_lang_info
32
+ info.find {|json| json["kind"] == "asr"} || info.first
33
+ end
34
+
35
+ def search_lang_info
36
+ info.find { |json| json["vssId"] == ".#{lang}"} || info.find {|json| json["vssId"] == "a.#{lang}" }
37
+ end
38
+
39
+ def lang_info_has_base_url?(lang_info)
40
+ lang_info && lang_info["baseUrl"]
41
+ end
42
+
43
+ def clean_captions(captions)
44
+ captions.map do |caption|
45
+ caption.tap { |caption_hash| caption_hash["__content__"] = CGI.unescapeHTML(caption_hash["__content__"]).split.join(" ") }
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,11 @@
1
+ module YoutubeCaptions
2
+ YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v="
3
+ CAPTIONABLE_REGEX = /(\[{"baseUrl":.*"trackName":"(.*?)"}\])/
4
+ YOUTUBE_URL_FORMATS = [
5
+ %r((?:https?://)?youtu\.be/(.+)),
6
+ %r((?:https?://)?(?:www\.)?youtube\.com/watch\?v=(.*?)(&|#|$)),
7
+ %r((?:https?://)?(?:www\.)?youtube\.com/embed/(.*?)(\?|$)),
8
+ %r((?:https?://)?(?:www\.)?youtube\.com/v/(.*?)(#|\?|$)),
9
+ %r((?:https?://)?(?:www\.)?youtube\.com/user/.*?#\w/\w/\w/\w/(.+)\b)
10
+ ]
11
+ end
@@ -0,0 +1,5 @@
1
+ module YoutubeCaptions
2
+ class Error < StandardError; end
3
+ class NoCaptionsAvailableError < Error; end
4
+ class LangNotAvailableError < Error; end
5
+ end
@@ -0,0 +1,20 @@
1
+ require 'httparty'
2
+
3
+ module YoutubeCaptions
4
+ class Info
5
+ include HTTParty
6
+
7
+ attr_reader :id
8
+ def initialize(id:)
9
+ @id = id
10
+ end
11
+
12
+ def call
13
+ youtube_html = self.class.get("#{YoutubeCaptions::YOUTUBE_VIDEO_URL}#{id}")
14
+ match_data = youtube_html.match(YoutubeCaptions::CAPTIONABLE_REGEX)
15
+ return raise NoCaptionsAvailableError.new("No captions available") unless match_data
16
+
17
+ JSON.parse(match_data[1])
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,22 @@
1
+ module YoutubeCaptions
2
+ class Video
3
+ attr_reader :info
4
+ def initialize(id:)
5
+ @info = YoutubeCaptions::Info.new(id: parse_youtube_id(id)).call
6
+ end
7
+
8
+ def captions(lang: nil)
9
+ YoutubeCaptions::Captions.new(info: info, lang: lang).call
10
+ end
11
+
12
+ def available_langs
13
+ info.map { |json| json["languageCode"] }
14
+ end
15
+
16
+ private
17
+
18
+ def parse_youtube_id(id)
19
+ YoutubeCaptions::YOUTUBE_URL_FORMATS.find { |format| id =~ format } && $1 || id
20
+ end
21
+ end
22
+ end
@@ -1,44 +1,8 @@
1
- require 'cgi'
2
- require 'httparty'
1
+ require 'youtube-captions/captions'
2
+ require 'youtube-captions/constants'
3
+ require 'youtube-captions/errors'
4
+ require 'youtube-captions/info'
5
+ require 'youtube-captions/video'
3
6
 
4
- class YoutubeCaptions
5
- include HTTParty
6
-
7
- attr_reader :id, :lang
8
- def initialize(id:, lang: nil)
9
- @id = id
10
- @lang = lang
11
- end
12
-
13
- TRANSLATABLE_REGEX = /({"captionTracks":.*"isTranslatable":(true|false)}\])/
14
-
15
- def call
16
- youtube_html = self.class.get("https://www.youtube.com/watch?v=#{id}")
17
- match_data = youtube_html.match(TRANSLATABLE_REGEX)
18
- raise StandardError.new("There are no captions") unless match_data
19
-
20
- caption_tracks = JSON.parse("#{match_data[1]}}")["captionTracks"]
21
- if lang.present?
22
- subtitle = caption_tracks.find {|json| json["vssId"] == ".#{lang}"} || caption_tracks.find {|json| json["vssId"] == "a.#{lang}"}
23
- if !subtitle || (subtitle && !subtitle["baseUrl"])
24
- raise ArgumentError.new("Lang no available")
25
- end
26
- else
27
- subtitle = caption_tracks.find {|json| json["vssId"] == ".en"} || caption_tracks.find {|json| json["vssId"] == "a.en"}
28
- subtitle = caption_tracks.first unless subtitle
29
- end
30
-
31
- transcript_html = self.class.get(subtitle["baseUrl"])
32
- transcript_tags = transcript_html.to_s.gsub('<?xml version="1.0" encoding="utf-8" ?><transcript>', '').gsub('</transcript>', '').split('</text>').select {|line| line && line.strip}
33
-
34
- transcript_parts = transcript_tags.map do |transcript_tag|
35
- encoded_transcript = transcript_tag.gsub(/<text.+>/, '')
36
- .gsub("/&amp;/gi", '&')
37
- .gsub("/<\/?[^>]+(>|$)/g", '')
38
- .gsub(/&amp;#(\d+);/) { [$1.to_i].pack('U*') }
39
- CGI.unescapeHTML(encoded_transcript)
40
- end
41
-
42
- transcript_parts.join(" ")
43
- end
7
+ module YoutubeCaptions
44
8
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: youtube-captions
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S.
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-07-27 00:00:00.000000000 Z
11
+ date: 2023-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httparty
@@ -24,17 +24,36 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
- description:
28
- email:
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description:
42
+ email:
29
43
  executables: []
30
44
  extensions: []
31
45
  extra_rdoc_files: []
32
46
  files:
33
47
  - lib/youtube-captions.rb
34
- homepage:
48
+ - lib/youtube-captions/captions.rb
49
+ - lib/youtube-captions/constants.rb
50
+ - lib/youtube-captions/errors.rb
51
+ - lib/youtube-captions/info.rb
52
+ - lib/youtube-captions/video.rb
53
+ homepage: https://github.com/sevinchek/youtube-captions
35
54
  licenses: []
36
55
  metadata: {}
37
- post_install_message:
56
+ post_install_message:
38
57
  rdoc_options: []
39
58
  require_paths:
40
59
  - lib
@@ -42,15 +61,16 @@ required_ruby_version: !ruby/object:Gem::Requirement
42
61
  requirements:
43
62
  - - ">="
44
63
  - !ruby/object:Gem::Version
45
- version: '0'
64
+ version: 2.3.0
46
65
  required_rubygems_version: !ruby/object:Gem::Requirement
47
66
  requirements:
48
67
  - - ">="
49
68
  - !ruby/object:Gem::Version
50
69
  version: '0'
51
70
  requirements: []
52
- rubygems_version: 3.4.6
53
- signing_key:
71
+ rubyforge_project:
72
+ rubygems_version: 2.5.1
73
+ signing_key:
54
74
  specification_version: 4
55
75
  summary: A gem to get captions of a youtube video
56
76
  test_files: []