RubyGems - rubytube - Versions diffs - 0.1.0 - Mend

rubytube 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

checksums.yaml +7 -0
data/.rspec +3 -0
data/.standard.yml +3 -0
data/CHANGELOG.md +5 -0
data/CODE_OF_CONDUCT.md +84 -0
data/Gemfile +12 -0
data/LICENSE.txt +21 -0
data/README.md +19 -0
data/Rakefile +10 -0
data/lib/rubytube/cipher.rb +369 -0
data/lib/rubytube/client.rb +165 -0
data/lib/rubytube/extractor.rb +177 -0
data/lib/rubytube/innertube.rb +105 -0
data/lib/rubytube/monostate.rb +5 -0
data/lib/rubytube/parser.rb +164 -0
data/lib/rubytube/request.rb +75 -0
data/lib/rubytube/stream.rb +81 -0
data/lib/rubytube/stream_query.rb +33 -0
data/lib/rubytube/utils.rb +24 -0
data/lib/rubytube/version.rb +5 -0
data/lib/rubytube.rb +67 -0
data/sig/rubytube.rbs +4 -0
metadata +95 -0

data/lib/rubytube/client.rb ADDED Viewed

@@ -0,0 +1,165 @@
module RubyTube
  # High-level handle for a single YouTube video.
  #
  # Resolves the video id from a URL, lazily fetches (and caches) the watch
  # page, the player JavaScript and the InnerTube player response, and exposes
  # the resulting streams together with basic video metadata.
  class Client
    attr_accessor :video_id, :watch_url, :embed_url, :stream_monostate

    # @param url [String] a URL from which Extractor.video_id can pull the id
    def initialize(url)
      self.video_id = Extractor.video_id(url)
      self.watch_url = "https://youtube.com/watch?v=#{video_id}"
      self.embed_url = "https://www.youtube.com/embed/#{video_id}"
      self.stream_monostate = Monostate.new
    end

    # @return [String] body of the watch page (fetched once, then cached)
    def watch_html
      @watch_html ||= Request.get(watch_url)
    end

    # @return [String] body of the player JavaScript (fetched once, then cached)
    def js
      @js ||= Request.get(js_url)
    end

    # @return [String] URL of the player JavaScript, derived from the watch page
    def js_url
      @js_url ||= Extractor.js_url(watch_html)
    end

    # Returns the 'streamingData' entry of the player response. When the
    # default response lacks it, the embedded-client fallback is tried first.
    def streaming_data
      return vid_info['streamingData'] if vid_info&.key?('streamingData')

      bypass_age_gate
      vid_info['streamingData']
    end

    # Builds (once) the list of Stream objects for this video.
    #
    # @return [Array<Stream>]
    # @raise whatever check_availability raises for unavailable videos
    def fmt_streams
      check_availability
      return @fmt_streams if @fmt_streams

      stream_manifest = Extractor.apply_descrambler(streaming_data)
      begin
        Extractor.apply_signature(stream_manifest, vid_info, js)
      rescue ExtractError
        # Drop the cached player JS so the `js` call below really refetches it.
        # (Assigning plain locals here would shadow the reader methods and hand
        # nil to the cipher instead.)
        @js = nil
        @js_url = nil
        Extractor.apply_signature(stream_manifest, vid_info, js)
      end

      built = stream_manifest.map { |stream| Stream.new(stream, stream_monostate) }
      stream_monostate.title = title
      stream_monostate.duration = length
      # Cache only after everything succeeded, so a failed attempt is retried
      # on the next call instead of returning an empty list forever.
      @fmt_streams = built
    end

    # Inspects the playability status of the watch page and raises the
    # matching error when the video cannot be played. Returns normally
    # otherwise.
    def check_availability
      status, messages = Extractor.playability_status(watch_html)
      # Array() tolerates a single String message as well as a list.
      Array(messages).each do |reason|
        case status
        when 'UNPLAYABLE'
          case reason
          when 'Join this channel to get access to members-only content like this video, and other exclusive perks.'
            raise MembersOnly.new(video_id)
          when 'This live stream recording is not available.'
            raise RecordingUnavailable.new(video_id)
          else
            raise VideoUnavailable.new(video_id)
          end
        when 'LOGIN_REQUIRED'
          if reason == 'This is a private video. Please sign in to verify that you may see it.'
            raise VideoPrivate.new(video_id)
          end
        when 'ERROR'
          raise VideoUnavailable.new(video_id) if reason == 'Video unavailable'
        when 'LIVE_STREAM'
          raise LiveStreamError.new(video_id)
        end
      end
    end

    # @return [StreamQuery] query wrapper around fmt_streams (cached)
    def streams
      # fmt_streams runs check_availability itself before doing any work.
      @streams ||= StreamQuery.new(fmt_streams)
    end

    # @return [Hash] InnerTube player response for this video (cached)
    def vid_info
      @vid_info ||= InnerTube.new.player(video_id)
    end

    # Re-requests the player response with the 'ANDROID_EMBED' client and
    # stores it as the cached vid_info.
    #
    # @raise [VideoUnavailable] when that response reports 'UNPLAYABLE'
    def bypass_age_gate
      resp = InnerTube.new(client: 'ANDROID_EMBED').player(video_id)
      raise VideoUnavailable.new(video_id) if resp.dig('playabilityStatus', 'status') == 'UNPLAYABLE'

      @vid_info = resp
    end

    # The readers below expose fields of vid_info['videoDetails'], memoized.

    def title
      @title ||= vid_info['videoDetails']['title']
    end

    # @return [Integer] 'lengthSeconds' converted with to_i
    def length
      @length ||= vid_info['videoDetails']['lengthSeconds'].to_i
    end

    # @return [Integer] 'viewCount' converted with to_i
    def views
      @views ||= vid_info['videoDetails']['viewCount'].to_i
    end

    def author
      @author ||= vid_info['videoDetails']['author']
    end

    def keywords
      @keywords ||= vid_info['videoDetails']['keywords']
    end

    def channel_id
      @channel_id ||= vid_info['videoDetails']['channelId']
    end
  end
end

data/lib/rubytube/extractor.rb ADDED Viewed

@@ -0,0 +1,177 @@
module RubyTube
  # Stateless helpers that pull the playability status, player JavaScript
  # location and stream URLs out of watch-page HTML and player responses.
  class Extractor
    class << self
      # Reads the playability status embedded in the watch page.
      #
      # @param watch_html [String]
      # @return [Array(String, Array)] status and a list of reason messages;
      #   [nil, [nil]] when no status information is present
      def playability_status(watch_html)
        player_response = JSON.parse(initial_player_response(watch_html))
        status_obj = player_response['playabilityStatus'] || {}

        # The messages element is always a list so callers can iterate it.
        return ['LIVE_STREAM', ['Video is a live stream.']] if status_obj.key?('liveStreamability')

        if status_obj.key?('status')
          return [status_obj['status'], [status_obj['reason']]] if status_obj.key?('reason')
          return [status_obj['status'], status_obj['messages']] if status_obj.key?('messages')
        end
        [nil, [nil]]
      end

      # @param url [String]
      # @return the first capture of the 11-character id pattern, as returned
      #   by Utils.regex_search
      def video_id(url)
        Utils.regex_search(/(?:v=|\/)([0-9A-Za-z_-]{11}).*/, url, 1)
      end

      # Builds the absolute URL of the player JavaScript.
      #
      # Uses the 'assets' -> 'js' entry of the player config when there is
      # one, and otherwise falls back to scanning the HTML for a base.js path.
      #
      # @raise [RegexMatchError] when neither source yields a path
      def js_url(html)
        base_js = config_js_path(html) || get_ytplayer_js(html)
        "https://youtube.com#{base_js}"
      end

      # Splits a MIME string such as 'video/mp4; codecs="a, b"'.
      #
      # @return [Array(String, Array<String>)] mime type and codec list
      # @raise [RegexMatchError] when the string does not have that shape
      def mime_type_codec(mime_type_codec)
        pattern = %r{(\w+\/\w+)\;\scodecs=\"([a-zA-Z\-0-9.,\s]*)\"}
        results = mime_type_codec.match(pattern)
        # Two arguments, like every other RegexMatchError raised in this class.
        raise RegexMatchError.new('mime_type_codec', pattern) if results.nil?

        mime_type, codecs = results.captures
        [mime_type, codecs.split(',').map(&:strip)]
      end

      # @return [String] the base.js path found in the HTML
      # @raise [RegexMatchError] when no pattern matches
      def get_ytplayer_js(html)
        js_url_patterns = [
          %r{(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)}
        ]
        js_url_patterns.each do |pattern|
          function_match = html.match(pattern)
          return function_match[1] if function_match
        end
        raise RegexMatchError.new('get_ytplayer_js', 'js_url_patterns')
      end

      # Returns the first player-config object that Parser can locate after
      # one of the known assignment prefixes (tried in order).
      #
      # @raise [RegexMatchError] when none of the prefixes is found
      def get_ytplayer_config(html)
        config_patterns = [
          /ytplayer\.config\s*=\s*/,
          /ytInitialPlayerResponse\s*=\s*/,
          /yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*/
        ]
        config_patterns.each do |pattern|
          begin
            return Parser.parse_for_object(html, pattern)
          rescue HTMLParseError
            next
          end
        end
        raise RegexMatchError.new('get_ytplayer_config', 'config_patterns, setconfig_patterns')
      end

      # Rewrites, in place, the 'url' of every stream that still needs a
      # signature, adding the 'sig' parameter and (unless 'ratebypass' is
      # present) the recalculated 'n' parameter.
      #
      # @param stream_manifest [Array<Hash>]
      # @param vid_info [Hash] player response, consulted for live streams
      # @param js [String] player JavaScript handed to Cipher
      # @raise [LiveStreamError] when a stream has no URL and the response
      #   carries 'liveStreamability'
      # @raise [KeyError] when a stream has no URL for any other reason
      def apply_signature(stream_manifest, vid_info, js)
        cipher = Cipher.new(js)
        stream_manifest.each_with_index do |stream, i|
          url = stream['url']
          if url.nil?
            # Hash#[] returns nil rather than raising, so test for it directly.
            raise LiveStreamError.new('UNKNOWN') if vid_info.dig('playabilityStatus', 'liveStreamability')

            raise KeyError, "stream #{i} has no 'url'"
          end

          # For certain videos, YouTube provides pre-signed URLs, in which
          # case the signature descrambling can be skipped entirely.
          presigned = url.include?('signature') ||
                      (!stream.key?('s') && (url.include?('&sig=') || url.include?('&lsig=')))
          next if presigned

          signature = cipher.get_signature(stream['s'])
          parsed_url = URI.parse(url)
          # First value wins for repeated keys; missing keys are simply absent.
          query_params = URI.decode_www_form(parsed_url.query.to_s)
                            .each_with_object({}) { |(key, value), acc| acc[key] = value unless acc.key?(key) }
          query_params['sig'] = signature
          if !query_params.key?('ratebypass') && query_params.key?('n')
            query_params['n'] = cipher.calculate_n(query_params['n'].chars)
          end

          stream['url'] =
            "#{parsed_url.scheme}://#{parsed_url.host}#{parsed_url.path}?#{URI.encode_www_form(query_params)}"
        end
      end

      # Merges 'formats' and 'adaptiveFormats' into one list and, for entries
      # that only carry a 'signatureCipher', extracts 'url' and 's' from it.
      # Each entry also gets an 'is_otf' flag.
      #
      # @return [Array<Hash>, nil] nil when stream_data itself has a 'url'
      def apply_descrambler(stream_data)
        return if stream_data.key?('url')

        formats = []
        formats += stream_data['formats'] if stream_data.key?('formats')
        formats += stream_data['adaptiveFormats'] if stream_data.key?('adaptiveFormats')

        formats.each do |data|
          if !data.key?('url') && data.key?('signatureCipher')
            cipher_url = URI.decode_www_form(data['signatureCipher']).to_h
            data['url'] = cipher_url['url']
            data['s'] = cipher_url['s']
          end
          data['is_otf'] = data['type'] == 'FORMAT_STREAM_TYPE_OTF'
        end
        formats
      end

      private

      # @return [String] the text of the initial player response object
      # @raise [RegexMatchError] when no known assignment prefix is found
      def initial_player_response(watch_html)
        patterns = [
          "window\\[['\"]ytInitialPlayerResponse['\"]\\]\\s*=\\s*",
          "ytInitialPlayerResponse\\s*=\\s*"
        ]
        patterns.each do |pattern|
          begin
            return Parser.parse_for_object(watch_html, pattern)
          rescue HTMLParseError
            next
          end
        end
        raise RegexMatchError.new('initial_player_response', 'initial_player_response_pattern')
      end

      # Path stored under 'assets' -> 'js' in the player config, or nil when
      # the config is missing, is not valid JSON, or has no such entry.
      def config_js_path(html)
        config = get_ytplayer_config(html)
        # Parser hands back the object as text; decode it before indexing.
        config = JSON.parse(config) if config.is_a?(String)
        config.is_a?(Hash) ? config.dig('assets', 'js') : nil
      rescue RegexMatchError, JSON::ParserError, NoMethodError, TypeError
        nil
      end
    end
  end
end

data/lib/rubytube/innertube.rb ADDED Viewed

@@ -0,0 +1,105 @@
module RubyTube
  # Minimal client for the youtubei/v1 ("InnerTube") endpoints.
  #
  # Each named client bundles the request context, the extra request header
  # and the API key that are sent with every call.
  class InnerTube
    DEFAULT_CLIENTS = {
      'WEB' => {
        context: {
          client: {
            clientName: 'WEB',
            clientVersion: '2.20200720.00.02'
          }
        },
        header: { 'User-Agent': 'Mozilla/5.0' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      },
      'ANDROID_MUSIC' => {
        context: {
          client: {
            clientName: 'ANDROID_MUSIC',
            clientVersion: '5.16.51',
            androidSdkVersion: 30
          }
        },
        header: { 'User-Agent': 'com.google.android.apps.youtube.music/' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      },
      'ANDROID_EMBED' => {
        context: {
          client: {
            clientName: 'ANDROID_EMBEDDED_PLAYER',
            clientVersion: '17.31.35',
            clientScreen: 'EMBED',
            androidSdkVersion: 30
          }
        },
        header: { 'User-Agent': 'com.google.android.youtube/' },
        api_key: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
      }
    }

    # Original (misspelled) constant name, kept so existing references work.
    DEFALUT_CLIENTS = DEFAULT_CLIENTS

    BASE_URL = 'https://www.youtube.com/youtubei/v1'

    attr_accessor :context, :header, :api_key, :access_token, :refresh_token, :use_oauth, :allow_cache, :expires

    # @param client [String] a key of DEFAULT_CLIENTS
    # @param use_oauth [Boolean] send a bearer token instead of the API key
    # @param allow_cache [Boolean] consulted by cache_tokens
    # @raise [ArgumentError] when client is not a known client name
    def initialize(client: 'ANDROID_MUSIC', use_oauth: false, allow_cache: false)
      config = DEFAULT_CLIENTS.fetch(client) do
        raise ArgumentError,
              "unknown InnerTube client #{client.inspect} (known: #{DEFAULT_CLIENTS.keys.join(', ')})"
      end
      self.context = config[:context]
      self.header  = config[:header]
      self.api_key = config[:api_key]
      self.use_oauth = use_oauth
      self.allow_cache = allow_cache
    end

    # Placeholder: token caching is not implemented yet.
    def cache_tokens
      return unless allow_cache
      # TODO:
    end

    # Placeholder: token refresh is not implemented yet.
    def refresh_bearer_token(force: false)
      # TODO:
    end

    # Placeholder: token retrieval is not implemented yet.
    def fetch_bearer_token
      # TODO:
    end

    # POSTs data to endpoint and returns the decoded JSON response.
    #
    # NOTE: this method shadows Object#send on InnerTube instances; use
    # __send__ for dynamic dispatch on them.
    #
    # @param endpoint [String] absolute endpoint URL
    # @param query [Hash] extra query parameters (not modified)
    # @param data [Hash] request body, serialised by Request.post
    # @return [Hash, Array] parsed JSON body
    def send(endpoint, query, data)
      headers = { 'Content-Type': 'application/json' }
      if use_oauth
        access_token ? refresh_bearer_token : fetch_bearer_token
        headers['Authorization'] = "Bearer #{access_token}"
      end

      params = { key: api_key, contentCheckOk: true, racyCheckOk: true }.merge(query)
      if use_oauth
        # With OAuth the API key must not be sent; drop it from the final
        # parameter set rather than from the caller's hash.
        params.delete(:key)
        params.delete('key')
      end

      options = { headers: headers.merge(header), query: params, data: data }
      JSON.parse(Request.post(endpoint, options))
    end

    # Fetches the player response for a video.
    #
    # @param video_id [String]
    # @return [Hash] parsed response of the /player endpoint
    def player(video_id)
      send("#{BASE_URL}/player", { 'videoId' => video_id }, { context: context })
    end
  end
end

data/lib/rubytube/monostate.rb ADDED Viewed

@@ -0,0 +1,5 @@
module RubyTube
  # Plain mutable holder for a video's title and duration.
  class Monostate
    attr_reader :title, :duration
    attr_writer :title, :duration
  end
end

data/lib/rubytube/parser.rb ADDED Viewed

@@ -0,0 +1,164 @@
module RubyTube
  # Helpers for cutting JavaScript object/array literals out of larger text
  # by tracking nesting of braces, brackets, strings and regex literals.
  module Parser
    # Opening delimiter => delimiter that closes that context.
    CONTEXT_CLOSERS = {
      '{' => '}',
      '[' => ']',
      '"' => '"',
      '/' => '/' # JavaScript regex
    }.freeze

    # A '/' starts a regex literal only when the previous significant
    # character is one of these; otherwise it is treated as an operator.
    REGEX_PRECEDING_CHARS = ['(', ',', '=', ':', '[', '!', '&', '|', '?', '{', '}', ';'].freeze

    module_function

    # Returns the text of the object that starts right after the first match
    # of preceding_regex in html.
    #
    # @param html [String]
    # @param preceding_regex [Regexp, String]
    # @return [String]
    # @raise [HTMLParseError] when the regex does not match, or the match is
    #   not followed by '{' or '['
    def parse_for_object(html, preceding_regex)
      result = Regexp.new(preceding_regex).match(html)
      raise HTMLParseError, "No matches for regex #{preceding_regex}" if result.nil?

      parse_for_object_from_startpoint(html, result.end(0))
    end

    # Returns the balanced '{...}' or '[...]' text beginning at start_point.
    # If the input ends before the object is closed, everything from
    # start_point to the end of the input is returned.
    #
    # @param html [String]
    # @param start_point [Integer] index of the opening '{' or '['
    # @return [String]
    # @raise [HTMLParseError] when html[start_point] is not '{' or '['
    def find_object_from_startpoint(html, start_point)
      # A start point past the end yields nil; treat it like an empty tail so
      # the check below reports it as an invalid start point.
      html = html[start_point..-1] || ''
      unless ['{', '['].include?(html[0])
        raise HTMLParseError, "Invalid start point. Start of HTML:\n#{html[0..19]}"
      end

      # The first character is the opener: it seeds the stack and also counts
      # as the previous significant character for the character after it.
      stack = [html[0]]
      last_char = html[0]
      i = 1
      while i < html.length && !stack.empty?
        curr_char = html[i]
        curr_context = stack.last

        if curr_char == CONTEXT_CLOSERS[curr_context]
          stack.pop
        elsif curr_context == '"' || curr_context == '/'
          # Strings and regexes may contain openers and closers; only a
          # backslash matters there, and it hides the character after it.
          i += 1 if curr_char == '\\'
        elsif CONTEXT_CLOSERS.key?(curr_char) &&
              (curr_char != '/' || REGEX_PRECEDING_CHARS.include?(last_char))
          stack.push(curr_char)
        end

        # Spaces and newlines are not significant for the regex heuristic.
        last_char = curr_char unless curr_char == ' ' || curr_char == "\n"
        i += 1
      end
      html[0...i]
    end

    # Same contract as find_object_from_startpoint (kept as a separate entry
    # point for callers that use this name).
    def parse_for_object_from_startpoint(html, start_point)
      find_object_from_startpoint(html, start_point)
    end

    # Splits the text of a JavaScript array literal into its top-level
    # elements, keeping inline `function(...) {...}` definitions whole.
    #
    # @param js_array [String] array text starting with '['
    # @return [Array<String>]
    def throttling_array_split(js_array)
      results = []
      curr_substring = js_array[1..-1] || ''
      func_regex = /function\([^)]*\)/

      until curr_substring.empty?
        func_match = curr_substring.start_with?('function') ? func_regex.match(curr_substring) : nil
        if func_match
          body_start = func_match.end(0)
          function_text = find_object_from_startpoint(curr_substring, body_start)
          full_function_def = curr_substring[0, body_start + function_text.length]
          results << full_function_def
          # Skip the separator after the function; the slice is nil when the
          # function is the very last thing in the input.
          curr_substring = curr_substring[(full_function_def.length + 1)..-1] || ''
        else
          comma_at = curr_substring.index(',')
          if comma_at
            results << curr_substring[0, comma_at]
            curr_substring = curr_substring[(comma_at + 1)..-1]
          else
            # Last element: drop the final character (the closing bracket).
            results << curr_substring[0, curr_substring.length - 1]
            curr_substring = ''
          end
        end
      end
      results
    end
  end
end

data/lib/rubytube/request.rb ADDED Viewed

@@ -0,0 +1,75 @@
module RubyTube
  # Thin HTTP helpers built on Faraday.
  module Request
    module_function

    # Number of bytes requested per range while streaming.
    DEFAULT_RANGE_SIZE = 9_437_184

    # @return the response body of a GET request
    def get(url, options = {})
      send(:get, url, options).body
    end

    # @return the response body of a POST request
    def post(url, options = {})
      send(:post, url, options).body
    end

    # @return the response headers of a HEAD request
    def head(url, options = {})
      send(:head, url, options).headers
    end

    # Downloads url in ranges of DEFAULT_RANGE_SIZE bytes and yields the body
    # of each range response to the block.
    #
    # @param url [String] URL that already has a query string ("&range=" is
    #   appended to it)
    # @param timeout [Integer] accepted for interface compatibility; currently
    #   not applied to the requests
    # @param max_retries [Integer] extra attempts allowed per range after a
    #   Faraday::TimeoutError
    # @yieldparam chunk [String] body of one range response
    # @raise [MaxRetriesExceeded] when a range keeps timing out
    def stream(url, timeout: 60, max_retries: 0)
      file_size = DEFAULT_RANGE_SIZE
      size_probed = false
      downloaded = 0

      while downloaded < file_size
        stop_pos = [downloaded + DEFAULT_RANGE_SIZE, file_size].min - 1

        tries = 0
        begin
          raise MaxRetriesExceeded if tries >= 1 + max_retries

          response = send(:get, "#{url}&range=#{downloaded}-#{stop_pos}")
        rescue Faraday::TimeoutError
          tries += 1
          retry
        end

        unless size_probed
          size_probed = true
          begin
            # NOTE(review): this is a full GET just to read Content-Length;
            # a HEAD request may be sufficient - confirm against the server.
            resp = send(:get, "#{url}&range=0-99999999999")
            file_size = resp.headers['Content-Length'].to_i
          rescue StandardError
            # Best effort: keep the default size when the probe fails.
          end
        end

        chunk = response.body.to_s
        # An empty body would never advance the offset; stop instead of
        # requesting the same range forever.
        break if chunk.empty?

        # Offsets are byte offsets, so count bytes rather than characters.
        downloaded += chunk.bytesize
        yield chunk
      end
    end

    # Performs a request and returns the Faraday response.
    #
    # NOTE: as a module function this shadows Object#send on the module.
    #
    # @param method [Symbol] :get, :post or :head
    # @param url [String]
    # @param options [Hash] optional :headers (merged over the default
    #   Content-Type), :query (request params) and :data (JSON-encoded body)
    def send(method, url, options = {})
      headers = { 'Content-Type' => 'text/html' }
      # String keys throughout, so a symbol and a string spelling of the same
      # header cannot both end up in the request.
      options[:headers]&.each { |name, value| headers[name.to_s] = value }

      connection = Faraday.new(url: url) do |faraday|
        faraday.response :follow_redirects
        faraday.adapter Faraday.default_adapter
      end

      connection.public_send(method) do |req|
        req.headers = headers
        req.params = options[:query] if options[:query]
        req.body = JSON.dump(options[:data]) if options[:data]
      end
    end
  end
end