RubyGems - youtube-transcript-rb - Versions diffs - 0.1.0 → 0.2.3 - Mend

youtube-transcript-rb 0.1.0 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

checksums.yaml +4 -4
data/.rubocop.yml +9 -0
data/.rubocop_todo.yml +166 -0
data/README.md +42 -42
data/lib/youtube-transcript-rb.rb +4 -0
data/lib/youtube_rb/formatters.rb +263 -0
data/lib/youtube_rb/transcript/api.rb +144 -0
data/lib/youtube_rb/transcript/errors.rb +215 -0
data/lib/youtube_rb/transcript/settings.rb +26 -0
data/lib/youtube_rb/transcript/transcript.rb +237 -0
data/lib/youtube_rb/transcript/transcript_list.rb +168 -0
data/lib/youtube_rb/transcript/transcript_list_fetcher.rb +220 -0
data/lib/youtube_rb/transcript/transcript_parser.rb +81 -0
data/lib/youtube_rb/transcript.rb +33 -0
data/lib/youtube_rb/version.rb +5 -0
data/sig/youtube_rb/transcript.rbs +4 -0
data/spec/api_spec.rb +27 -27
data/spec/errors_spec.rb +41 -41
data/spec/formatters_spec.rb +45 -46
data/spec/integration_spec.rb +39 -48
data/spec/settings_spec.rb +16 -16
data/spec/spec_helper.rb +52 -52
data/spec/transcript_list_fetcher_spec.rb +38 -33
data/spec/transcript_list_spec.rb +16 -19
data/spec/transcript_parser_spec.rb +3 -3
data/spec/transcript_spec.rb +23 -24
metadata +17 -13
data/lib/youtube/transcript/rb/api.rb +0 -150
data/lib/youtube/transcript/rb/errors.rb +0 -217
data/lib/youtube/transcript/rb/formatters.rb +0 -269
data/lib/youtube/transcript/rb/settings.rb +0 -28
data/lib/youtube/transcript/rb/transcript.rb +0 -239
data/lib/youtube/transcript/rb/transcript_list.rb +0 -170
data/lib/youtube/transcript/rb/transcript_list_fetcher.rb +0 -225
data/lib/youtube/transcript/rb/transcript_parser.rb +0 -83
data/lib/youtube/transcript/rb/version.rb +0 -9
data/lib/youtube/transcript/rb.rb +0 -37
data/sig/youtube/transcript/rb.rbs +0 -8

data/lib/youtube/transcript/rb/transcript_parser.rb DELETED Viewed

@@ -1,83 +0,0 @@
-# frozen_string_literal: true
-require "nokogiri"
-require "cgi"
-module Youtube
-  module Transcript
-    module Rb
-      # Parses XML transcript data from YouTube
-      class TranscriptParser
-        # HTML formatting tags to preserve when preserve_formatting is enabled
-        FORMATTING_TAGS = %w[
-          strong
-          em
-          b
-          i
-          mark
-          small
-          del
-          ins
-          sub
-          sup
-        ].freeze
-        # @param preserve_formatting [Boolean] whether to preserve HTML formatting tags
-        def initialize(preserve_formatting: false)
-          @preserve_formatting = preserve_formatting
-          @html_regex = build_html_regex
-        end
-        # Parse XML transcript data into TranscriptSnippet objects
-        # @param raw_data [String] the raw XML data from YouTube
-        # @return [Array<TranscriptSnippet>] parsed transcript snippets
-        def parse(raw_data)
-          doc = Nokogiri::XML(raw_data)
-          snippets = []
-          doc.xpath("//text").each do |element|
-            text_content = element.text
-            next if text_content.nil? || text_content.empty?
-            # Unescape HTML entities and remove unwanted HTML tags
-            text = process_text(text_content)
-            snippets << TranscriptSnippet.new(
-              text: text,
-              start: element["start"].to_f,
-              duration: (element["dur"] || "0.0").to_f
-            )
-          end
-          snippets
-        end
-        private
-        # Build regex for removing HTML tags
-        # @return [Regexp]
-        def build_html_regex
-          if @preserve_formatting
-            # Remove all tags except formatting tags
-            formats_pattern = FORMATTING_TAGS.join("|")
-            # Match tags that are NOT the formatting tags
-            Regexp.new("</?(?!/?(?:#{formats_pattern})\\b)[^>]*>", Regexp::IGNORECASE)
-          else
-            # Remove all HTML tags
-            Regexp.new("<[^>]*>", Regexp::IGNORECASE)
-          end
-        end
-        # Process text by unescaping HTML entities and removing unwanted tags
-        # @param text [String] the raw text
-        # @return [String] processed text
-        def process_text(text)
-          # Unescape HTML entities
-          unescaped = CGI.unescapeHTML(text)
-          # Remove unwanted HTML tags
-          unescaped.gsub(@html_regex, "")
-        end
-      end
-    end
-  end
-end

data/lib/youtube/transcript/rb/version.rb DELETED Viewed

@@ -1,9 +0,0 @@
-# frozen_string_literal: true
-module Youtube
-  module Transcript
-    module Rb
-      VERSION = "0.1.0"
-    end
-  end
-end

data/lib/youtube/transcript/rb.rb DELETED Viewed

@@ -1,37 +0,0 @@
-# frozen_string_literal: true
-require_relative "rb/version"
-require_relative "rb/settings"
-require_relative "rb/errors"
-require_relative "rb/transcript_parser"
-require_relative "rb/transcript"
-require_relative "rb/transcript_list"
-require_relative "rb/transcript_list_fetcher"
-require_relative "rb/api"
-require_relative "rb/formatters"
-module Youtube
-  module Transcript
-    module Rb
-      class << self
-        # Convenience method to fetch a transcript
-        # @param video_id [String] YouTube video ID
-        # @param languages [Array<String>] Language codes in order of preference
-        # @param preserve_formatting [Boolean] Whether to preserve HTML formatting
-        # @return [FetchedTranscript] The fetched transcript
-        def fetch(video_id, languages: ["en"], preserve_formatting: false)
-          api = YouTubeTranscriptApi.new
-          api.fetch(video_id, languages: languages, preserve_formatting: preserve_formatting)
-        end
-        # Convenience method to list available transcripts
-        # @param video_id [String] YouTube video ID
-        # @return [TranscriptList] List of available transcripts
-        def list(video_id)
-          api = YouTubeTranscriptApi.new
-          api.list(video_id)
-        end
-      end
-    end
-  end
-end

data/sig/youtube/transcript/rb.rbs DELETED Viewed

@@ -1,8 +0,0 @@
-module Youtube
-  module Transcript
-    module Rb
-      VERSION: String
-      # See the writing guide of rbs: https://github.com/ruby/rbs#guides
-    end
-  end
-end