RubyGems - youtube_transcript2020 - Versions diffs - 0.1.0 - Mend

youtube_transcript2020 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +7 -0
checksums.yaml.gz.sig +3 -0
data.tar.gz.sig +2 -0
data/lib/youtube_transcript2020.rb +163 -0
metadata +110 -0
metadata.gz.sig +0 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 8010045930895f8e21d94e9a66764270caa71c187592fb385d29c5b005c44d98
+  data.tar.gz: 370d33e8f02774874ab626b2d3b52c3cc7d0ae681db36c4c6dd0070a882faa59
+SHA512:
+  metadata.gz: 9f8a4090ea44bb1579b58048ae7d1586883ef9580898e569207b365df2d70e2c0e5fa3db30e65c0d23d4011f1583d779432fec331b955d6745c90b561b863e88
+  data.tar.gz: b0bd2bda2e4182e4d1b522720ba032e7c0313ead7f2a987f71b728cc80da34edb26c5b114ccc006f5196448af1e325708e9b7d32ddcd2511f3a2a549b5480e97

checksums.yaml.gz.sig ADDED

@@ -0,0 +1,3 @@
+1y������>�1ǽ�
+ҋ���Z*f6��W��!F���7��u\= � �}�e5N��r�u�a����d������m�dƩ�>ŉ�2���^<3%>$N�|��*��۬�4�A��v���kLC�y��{�-Yܽ�J��v�}�\KDނ��AG`�Ug��G7�,y�\��Uag�I:.��=�W��0V��
+��h�}"_��#��nf�L�o��L}�;��r�㑩�{�;���T��YC�rxj�D�r��kX5�K8.ɘS�t��Z9�[9��yCg@<2��������R�>�����@pk��ef鹂��֍	��	��FP���+*��_�-J���q�u%�z��#�ݠb}�e�#rHI

data.tar.gz.sig ADDED

	@@ -0,0 +1,2 @@
1	+ g�ج��d�Y�"��)�
2	+ �X�" k�5��\�aO�S�K;v��nr�e�S�H�<��5-`�ȶ��R�z��WS��ʜ�}g*��?��y'��Λ:�>NK�P��HI_��23$�8Y��"�7,��Ng��1��Y�-<v�4�5��U$FA��!8�lK��!5�m<n��aa))W�ªH�D�4�k dQ��A�:+�(�=L �B]x5��>�F�Kk]{�0P��,��9�cf�V� ,�1�犄��t}ִة�2��x�j��)Yv>�A�{�)&�P��

data/lib/youtube_transcript2020.rb ADDED

@@ -0,0 +1,163 @@
+#!/usr/bin/env ruby
+# file: youtube_transcript2020.rb
+require 'cgi'
+require 'json'
+require 'net/http'
+require 'subunit'
+require 'simple-config'
+class YoutubeTranscript2020
+  attr_reader :to_a, :author, :id, :title
+  def initialize(id=nil)
+    return unless id
+    @id = if id[/https:\/\/www\.youtube\.com\/watch\?v=/] then
+      id[/(?<=^https:\/\/www\.youtube\.com\/watch\?v=).*/]
+    elsif id[/https:\/\/youtu\.be\//]
+      id[/(?<=^https:\/\/youtu\.be\/).*/]
+    else
+      id
+    end
+    s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
+    @s = parse s
+    fetch_info(@id)
+  end
+  def to_a()
+    @a
+  end
+  # returns the transcript in plain text including timestamps
+  #
+  def to_s()
+    h = {id: @id, title: @title, author: @author}
+    SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s
+  end
+  # reads a plain text transcript which has been modified to include headings
+  #
+  def import(obj)
+    s = RXFHelper.read(obj).first
+    header, body = s.split(/-----+/,2)
+    h = SimpleConfig.new(header).to_h
+    @id, @author, @title = h[:id], h[:author], h[:title]
+    @s = body
+    a = body.lines.map(&:chomp).partition {|x| x =~ /\d+:\d+/ }
+    @a = a[0].zip(a[1])
+  end
+  # Outputs HTML containing the embedded video and transcription
+  #
+  def to_html()
+    url = 'https://www.youtube.com/embed/' + @id
+    links = @a.map do |timestamp, s|
+      seconds = Subunit.new(units={minutes:60, hours:60},
+                  timestamp.split(':').map(&:to_i)).to_i
+      "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
+          % [url, seconds, timestamp, s]
+    end
+<<EOF
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <title></title>
+    <meta charset="utf-8" />
+  </head>
+  <body>
+<div style="width: 1080px; background: white">
+<div style="float:left; width: 580px; background: white">
+<iframe width="560" height="315" src="#{url}?start=67&autoplay=1" name="video" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
+<h1>#{@title}</h1>
+</div>
+<div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
+<ul>#{links.join("\n")}</ul>
+</div>
+</div>
+  </body>
+</html>
+EOF
+  end
+  # Outputs plain text containing the headings including timestamps
+  # note: This can be helpful for copyng and pasting directly into a YouTube comment
+  #
+  def to_headings()
+    @to_a.select {|timestamp, _| timestamp =~ / /}.map(&:first)
+  end
+  private
+  def fetch_info(id)
+    url = "http://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=#{id}&format=json"
+    s = Net::HTTP.get(URI(url))
+    h = JSON.parse(s, symbolize_names: true)
+    @title = h[:title]
+    @author = h[:author_name]
+  end
+  def parse(s)
+    doc = Rexle.new(s)
+    a = doc.root.elements.each.map do |x|
+      timestamp = Subunit.new(units={minutes:60, hours:60}, \
+        seconds: x.attributes[:start].to_f).to_s(verbose: false)
+      [timestamp, x.text.unescape.gsub("\n", ' ').gsub('&#39;',"'").gsub('&quot;','"')]
+    end
+    @to_a = a
+    a2 = []
+    # the following cleans up sentences that start with And, Or, But, So etc.
+    a.each do |time, s|
+      if s[/^[a-z|0-9]/]then
+        a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
+      elsif s[/^And,? /]
+        a2[-1][-1] += ' ' + s.sub(/^And,? /,'').capitalize
+      elsif  s[/^Or,? /]
+        a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
+      elsif  s[/^But /]
+        a2[-1][-1] += ' ' + s.sub(/But,? /,'').capitalize
+      elsif s[/^"/]
+        a2[-1][-1] = a2[-1][-1].chomp + ' ' + s
+      elsif s[/^So,? /]
+        a2[-1][-1] += ' ' + s.sub(/^So,? /,'').capitalize
+      else
+        a2 << [time, s]
+      end
+    end
+    # formats the paragraph with the timestamp appearing above
+    @a = a2
+    a2.map {|time, s| "\n%s\n\n%s" % [time, s]}
+  end
+end

metadata ADDED

@@ -0,0 +1,110 @@
+--- !ruby/object:Gem::Specification
+name: youtube_transcript2020
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- James Robertson
+autorequire:
+bindir: bin
+cert_chain:
+- |
+  -----BEGIN CERTIFICATE-----
+  MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
+  YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzI3MjI1MTUyWhcN
+  MjEwNzI3MjI1MTUyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
+  cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxfRw+
+  xg525jF+UNFVTtUrON2issNxWgDTq1efjPq9yMzqYrIDZREFE/3fgYbtAqA1Ut94
+  2h8mAKnAg1CC4plPA8o15f+h30TPRaxZXFmYUMxTkaLHL4Lvzd1D7eXqRYf9SFQM
+  EvoYbncj9QwR57WcVF/MTdwbyyiZo3CGzwmWNb9OCIZtvs8m/UOzAmbfF3lIKz9k
+  +ZK03KqYhyjuAiVhF39LdWUc1AWqu5i+JpFE+Lzfqv1uAjjgshmUkHOXkpWOorHc
+  uxL0+xZXWgTwpa1QCw3cQY1LW45QjZt4ckA9lOub1LvUTDCvZocNS+dlIUMdW0mP
+  jFII/nX/KWxW+NOmkWBpdGbXmY5QTppwx88r+VRpTdhepVcNiiHhMsYQsLI/fzVo
+  kWTib/aBnAoahtlbaldC+e03GPsLPmpTl4ZjOFqUuAyq47h42NYt6kPY/y7Gj8To
+  fx4pNgddR/r/WABaNao8Q+tzIxgQwCf1rijvfJP+u04GCmIeFm8oQ1x0XkUCAwEA
+  AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1nkRML1E
+  Q0PgH/jEHBOQSUTi4MYwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
+  c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
+  BgkqhkiG9w0BAQsFAAOCAYEAWjyRzOnO0k/P2YHBsie5hNyJq6q7zb9bto2WYF1L
+  N0/cvumuBsJMDUuPlD9RFvzncZbu//hbnZbK6cxiptm9HUN+m7zNi8XUcDHQw4Ba
+  17ZyHWKM2pkf+PJb4waQVeqyUXjbM9r6L8cVa1gkalU6ZpqEtBmkEzJCDZVf0Fll
+  KrPYWAW5cC7EWeDm1yxusOqzxnkBcXMnKYNJm8KU4YfVpgPXJy9bTLWhm482BlJm
+  v6wUZwYOM9B7x3dWbbsQXSuKmFqoxiNRWaA41qUS6eVjXpd4Gn/diSzntaX/Whew
+  dCXyioQY49CVGJg8LpX/zSYUk9dns+fCSeUUfKjv2K8WuzVkS/uMA8DxSeYBfxf5
+  ON+xcGIy3Nk7FHwY+CuIIa4WCJYB+1bVFeyCaRlCpwHK8DGUxP5PzCb44USGTI2V
+  42/R+mfGUgXXd9e36R3+wmfHZSFR6p6I6XKToCKca7buvgP2XgO9I04lTYUr0KLi
+  6ZSQYo0XuSVg3by/5kp1TrrS
+  -----END CERTIFICATE-----
+date: 2020-07-27 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: subunit
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.5'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.5.2
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.5'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.5.2
+- !ruby/object:Gem::Dependency
+  name: simple-config
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.7'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.7.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.7'
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.7.1
+description:
+email: james@jamesrobertson.eu
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/youtube_transcript2020.rb
+homepage: https://github.com/jrobertson/youtube_transcript2020
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.0.3
+signing_key:
+specification_version: 4
+summary: Makes it easier to digest a Youtube video by reading the transcript.
+test_files: []

metadata.gz.sig ADDED

Binary file