youtube_transcript2020 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +3 -0
- data.tar.gz.sig +2 -0
- data/lib/youtube_transcript2020.rb +163 -0
- metadata +110 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8010045930895f8e21d94e9a66764270caa71c187592fb385d29c5b005c44d98
|
4
|
+
data.tar.gz: 370d33e8f02774874ab626b2d3b52c3cc7d0ae681db36c4c6dd0070a882faa59
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9f8a4090ea44bb1579b58048ae7d1586883ef9580898e569207b365df2d70e2c0e5fa3db30e65c0d23d4011f1583d779432fec331b955d6745c90b561b863e88
|
7
|
+
data.tar.gz: b0bd2bda2e4182e4d1b522720ba032e7c0313ead7f2a987f71b728cc80da34edb26c5b114ccc006f5196448af1e325708e9b7d32ddcd2511f3a2a549b5480e97
|
checksums.yaml.gz.sig
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
1y������>�1ǽ�
|
2
|
+
ҋ���Z*f6��W��!F���7��u\= � �}�e5N��r�u�a����d������m�dƩ�>ʼn�2���^<3%>$N�|��*��۬�4�A��v���kLC�y��{�-Yܽ�J��v�}�\KDނ��AG`�Ug��G7�,y�\��Uag�I:.��=�W��0V��
|
3
|
+
��h�}"_��#��nf�L�o��L}�;��r�㑩�{�;���T��YC�rxj�D�r��kX5�K8.ɘS�t��Z9�[9��yCg@<2��������R�>�����@pk��ef鹂��֍ �� ��FP���+*��_�-J���q�u%�z��#�ݠb}�e�#rHI
|
data.tar.gz.sig
ADDED
@@ -0,0 +1,2 @@
|
|
1
|
+
g�ج����d�Y�"��)�
|
2
|
+
�X�" k�5��\�aO�S�K;v��nr�e�S�H�<��5-`�ȶ��R�z����WS����ʜ�}g*���?����y'��Λ:�>NK�P�����HI_�����23$�8Y����"�7,��Ng����1��Y�-<v�4�5��U$FA��!8�lK��!5�m<n���aa))W�ªH�D�4�k dQ��A�:+�(�=L �B]x5��>�F�Kk]{�0P����,���9�cf�V� ,�1�犄�������t}ִة�2��x�j������)Yv>�A�{�)&�P��������
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: youtube_transcript2020.rb
|
4
|
+
|
5
|
+
require 'cgi/escape'
require 'json'
require 'net/http'

require 'subunit'
require 'simple-config'
|
7
|
+
|
8
|
+
|
9
|
+
# Downloads the English transcript of a YouTube video and exposes it as
# timestamped paragraphs, plain text or an HTML page with jump links.
#
# An instance can be populated in two ways:
#
# * YoutubeTranscript2020.new(id_or_url) fetches the captions and the
#   video title/author over HTTP.
# * YoutubeTranscript2020.new.import(obj) reads back a plain text transcript
#   previously produced by #to_s (optionally hand edited to add headings
#   after the timestamps).
#
class YoutubeTranscript2020

  # A line is treated as a timestamp when it starts with m:ss or h:mm:ss;
  # anything after the digits (e.g. a heading) is kept as part of it.
  TIMESTAMP = /\A\s*\d+:\d+/

  # Caption fragments which are appended verbatim to the previous paragraph.
  CONTINUATION = /\A(?:[a-z0-9]|Or,? |")/

  # Leading conjunctions which are dropped before the fragment is appended
  # to the previous paragraph.
  CONJUNCTION = /\A(?:And|But|So),? /

  LINK_TEMPLATE = "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a>" \
                  '<p>%s</p></li> '

  attr_reader :author, :id, :title

  # id - a bare video id, a https://www.youtube.com/watch?v=... URL or a
  #      https://youtu.be/... URL. When omitted nothing is fetched and the
  #      object is expected to be populated through #import.
  #
  def initialize(id=nil)

    # start from an empty transcript so every public method is callable
    # on an instance which has not fetched or imported anything yet
    @a = []
    @s = ''

    return unless id

    @id = extract_id(id)

    s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
    @s = parse s

    fetch_info(@id)

  end

  # returns the transcript as an Array of [timestamp, text] pairs
  #
  def to_a
    @a
  end

  # returns the transcript in plain text including timestamps, preceded by
  # a header holding the id, title and author
  #
  def to_s

    h = {id: @id, title: @title, author: @author}
    SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s.to_s

  end

  # reads a plain text transcript which has been modified to include headings
  #
  # Each timestamp line starts a new entry; the non-blank lines which follow
  # it (up to the next timestamp) become that entry's text. Returns the
  # resulting Array of [timestamp, text] pairs.
  #
  def import(obj)

    s = RXFHelper.read(obj).first

    header, body = s.split(/-----+/,2)

    h = SimpleConfig.new(header).to_h
    @id, @author, @title = h[:id], h[:author], h[:title]
    @s = body.to_s

    lines = @s.lines.map(&:strip).reject(&:empty?)

    @a = lines.each_with_object([]) do |line, rows|

      if line =~ TIMESTAMP
        rows << [line, '']
      elsif rows.any?
        # text which precedes the first timestamp has nothing to attach to
        rows.last[1] = [rows.last[1], line].reject(&:empty?).join(' ')
      end

    end

  end

  # Outputs HTML containing the embedded video and transcription
  #
  # Every timestamp is rendered as a link which restarts the embedded player
  # at that position. The title, timestamps and text are HTML escaped since
  # they originate from remote or user edited content.
  #
  def to_html

    url = 'https://www.youtube.com/embed/' + CGI.escapeHTML(@id.to_s)
    title = CGI.escapeHTML(@title.to_s)

    links = @a.map do |timestamp, s|

      # "1:07 Heading" -> [1, 7] -> 67 seconds
      seconds = Subunit.new({minutes: 60, hours: 60},
                            timestamp.split(':').map(&:to_i)).to_i
      format(LINK_TEMPLATE, url, seconds, CGI.escapeHTML(timestamp),
             CGI.escapeHTML(s.to_s))

    end

    <<~HTML
      <!DOCTYPE html>
      <html lang="en">
      <head>
      <title>#{title}</title>
      <meta charset="utf-8" />
      </head>
      <body>
      <div style="width: 1080px; background: white">
      <div style="float:left; width: 580px; background: white">
      <iframe width="560" height="315" src="#{url}?autoplay=1" name="video" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
      <h1>#{title}</h1>
      </div>
      <div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
      <ul>#{links.join("\n")}</ul>
      </div>

      </div>
      </body>
      </html>
    HTML

  end

  # Returns the timestamps which carry a heading (i.e. contain a space,
  # e.g. "1:07 Introduction") as an Array of Strings.
  # note: This can be helpful for copying and pasting directly into a
  # YouTube comment
  #
  def to_headings

    @a.select {|timestamp, _| timestamp =~ / /}.map(&:first)

  end

  private

  # Reduces a watch URL or short URL to the bare video id, dropping any
  # further query parameters; anything else is returned as given.
  #
  def extract_id(id)

    s = id.to_s.strip

    s[%r{\Ahttps?://(?:www\.|m\.)?youtube\.com/watch\?(?:.*&)?v=([^&#]+)}, 1] ||
      s[%r{\Ahttps?://youtu\.be/([^?&#/]+)}, 1] || s

  end

  # Looks up the title and author of the video through the YouTube oEmbed
  # endpoint. Raises JSON::ParserError when the response is not JSON.
  #
  def fetch_info(id)

    query = URI.encode_www_form(url: "https://www.youtube.com/watch?v=#{id}",
                                format: 'json')
    s = Net::HTTP.get(URI("https://www.youtube.com/oembed?#{query}"))

    h = JSON.parse(s, symbolize_names: true)
    @title = h[:title]
    @author = h[:author_name]

  end

  # Converts the timedtext XML into [timestamp, text] pairs (stored in @a)
  # and returns the transcript as plain text with each timestamp appearing
  # above its paragraph.
  #
  def parse(s)

    # an empty response carries no captions to parse
    return '' if s.to_s.strip.empty?

    doc = Rexle.new(s)

    a = doc.root.elements.each.map do |x|

      timestamp = Subunit.new({minutes: 60, hours: 60},
          seconds: x.attributes[:start].to_f).to_s(verbose: false)
      raw = x.text
      # NOTE(review): the two entity literals were reconstructed from a
      # damaged copy of this line - confirm against the published gem
      text = raw ? raw.unescape.gsub("\n", ' ').gsub('&#39;',"'")
                      .gsub('&quot;','"') : ''
      [timestamp, text]

    end

    @a = merge_fragments(a)

    # formats the paragraph with the timestamp appearing above
    @a.map {|time, text| "\n%s\n\n%s" % [time, text]}.join("\n")

  end

  # Joins caption fragments which continue the previous sentence (lower
  # case start, opening quote, or a leading And/Or/But/So) onto the
  # preceding entry. The first fragment always starts a new entry.
  #
  def merge_fragments(rows)

    rows.each_with_object([]) do |(time, s), merged|

      if merged.empty?
        merged << [time, s]
      elsif s =~ CONTINUATION
        merged.last[1] = "#{merged.last[1].chomp} #{s}"
      elsif s =~ CONJUNCTION
        merged.last[1] = "#{merged.last[1].chomp} " +
                         upcase_first(s.sub(CONJUNCTION, ''))
      else
        merged << [time, s]
      end

    end

  end

  # Upper-cases the first character only; String#capitalize would also
  # lower-case the remainder (proper nouns, acronyms).
  #
  def upcase_first(s)
    s.empty? ? s : s[0].upcase + s[1..-1]
  end

end
|
163
|
+
|
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: youtube_transcript2020
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzI3MjI1MTUyWhcN
|
15
|
+
MjEwNzI3MjI1MTUyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxfRw+
|
17
|
+
xg525jF+UNFVTtUrON2issNxWgDTq1efjPq9yMzqYrIDZREFE/3fgYbtAqA1Ut94
|
18
|
+
2h8mAKnAg1CC4plPA8o15f+h30TPRaxZXFmYUMxTkaLHL4Lvzd1D7eXqRYf9SFQM
|
19
|
+
EvoYbncj9QwR57WcVF/MTdwbyyiZo3CGzwmWNb9OCIZtvs8m/UOzAmbfF3lIKz9k
|
20
|
+
+ZK03KqYhyjuAiVhF39LdWUc1AWqu5i+JpFE+Lzfqv1uAjjgshmUkHOXkpWOorHc
|
21
|
+
uxL0+xZXWgTwpa1QCw3cQY1LW45QjZt4ckA9lOub1LvUTDCvZocNS+dlIUMdW0mP
|
22
|
+
jFII/nX/KWxW+NOmkWBpdGbXmY5QTppwx88r+VRpTdhepVcNiiHhMsYQsLI/fzVo
|
23
|
+
kWTib/aBnAoahtlbaldC+e03GPsLPmpTl4ZjOFqUuAyq47h42NYt6kPY/y7Gj8To
|
24
|
+
fx4pNgddR/r/WABaNao8Q+tzIxgQwCf1rijvfJP+u04GCmIeFm8oQ1x0XkUCAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1nkRML1E
|
26
|
+
Q0PgH/jEHBOQSUTi4MYwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEAWjyRzOnO0k/P2YHBsie5hNyJq6q7zb9bto2WYF1L
|
29
|
+
N0/cvumuBsJMDUuPlD9RFvzncZbu//hbnZbK6cxiptm9HUN+m7zNi8XUcDHQw4Ba
|
30
|
+
17ZyHWKM2pkf+PJb4waQVeqyUXjbM9r6L8cVa1gkalU6ZpqEtBmkEzJCDZVf0Fll
|
31
|
+
KrPYWAW5cC7EWeDm1yxusOqzxnkBcXMnKYNJm8KU4YfVpgPXJy9bTLWhm482BlJm
|
32
|
+
v6wUZwYOM9B7x3dWbbsQXSuKmFqoxiNRWaA41qUS6eVjXpd4Gn/diSzntaX/Whew
|
33
|
+
dCXyioQY49CVGJg8LpX/zSYUk9dns+fCSeUUfKjv2K8WuzVkS/uMA8DxSeYBfxf5
|
34
|
+
ON+xcGIy3Nk7FHwY+CuIIa4WCJYB+1bVFeyCaRlCpwHK8DGUxP5PzCb44USGTI2V
|
35
|
+
42/R+mfGUgXXd9e36R3+wmfHZSFR6p6I6XKToCKca7buvgP2XgO9I04lTYUr0KLi
|
36
|
+
6ZSQYo0XuSVg3by/5kp1TrrS
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2020-07-27 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: subunit
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.5'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.5.2
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0.5'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.5.2
|
60
|
+
- !ruby/object:Gem::Dependency
|
61
|
+
name: simple-config
|
62
|
+
requirement: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - "~>"
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0.7'
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 0.7.1
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - "~>"
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0.7'
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 0.7.1
|
80
|
+
description:
|
81
|
+
email: james@jamesrobertson.eu
|
82
|
+
executables: []
|
83
|
+
extensions: []
|
84
|
+
extra_rdoc_files: []
|
85
|
+
files:
|
86
|
+
- lib/youtube_transcript2020.rb
|
87
|
+
homepage: https://github.com/jrobertson/youtube_transcript2020
|
88
|
+
licenses:
|
89
|
+
- MIT
|
90
|
+
metadata: {}
|
91
|
+
post_install_message:
|
92
|
+
rdoc_options: []
|
93
|
+
require_paths:
|
94
|
+
- lib
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubygems_version: 3.0.3
|
107
|
+
signing_key:
|
108
|
+
specification_version: 4
|
109
|
+
summary: Makes it easier to digest a Youtube video by reading the transcript.
|
110
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|