youtube_transcript2020 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8010045930895f8e21d94e9a66764270caa71c187592fb385d29c5b005c44d98
4
+ data.tar.gz: 370d33e8f02774874ab626b2d3b52c3cc7d0ae681db36c4c6dd0070a882faa59
5
+ SHA512:
6
+ metadata.gz: 9f8a4090ea44bb1579b58048ae7d1586883ef9580898e569207b365df2d70e2c0e5fa3db30e65c0d23d4011f1583d779432fec331b955d6745c90b561b863e88
7
+ data.tar.gz: b0bd2bda2e4182e4d1b522720ba032e7c0313ead7f2a987f71b728cc80da34edb26c5b114ccc006f5196448af1e325708e9b7d32ddcd2511f3a2a549b5480e97
@@ -0,0 +1,3 @@
1
+ 1y������>�1ǽ�
2
+ ҋ���Z*f6��W��!F���7��u\= � �}�e5N��r�u�a����d�� ����m�dƩ�>ʼn�2���^<3%>$N�|��*��۬�4�A��v���kLC�y��{� -Yܽ�J��v�}�\KDނ��AG`�Ug��G7�,y�\��Uag�I:.��=�W��0V��
3
+ ��h�} "_��#��nf�L�o��L}�;��r�㑩�{�;���T��YC�rxj�D�r��kX5�K8.ɘS�t�� Z9�[9��yCg@<2��������R�>�����@pk��ef鹂��֍ �� ��FP���+*��_�-J���q�u%�z��#�ݠb}�e�#rHI
@@ -0,0 +1,2 @@
1
+ g�ج����d�Y�"��)�
2
+ �X�" k�5��\�aO�S�K;v��nr�e�S�H�<��5-`�ȶ��R�z����WS����ʜ�}g*���?����y'��Λ:�>NK�P�����HI_�����23$�8Y����"�7,��Ng����1��Y�-<v�4�5��U$FA��!8�lK��!5�m<n���aa))W�ªH�D�4�k dQ��A�:+�(�=L �B]x5��>�F�Kk]{�0P����,���9�cf�V� ,�1�犄�� ����� t}ִة�2��x�j������)Yv>�A�{�)&�P��������
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: youtube_transcript2020.rb
4
+
5
+ require 'subunit'
6
+ require 'simple-config'
7
+
8
+
9
# Downloads the English transcript of a YouTube video, tidies the captions
# into paragraphs, and renders them as plain text or as an HTML page with
# clickable timestamps. A previously exported (and hand-edited) plain text
# transcript can be loaded back with #import.
#
# NOTE(review): this class references Net::HTTP, URI, JSON, Rexle and
# RXFHelper without requiring them itself; presumably they are loaded
# transitively by the 'subunit' / 'simple-config' requires - confirm.
#
class YoutubeTranscript2020

  attr_reader :author, :id, :title

  # Captions starting with a lowercase letter, a digit, a double quote or
  # "Or" continue the previous paragraph unchanged.
  CONTINUATION = /\A(?:[a-z0-9"]|Or,? )/

  # Captions starting with one of these conjunctions continue the previous
  # paragraph with the conjunction dropped.
  LEADING_CONJUNCTION = /\A(?:And|But|So),? /

  # A line of an imported transcript which opens a new entry, e.g. "1:23"
  # or "1:23 Some heading".
  TIMESTAMP_LINE = /\A\d+:\d+/

  HTML_ESCAPES = {
    '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;', "'" => '&#39;'
  }.freeze

  # id - a YouTube video id, or a watch / youtu.be / embed URL containing
  #      one. When omitted nothing is fetched; use #import to load a
  #      transcript instead.
  #
  def initialize(id=nil)
    return unless id

    @id = extract_id(id)

    # NOTE(review): the timedtext endpoint may no longer be served by
    # Google - confirm it still responds before relying on this.
    query = URI.encode_www_form(lang: 'en', v: @id)
    s = Net::HTTP.get(URI("http://video.google.com/timedtext?#{query}"))
    @s = parse s

    fetch_info(@id)
  end

  # returns the transcript as an array of [timestamp, text] pairs, or nil
  # when nothing has been fetched or imported yet
  #
  def to_a
    @a
  end

  # returns the transcript in plain text including timestamps
  #
  def to_s
    h = {id: @id, title: @title, author: @author}
    SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s.to_s
  end

  # reads a plain text transcript which has been modified to include headings
  #
  # The text is expected in the layout produced by #to_s: a SimpleConfig
  # header, a line of dashes, then entries made of a timestamp line
  # (optionally followed by a heading) and the lines of text beneath it.
  # Returns the array of [timestamp, text] pairs.
  #
  def import(obj)
    s = RXFHelper.read(obj).first

    header, body = s.split(/-----+/, 2)
    body = body.to_s # no separator found: treat the transcript as empty

    h = SimpleConfig.new(header).to_h
    @id, @author, @title = h[:id], h[:author], h[:title]
    @s = body

    @a = parse_transcript_body(body)
  end

  # Outputs HTML containing the embedded video and transcription
  #
  # start - offset in seconds at which the embedded player begins
  #
  def to_html(start: 0)
    url = escape_html("https://www.youtube.com/embed/#{@id}")

    links = (@a || []).map do |timestamp, s|
      # a trailing heading is ignored because String#to_i stops at the
      # first non-digit, e.g. "23 Intro".to_i => 23
      seconds = Subunit.new({minutes: 60, hours: 60},
                            timestamp.split(':').map(&:to_i)).to_i
      "<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> " \
        % [url, seconds, escape_html(timestamp), escape_html(s)]
    end

    <<~HTML
      <!DOCTYPE html>
      <html lang="en">
        <head>
          <title>#{escape_html(@title)}</title>
          <meta charset="utf-8" />
        </head>
        <body>
          <div style="width: 1080px; background: white">
            <div style="float:left; width: 580px; background: white">
              <iframe width="560" height="315" src="#{url}?start=#{start.to_i}&autoplay=1" name="video" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
              <h1>#{escape_html(@title)}</h1>
            </div>
            <div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
              <ul>#{links.join("\n")}</ul>
            </div>

          </div>
        </body>
      </html>
    HTML
  end

  # Outputs the headings including timestamps, i.e. every timestamp line
  # which is followed by heading text (an array of strings).
  # note: This can be helpful for copying and pasting directly into a YouTube comment
  #
  def to_headings
    (@a || []).select { |timestamp, _| timestamp =~ / / }.map(&:first)
  end

  private

  # Pulls the video id out of a watch, youtu.be or embed URL, dropping any
  # trailing parameters; anything else is assumed to already be an id.
  #
  def extract_id(id)
    s = id.to_s.strip
    s[%r{(?:[?&]v=|youtu\.be/|/embed/)([\w-]+)}, 1] || s
  end

  # Looks up the title and author of the video through YouTube's oEmbed
  # endpoint. Raises JSON::ParserError when the response is not JSON.
  #
  def fetch_info(id)
    query = URI.encode_www_form(url: "https://www.youtube.com/watch?v=#{id}",
                                format: 'json')
    s = Net::HTTP.get(URI("https://www.youtube.com/oembed?#{query}"))

    h = JSON.parse(s, symbolize_names: true)
    @title = h[:title]
    @author = h[:author_name]
  end

  # Converts the timedtext XML into paragraphs. Stores the
  # [timestamp, text] pairs in @a and returns the plain text rendering
  # with each timestamp appearing above its paragraph.
  #
  def parse(s)
    doc = Rexle.new(s)

    a = doc.root.elements.each.map do |x|
      timestamp = Subunit.new({minutes: 60, hours: 60},
                              seconds: x.attributes[:start].to_f).to_s(verbose: false)
      [timestamp, x.text.unescape.gsub("\n", ' ').gsub('&#39;', "'").gsub('&quot;', '"')]
    end

    @a = merge_captions(a)

    # joined so that #to_s can concatenate it with the header
    @a.map { |time, text| "\n%s\n\n%s" % [time, text] }.join("\n")
  end

  # Folds captions which merely continue a sentence (they start in lower
  # case, with a quote, or with And, Or, But, So) into the preceding
  # paragraph. The input pairs are not modified.
  #
  def merge_captions(entries)
    entries.each_with_object([]) do |(time, s), paragraphs|
      previous = paragraphs.last

      if previous.nil?
        # nothing to merge into yet, even if the caption is a continuation
        paragraphs << [time, s]
      elsif s =~ CONTINUATION
        previous[-1] = previous[-1].chomp + ' ' + s
      elsif s =~ LEADING_CONJUNCTION
        previous[-1] += ' ' + upcase_first(s.sub(LEADING_CONJUNCTION, ''))
      else
        paragraphs << [time, s]
      end
    end
  end

  # Upper-cases the first character only; String#capitalize would also
  # lower-case the rest of the sentence, mangling names and acronyms.
  #
  def upcase_first(s)
    s.empty? ? s : s[0].upcase + s[1..-1]
  end

  # Splits the body of an imported transcript into [timestamp, text]
  # pairs. A line beginning with a timestamp opens an entry; the non-blank
  # lines which follow it are joined with spaces to form its text.
  #
  def parse_transcript_body(body)
    entries = []

    body.each_line do |raw|
      line = raw.strip
      next if line.empty?

      if line =~ TIMESTAMP_LINE
        entries << [line, []]
      elsif entries.any?
        entries.last[1] << line
      end
    end

    entries.map { |timestamp, lines| [timestamp, lines.join(' ')] }
  end

  # Escapes the characters which are significant in HTML text and
  # attribute values.
  #
  def escape_html(text)
    text.to_s.gsub(/[&<>"']/, HTML_ESCAPES)
  end

end
163
+
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: youtube_transcript2020
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzI3MjI1MTUyWhcN
15
+ MjEwNzI3MjI1MTUyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxfRw+
17
+ xg525jF+UNFVTtUrON2issNxWgDTq1efjPq9yMzqYrIDZREFE/3fgYbtAqA1Ut94
18
+ 2h8mAKnAg1CC4plPA8o15f+h30TPRaxZXFmYUMxTkaLHL4Lvzd1D7eXqRYf9SFQM
19
+ EvoYbncj9QwR57WcVF/MTdwbyyiZo3CGzwmWNb9OCIZtvs8m/UOzAmbfF3lIKz9k
20
+ +ZK03KqYhyjuAiVhF39LdWUc1AWqu5i+JpFE+Lzfqv1uAjjgshmUkHOXkpWOorHc
21
+ uxL0+xZXWgTwpa1QCw3cQY1LW45QjZt4ckA9lOub1LvUTDCvZocNS+dlIUMdW0mP
22
+ jFII/nX/KWxW+NOmkWBpdGbXmY5QTppwx88r+VRpTdhepVcNiiHhMsYQsLI/fzVo
23
+ kWTib/aBnAoahtlbaldC+e03GPsLPmpTl4ZjOFqUuAyq47h42NYt6kPY/y7Gj8To
24
+ fx4pNgddR/r/WABaNao8Q+tzIxgQwCf1rijvfJP+u04GCmIeFm8oQ1x0XkUCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1nkRML1E
26
+ Q0PgH/jEHBOQSUTi4MYwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAWjyRzOnO0k/P2YHBsie5hNyJq6q7zb9bto2WYF1L
29
+ N0/cvumuBsJMDUuPlD9RFvzncZbu//hbnZbK6cxiptm9HUN+m7zNi8XUcDHQw4Ba
30
+ 17ZyHWKM2pkf+PJb4waQVeqyUXjbM9r6L8cVa1gkalU6ZpqEtBmkEzJCDZVf0Fll
31
+ KrPYWAW5cC7EWeDm1yxusOqzxnkBcXMnKYNJm8KU4YfVpgPXJy9bTLWhm482BlJm
32
+ v6wUZwYOM9B7x3dWbbsQXSuKmFqoxiNRWaA41qUS6eVjXpd4Gn/diSzntaX/Whew
33
+ dCXyioQY49CVGJg8LpX/zSYUk9dns+fCSeUUfKjv2K8WuzVkS/uMA8DxSeYBfxf5
34
+ ON+xcGIy3Nk7FHwY+CuIIa4WCJYB+1bVFeyCaRlCpwHK8DGUxP5PzCb44USGTI2V
35
+ 42/R+mfGUgXXd9e36R3+wmfHZSFR6p6I6XKToCKca7buvgP2XgO9I04lTYUr0KLi
36
+ 6ZSQYo0XuSVg3by/5kp1TrrS
37
+ -----END CERTIFICATE-----
38
+ date: 2020-07-27 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: subunit
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.5'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.5.2
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.5'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.5.2
60
+ - !ruby/object:Gem::Dependency
61
+ name: simple-config
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '0.7'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 0.7.1
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0.7'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 0.7.1
80
+ description:
81
+ email: james@jamesrobertson.eu
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - lib/youtube_transcript2020.rb
87
+ homepage: https://github.com/jrobertson/youtube_transcript2020
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubygems_version: 3.0.3
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Makes it easier to digest a Youtube video by reading the transcript.
110
+ test_files: []
Binary file