youtube_transcript2020 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8010045930895f8e21d94e9a66764270caa71c187592fb385d29c5b005c44d98
4
+ data.tar.gz: 370d33e8f02774874ab626b2d3b52c3cc7d0ae681db36c4c6dd0070a882faa59
5
+ SHA512:
6
+ metadata.gz: 9f8a4090ea44bb1579b58048ae7d1586883ef9580898e569207b365df2d70e2c0e5fa3db30e65c0d23d4011f1583d779432fec331b955d6745c90b561b863e88
7
+ data.tar.gz: b0bd2bda2e4182e4d1b522720ba032e7c0313ead7f2a987f71b728cc80da34edb26c5b114ccc006f5196448af1e325708e9b7d32ddcd2511f3a2a549b5480e97
@@ -0,0 +1,3 @@
1
+ 1y������>�1ǽ�
2
+ ҋ���Z*f6��W��!F���7��u\= � �}�e5N��r�u�a����d�� ����m�dƩ�>ʼn�2���^<3%>$N�|��*��۬�4�A��v���kLC�y��{� -Yܽ�J��v�}�\KDނ��AG`�Ug��G7�,y�\��Uag�I:.��=�W��0V��
3
+ ��h�} "_��#��nf�L�o��L}�;��r�㑩�{�;���T��YC�rxj�D�r��kX5�K8.ɘS�t�� Z9�[9��yCg@<2��������R�>�����@pk��ef鹂��֍ �� ��FP���+*��_�-J���q�u%�z��#�ݠb}�e�#rHI
@@ -0,0 +1,2 @@
1
+ g�ج����d�Y�"��)�
2
+ �X�" k�5��\�aO�S�K;v��nr�e�S�H�<��5-`�ȶ��R�z����WS����ʜ�}g*���?����y'��Λ:�>NK�P�����HI_�����23$�8Y����"�7,��Ng����1��Y�-<v�4�5��U$FA��!8�lK��!5�m<n���aa))W�ªH�D�4�k dQ��A�:+�(�=L �B]x5��>�F�Kk]{�0P����,���9�cf�V� ,�1�犄�� ����� t}ִة�2��x�j������)Yv>�A�{�)&�P��������
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: youtube_transcript2020.rb
4
+
5
require 'cgi'
require 'json'
require 'net/http'

require 'subunit'
require 'simple-config'
7
+
8
+
9
# Fetches (or imports) the English transcript of a YouTube video and renders
# it as plain text, as an array of [timestamp, text] pairs, or as an HTML page
# with the embedded video next to clickable timestamps.
#
# NOTE(review): Rexle and RXFHelper are referenced here but not required by
# this file directly; presumably they are loaded through the simple-config
# dependency chain - confirm.
#
class YoutubeTranscript2020

  # A line which begins with a timestamp such as "1:23" or "1:02:03",
  # optionally followed by a heading (e.g. "0:34 Introduction").
  TIMESTAMP_LINE = /\A\s*\d+:\d+/

  # A leading conjunction which is dropped when a caption is merged into
  # the preceding paragraph.
  LEADING_CONJUNCTION = /\A(?:And|But|So),? /

  # A caption which is appended unchanged to the preceding paragraph: it
  # starts with a lower case letter, a digit, a double quote or "Or".
  PLAIN_CONTINUATION = /\A(?:[a-z0-9"]|Or,? )/

  attr_reader :author, :id, :title

  # id:: a YouTube video id, a https://www.youtube.com/watch?v=... URL or a
  #      https://youtu.be/... URL. When omitted nothing is fetched and the
  #      transcript is expected to be loaded later using #import.
  #
  def initialize(id=nil)

    return unless id

    @id = extract_id(id)

    s = Net::HTTP.get(URI("http://video.google.com/timedtext?lang=en&v=#{@id}"))
    @s = parse s

    fetch_info(@id)

  end

  # returns the transcript as an array of [timestamp, text] pairs, or nil
  # if no transcript has been fetched or imported yet
  #
  def to_a()
    @a
  end

  # returns the transcript in plain text including timestamps, preceded by
  # a header containing the id, title and author
  #
  def to_s()

    h = {id: @id, title: @title, author: @author}
    # @s.to_s guards against a TypeError when no transcript was loaded
    SimpleConfig.new(h).to_s + "\n#{'-'*78}\n\n" + @s.to_s

  end

  # reads a plain text transcript which has been modified to include headings
  #
  # The text before the first run of 5 or more dashes is read as the header
  # (id, author, title); the remainder is the transcript body. Returns the
  # array of [timestamp, text] pairs.
  #
  def import(obj)

    s = RXFHelper.read(obj).first

    header, body = s.split(/-----+/,2)
    body ||= '' # no separator found; treat the transcript as empty

    h = SimpleConfig.new(header).to_h
    @id, @author, @title = h[:id], h[:author], h[:title]
    @s = body

    @a = parse_body(body)

  end

  # Outputs HTML containing the embedded video and transcription
  #
  # start:: offset in seconds at which the embedded player initially starts.
  #         The default of 67 is what was previously hard-coded and is kept
  #         so that existing callers get the same output.
  #
  def to_html(start: 67)

    url = CGI.escapeHTML("https://www.youtube.com/embed/#{@id}")
    title = CGI.escapeHTML(@title.to_s)

    links = (@a || []).map do |timestamp, text|

      # a heading trailing the timestamp is discarded by to_i,
      # e.g. "34 Introduction".to_i #=> 34
      seconds = Subunit.new({minutes: 60, hours: 60},
          timestamp.split(':').map(&:to_i)).to_i

      # the values are escaped since they originate from remote captions
      # or from a user edited file
      format("<li><a href='%s?start=%s&autoplay=1' target='video'>%s</a><p>%s</p></li> ",
             url, seconds, CGI.escapeHTML(timestamp), CGI.escapeHTML(text.to_s))
    end

<<EOF
<!DOCTYPE html>
<html lang="en">
<head>
<title>#{title}</title>
<meta charset="utf-8" />
</head>
<body>
<div style="width: 1080px; background: white">
<div style="float:left; width: 580px; background: white">
<iframe width="560" height="315" src="#{url}?start=#{start.to_i}&autoplay=1" name="video" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
<h1>#{title}</h1>
</div>
<div style="float:right; width: 500px; overflow-y: scroll; height: 400px">
<ul>#{links.join("\n")}</ul>
</div>

</div>
</body>
</html>
EOF
  end

  # Returns an array of the timestamp lines which include a heading, i.e.
  # those containing a space such as "0:34 Introduction"
  # note: This can be helpful for copying and pasting directly into a YouTube comment
  #
  def to_headings()

    (@a || []).select {|timestamp, _| timestamp =~ / /}.map(&:first)

  end

  private

  # Returns the video id found in a watch or youtu.be URL, without any
  # trailing query parameters; any other value is returned as a stripped
  # string.
  #
  def extract_id(id)

    s = id.to_s.strip

    s[%r{\Ahttps?://(?:www\.|m\.)?youtube\.com/watch\?(?:[^#]*&)?v=([\w-]+)}, 1] ||
      s[%r{\Ahttps?://youtu\.be/([\w-]+)}, 1] ||
      s

  end

  # Looks up the video through the YouTube oEmbed endpoint and stores its
  # title and author name.
  #
  def fetch_info(id)

    url = "http://www.youtube.com/oembed?url=http://www.youtube.com/watch?v=#{id}&format=json"
    s = Net::HTTP.get(URI(url))

    h = JSON.parse(s, symbolize_names: true)
    @title = h[:title]
    @author = h[:author_name]

  end

  # Converts the timed text XML into paragraphs. Stores the
  # [timestamp, text] pairs in @a and returns the plain text transcript.
  #
  def parse(s)

    doc = Rexle.new(s)

    captions = doc.root.elements.each.map do |x|
      # NOTE(review): presumably renders the start offset (in seconds) as
      # a colon separated timestamp - confirm against Subunit#to_s
      timestamp = Subunit.new({minutes: 60, hours: 60},
          seconds: x.attributes[:start].to_f).to_s(verbose: false)
      [timestamp, x.text.unescape.gsub("\n", ' ').gsub('&#39;',"'").gsub('&quot;','"')]
    end

    @a = merge_fragments(captions)

    # formats the paragraph with the timestamp appearing above
    @a.map {|time, text| "\n%s\n\n%s" % [time, text]}.join("\n")

  end

  # Joins captions which continue the previous sentence (or which start
  # with And, Or, But, So etc.) onto the preceding paragraph. A new array
  # is returned; neither the given pairs nor their strings are modified.
  #
  def merge_fragments(captions)

    captions.each_with_object([]) do |(time, text), merged|

      addition = continuation_of(text)

      # a continuation with nothing before it simply starts a paragraph
      if addition && merged.any?
        merged.last[-1] = "#{merged.last[-1].chomp} #{addition}"
      else
        merged << [time, text]
      end

    end

  end

  # Returns the text to append to the previous paragraph, or nil if the
  # caption starts a paragraph of its own.
  #
  def continuation_of(text)

    case text
    when LEADING_CONJUNCTION
      # only the 1st character is upper-cased; String#capitalize would
      # also lower-case the rest, e.g. "NASA" would become "nasa"
      text.sub(LEADING_CONJUNCTION, '').sub(/\A./, &:upcase)
    when PLAIN_CONTINUATION
      text
    end

  end

  # Splits an imported transcript body into [timestamp, text] pairs. Blank
  # lines are skipped, lines following a timestamp line are joined using a
  # space, and text appearing before the 1st timestamp is ignored.
  #
  def parse_body(body)

    body.lines.each_with_object([]) do |raw, entries|

      line = raw.strip
      next if line.empty?

      if line =~ TIMESTAMP_LINE
        entries << [line, nil]
      elsif (entry = entries.last)
        entry[1] = [entry[1], line].compact.join(' ')
      end

    end

  end

end
163
+
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: youtube_transcript2020
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjAwNzI3MjI1MTUyWhcN
15
+ MjEwNzI3MjI1MTUyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQCxfRw+
17
+ xg525jF+UNFVTtUrON2issNxWgDTq1efjPq9yMzqYrIDZREFE/3fgYbtAqA1Ut94
18
+ 2h8mAKnAg1CC4plPA8o15f+h30TPRaxZXFmYUMxTkaLHL4Lvzd1D7eXqRYf9SFQM
19
+ EvoYbncj9QwR57WcVF/MTdwbyyiZo3CGzwmWNb9OCIZtvs8m/UOzAmbfF3lIKz9k
20
+ +ZK03KqYhyjuAiVhF39LdWUc1AWqu5i+JpFE+Lzfqv1uAjjgshmUkHOXkpWOorHc
21
+ uxL0+xZXWgTwpa1QCw3cQY1LW45QjZt4ckA9lOub1LvUTDCvZocNS+dlIUMdW0mP
22
+ jFII/nX/KWxW+NOmkWBpdGbXmY5QTppwx88r+VRpTdhepVcNiiHhMsYQsLI/fzVo
23
+ kWTib/aBnAoahtlbaldC+e03GPsLPmpTl4ZjOFqUuAyq47h42NYt6kPY/y7Gj8To
24
+ fx4pNgddR/r/WABaNao8Q+tzIxgQwCf1rijvfJP+u04GCmIeFm8oQ1x0XkUCAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU1nkRML1E
26
+ Q0PgH/jEHBOQSUTi4MYwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAWjyRzOnO0k/P2YHBsie5hNyJq6q7zb9bto2WYF1L
29
+ N0/cvumuBsJMDUuPlD9RFvzncZbu//hbnZbK6cxiptm9HUN+m7zNi8XUcDHQw4Ba
30
+ 17ZyHWKM2pkf+PJb4waQVeqyUXjbM9r6L8cVa1gkalU6ZpqEtBmkEzJCDZVf0Fll
31
+ KrPYWAW5cC7EWeDm1yxusOqzxnkBcXMnKYNJm8KU4YfVpgPXJy9bTLWhm482BlJm
32
+ v6wUZwYOM9B7x3dWbbsQXSuKmFqoxiNRWaA41qUS6eVjXpd4Gn/diSzntaX/Whew
33
+ dCXyioQY49CVGJg8LpX/zSYUk9dns+fCSeUUfKjv2K8WuzVkS/uMA8DxSeYBfxf5
34
+ ON+xcGIy3Nk7FHwY+CuIIa4WCJYB+1bVFeyCaRlCpwHK8DGUxP5PzCb44USGTI2V
35
+ 42/R+mfGUgXXd9e36R3+wmfHZSFR6p6I6XKToCKca7buvgP2XgO9I04lTYUr0KLi
36
+ 6ZSQYo0XuSVg3by/5kp1TrrS
37
+ -----END CERTIFICATE-----
38
+ date: 2020-07-27 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: subunit
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.5'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.5.2
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.5'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.5.2
60
+ - !ruby/object:Gem::Dependency
61
+ name: simple-config
62
+ requirement: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - "~>"
65
+ - !ruby/object:Gem::Version
66
+ version: '0.7'
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: 0.7.1
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '0.7'
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: 0.7.1
80
+ description:
81
+ email: james@jamesrobertson.eu
82
+ executables: []
83
+ extensions: []
84
+ extra_rdoc_files: []
85
+ files:
86
+ - lib/youtube_transcript2020.rb
87
+ homepage: https://github.com/jrobertson/youtube_transcript2020
88
+ licenses:
89
+ - MIT
90
+ metadata: {}
91
+ post_install_message:
92
+ rdoc_options: []
93
+ require_paths:
94
+ - lib
95
+ required_ruby_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ requirements: []
106
+ rubygems_version: 3.0.3
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Makes it easier to digest a Youtube video by reading the transcript.
110
+ test_files: []
Binary file