ted_talk 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +20 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +57 -0
- data/Rakefile +8 -0
- data/bin/ted_talk +110 -0
- data/lib/ted_talk.rb +298 -0
- data/lib/ted_talk/download_utils.rb +120 -0
- data/lib/ted_talk/unix_tools.rb +38 -0
- data/lib/ted_talk/version.rb +3 -0
- data/ted_talk.gemspec +27 -0
- data/test/ted_talk_test.rb +29 -0
- metadata +172 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Yoichiro Hasebe
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# TedTalk
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
TedTalk helps download a TED talk video and convert it to a slowed-down MP3 with pauses, which is useful for English learning
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
TedTalk requires [FFmpeg](http://ffmpeg.org/) and [SoX](http://sox.sourceforge.net/) with LAME support, as well as the [TagLib](http://taglib.github.com/) audio meta-data library, to be installed on the system
|
10
|
+
|
11
|
+
$ gem install ted_talk
|
12
|
+
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
Basic usage: ted_talk desc <option> - show TED Talk description(s)
|
17
|
+
ted_talk exec <option> - download and convert a TED Talk video
|
18
|
+
ted_talk delete - delete cache folder
|
19
|
+
|
20
|
+
For details about <option>, type:
|
21
|
+
ted_talk desc -h
|
22
|
+
or ted_talk exec -h
|
23
|
+
|
24
|
+
[global options]:
|
25
|
+
--version, -v: Print version and exit
|
26
|
+
--help, -h: Show this message
|
27
|
+
|
28
|
+
### desc
|
29
|
+
|
30
|
+
ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS
|
31
|
+
feed or the URL of a specific talk
|
32
|
+
|
33
|
+
Usage: ted_talk desc <options>
|
34
|
+
where <options> are:
|
35
|
+
|
36
|
+
[desc options]:
|
37
|
+
--lang, -l <s>: Language of description (default: en)
|
38
|
+
--rss, -r: Show descriptions of the newest talks from TED Talk RSS
|
39
|
+
--url, -u <s>: URL of a specific TED Talk
|
40
|
+
--help, -h: Show this message
|
41
|
+
|
42
|
+
### exec
|
43
|
+
|
44
|
+
The ted_talk exec subcommand downloads a TED Talk video and converts it to an MP3 file
|
45
|
+
that is modified in a specified fashion
|
46
|
+
|
47
|
+
Usage: ted_talk exec <options>
|
48
|
+
where <options> are:
|
49
|
+
|
50
|
+
[exec options]
|
51
|
+
--url, -u <s>: URL of a specific TED Talk
|
52
|
+
--lang, -l <s>: Language of (bilingual) transcripts (default: en)
|
53
|
+
--outdir, -o <s>: Directory for file output (default: ./)
|
54
|
+
--speed, -s <f>: Speed of output file [0.1 - 100] (default: 1.0)
|
55
|
+
--silence, -i <f>: Length (seconds) of a pause added to each utterance
|
56
|
+
[0 - 120] (default: 0.0)
|
57
|
+
--help, -h: Show this message
|
data/Rakefile
ADDED
data/bin/ted_talk
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
5
|
+
require 'rubygems'
|
6
|
+
require 'trollop'
|
7
|
+
require 'ted_talk'
|
8
|
+
|
9
|
+
# http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html
|
10
|
+
# http://www.ted.com/talks/jonathan_haidt_on_the_moral_mind.html
|
11
|
+
# http://www.ted.com/talks/susan_cain_the_power_of_introverts.html
|
12
|
+
# http://www.ted.com/talks/amy_cuddy_your_body_language_shapes_who_you_are.html
|
13
|
+
# http://www.ted.com/talks/ken_robinson_says_schools_kill_creativity.html
|
14
|
+
# http://www.ted.com/talks/pranav_mistry_the_thrilling_potential_of_sixthsense_technology.html
|
15
|
+
# http://www.ted.com/talks/rives_reinventing_the_encyclopedia_game.html
|
16
|
+
# http://www.ted.com/talks/hans_rosling_and_the_magic_washing_machine.html
|
17
|
+
# http://www.ted.com/talks/israel_and_iran_a_love_story.html
|
18
|
+
# http://www.ted.com/talks/lemon_andersen_performs_please_don_t_take_my_air_jordans.html
|
19
|
+
# http://www.ted.com/talks/j_j_abrams_mystery_box.html
|
20
|
+
# http://www.ted.com/talks/robert_gupta_between_music_and_medicine.html
|
21
|
+
|
22
|
+
|
23
|
+
# Command-line front end: parses the global options, then dispatches to the
# "desc", "exec" or "delete" subcommand.

# Words at which global option parsing stops so that the subcommand can parse
# its own options.
SUB_COMMANDS = ["desc", "exec"]
global_opts = Trollop::options do
  version TedTalk::VERSION
  banner <<-EOS
TedTalk helps download a TED talk video and convert it to a slowed down MP3 with pauses that is helpful for English learning

Basic usage: ted_talk desc <option> - show TED Talk description(s)
             ted_talk exec <option> - download and convert a TED Talk video
             ted_talk delete        - delete cache folder

For details about <option>, type:
             ted_talk desc -h
          or ted_talk exec -h

[global options]:
EOS

  stop_on SUB_COMMANDS
end

cmd = ARGV.shift # get the subcommand
cmd_opts = case cmd

when "desc" # parse desc options
  desc_opts = Trollop::options do
    banner <<-EOS
ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS feed or the URL of a specific talk

Usage: ted_talk desc <options>
where <options> are:

[desc options]:
EOS

    opt :lang, "Language of description", :default => "en", :type => :string
    opt :rss, "Show descriptions of the newest talks from TED Talk RSS", :default => false
    opt :url, "URL of a specific TED Talk", :type => :string
  end

  if desc_opts[:url] && /http\:\/\/www\.ted\.com\/talks\// !~ desc_opts[:url]
    Trollop::die :url, "must include 'http://www.ted.com/talks/'"
  end

  lang = desc_opts[:lang]

  # A specific talk URL takes precedence over the RSS listing.
  if source_url = desc_opts[:url]
    tedtalk = TedTalk::Converter.new(source_url)
    tedtalk.desc_talk(lang)
  elsif desc_opts[:rss]
    TedTalk.desc_talks_rss(lang)
  else
    Trollop::die "invalid options"
  end
when "exec" # parse exec options
  exec_opts = Trollop::options do
    banner <<-EOS
ted_talk exec subcommand downloads a TED Talk video and converts it to an MP3 file that is modified in a specified fashion

Usage: ted_talk exec <options>
where <options> are:

[exec options]
EOS

    opt :url, "URL of a specific TED Talk", :type => :string
    opt :lang, "Language of (bilingual) transcripts", :default => "en", :type => :string
    opt :outdir, "Directory for file output", :default=> "./"
    opt :speed, "Speed of output file [0.1 - 100]", :default => 1.0
    opt :silence, "Length (seconds) of a pause added to each utterance [0 - 120]", :default => 0.0
  end

  # Reject a missing URL here instead of letting the converter exit with a
  # generic "not a valid URL" message.
  Trollop::die :url, "must be specified" unless exec_opts[:url]
  # File.directory? returns false for a missing path, whereas File::ftype
  # raises Errno::ENOENT before the die message can be shown.
  Trollop::die :outdir, "must be an existing directory" unless File.directory?(exec_opts[:outdir])
  Trollop::die :speed, "must be between 0.1 and 100" unless exec_opts[:speed] >= 0.1 && exec_opts[:speed] <= 100
  Trollop::die :silence, "must be between 0 and 120" unless exec_opts[:silence] >= 0 && exec_opts[:silence] <= 120

  source_url = exec_opts[:url]
  lang = exec_opts[:lang]
  outdir = exec_opts[:outdir]
  speed = exec_opts[:speed]
  silence = exec_opts[:silence]

  tedtalk = TedTalk::Converter.new(source_url)
  tedtalk.execute(outdir, lang, speed, silence)
when "delete"
  TedTalk.delete_cache
else
  Trollop::die "unknown subcommand #{cmd.inspect}"
end
|
data/lib/ted_talk.rb
ADDED
@@ -0,0 +1,298 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
$:.unshift "~/Dropbox/code/speak_slow/lib"
|
5
|
+
|
6
|
+
require 'speak_slow'
|
7
|
+
require 'json'
|
8
|
+
require 'net/http'
|
9
|
+
require 'digest/md5'
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'progressbar'
|
13
|
+
require 'taglib'
|
14
|
+
require 'nokogiri'
|
15
|
+
|
16
|
+
require 'ted_talk/version'
|
17
|
+
require 'ted_talk/download_utils'
|
18
|
+
require 'ted_talk/unix_tools'
|
19
|
+
|
20
|
+
# Preferred location of the ffmpeg executable (handed to
# UnixTools.check_command by DownloadUtils#get_wav).
FFMPEG = "/usr/local/bin/ffmpeg"

# Directory holding downloaded pages, JSON, videos and converted audio.
CACHE_DIR = File.expand_path(File.dirname(__FILE__)) + "/../cache"

# Offsets combined as INTRO_DURATION - AD_DURATION + POST_AD_DURATION and
# added to every caption start time in Converter#get_captions.
# NOTE(review): presumably milliseconds, since format_time divides by 1000 -- confirm.
INTRO_DURATION = 16500
AD_DURATION = 4000
POST_AD_DURATION = 2000

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
Dir.mkdir(CACHE_DIR) unless File.exist?(CACHE_DIR)
|
29
|
+
|
30
|
+
module TedTalk
|
31
|
+
|
32
|
+
# Removes the cache directory and everything in it, then reports it.
# Always returns true.
def self.delete_cache
  UnixTools.delete_dir(CACHE_DIR)
  puts "Cache folder has been deleted"
  true
end
|
37
|
+
|
38
|
+
# Prints a listing of talks, always bypassing the cache.
#
# For "en" the feedburner RSS feed is read and title, publication date,
# category, original link and description are printed for at most +num+
# items. For any other language the translation page of that language is
# read and the title and absolute URL of each listed link are printed.
def self.desc_talks_rss(lang, num = 12)
  separator = "--------------------------------------------------"
  if lang == "en"
    feed_xml = DownloadUtils.get_html("http://feeds.feedburner.com/tedtalks_video", true)
    items = Nokogiri::XML(feed_xml).xpath("//item")
    puts separator
    items.each_with_index do |item, position|
      puts item.xpath("title").text
      puts item.xpath("pubDate").text
      puts item.xpath("category").text
      puts item.xpath("feedburner:origLink").text
      puts item.xpath("description").text
      puts separator
      break if position + 1 == num
    end
  else
    page_html = DownloadUtils.get_html("http://www.ted.com/translate/languages/#{lang}", true)
    page = Nokogiri::HTML(page_html)
    puts separator
    page.xpath("//div[@id='list']//dd//a[1]").each do |anchor|
      puts anchor.attribute("title")
      # site-relative links are turned into absolute URLs
      puts anchor.attribute("href").text.sub(/\A\//, "http://www.ted.com/")
      puts separator
    end
  end
end
|
65
|
+
|
66
|
+
class Converter
|
67
|
+
include DownloadUtils
|
68
|
+
include UnixTools
|
69
|
+
|
70
|
+
# Loads a talk page and collects the data the other methods rely on:
# normalized URL, talk id, video URL, title, speaker, available language
# codes, the English title/description and the language-name table.
#
# Prints a message and exits when the URL does not look like a TED talk URL,
# when the page cannot be fetched, or when any StandardError is raised while
# extracting the data.
def initialize(url)
  unless /(?:http\:\/\/)?(?:www\.)?ted\.com\/talks\/(?:lang\/[^\/]+\/)?(.+\.html)/ =~ url
    puts "The specified URL does not seem to be a valid one"
    exit
  end
  # Drop any "lang/xx/" segment so the canonical talk URL is used.
  @url = "http://www.ted.com/talks/" + $1

  @html = get_html(@url)
  unless @html
    puts "The specified URL does not respond with a TED Talk content"
    exit
  end

  @url_basename = File.basename(@url)
  ted_doc = Nokogiri::HTML(@html)
  share_node = ted_doc.xpath("//div[@id='share_and_save']").first
  @ted_id = share_node.attribute("data-id").value
  @video_url = ted_doc.xpath("//a[@id='no-flash-video-download']").attribute("href").value
  @basename = File.basename(@video_url, ".*")
  @captions = {}
  @title = ted_doc.xpath("//h1[1]").text.strip rescue ""
  # The speaker is taken to be the part of the title before the first colon.
  @speaker = @title.split(":", 2).first.strip rescue ""

  option_values = ted_doc.xpath("//select[@id='languageCode'][1]/option").map do |option|
    option.attributes["value"].value.strip
  end
  @available_langs = option_values.reject { |code| code == "" }.sort

  @titles = { "en" => get_title("en") }
  @descriptions = { "en" => get_description("en") }
  @language_hash = list_langs
rescue StandardError
  puts "The specified URL does not seem to contain a regular TED Talk contents"
  exit
end
|
109
|
+
|
110
|
+
# Selects +lang+ as the working language.
#
# Returns false (after printing a notice) when the talk does not offer
# +lang+. Otherwise stores it in @lang and, for any language other than
# "en", loads the translated title and description and looks up the
# language name.
def setup_lang(lang)
  unless @available_langs.include?(lang)
    puts "Description in #{lang} is not available"
    return false
  end

  @lang = lang
  # English data is already loaded by the constructor.
  return nil if lang == "en"

  @titles[lang] = get_title(lang)
  @descriptions[lang] = get_description(lang)
  @lang_name = @language_hash[@lang]
end
|
122
|
+
|
123
|
+
# Prints the title, description and available languages of the talk.
#
# lang - language code for the description; English is used when no
#        description was loaded for it.
#
# Missing titles/descriptions are printed as empty strings, and a language
# code without an entry in the language table is shown by its code, instead
# of raising TypeError on String + nil.
def desc_talk(lang = "en")
  setup_lang(lang)
  lang = "en" unless @descriptions[lang]
  puts "\nTitle:\n" + @titles["en"].to_s
  puts @titles[lang] if lang != "en"
  puts ""
  puts "Description:\n" + @descriptions[lang].to_s
  puts ""
  puts "Available Languages: "
  @available_langs.each do |lang_code|
    # The talk page may offer a code that the language table does not list.
    lang_name = @language_hash[lang_code] || lang_code
    puts " " + lang_name + ": " + lang_code
  end
end
|
139
|
+
|
140
|
+
# Runs the whole pipeline: writes caption files, downloads the video,
# extracts the audio, has SpeakSlow produce the modified MP3 and embeds the
# captions in it.
#
# outdir  - existing directory in which a "<id>-<basename>" folder is created
# lang    - language code of the second (bilingual) transcript
# speed   - speed passed to SpeakSlow
# silence - pause length passed to SpeakSlow
#
# Returns false when the video could not be downloaded.
def execute(outdir = "./", lang = "en", speed = 1, silence = 0)
  puts "TedTalk is preparing for the process"
  @outdir = File.join(outdir, @ted_id + "-" + @basename)
  # File.exists? was removed in Ruby 3.2.
  Dir.mkdir(@outdir) unless File.exist?(@outdir)

  @speed = speed
  @silence = silence
  @lang = lang
  get_captions("en")
  setup_lang(lang)
  # English captions were just fetched; only a different language needs a second pass.
  get_captions(lang) unless lang == "en"
  video_filepath = get_binary(@video_url)
  # get_binary returns false on an incomplete download; passing that on would
  # raise TypeError in get_wav.
  unless video_filepath
    puts "Not able to download the video"
    return false
  end
  wav_filepath = get_wav(video_filepath)
  outfile = @outdir + "/" + @basename + "-result.mp3"
  speakslow = SpeakSlow::Converter.new(wav_filepath, outfile)
  speakslow.execute(speed, silence)
  write_info(outfile)
end
|
158
|
+
|
159
|
+
# Fetches the talk page for +lang+ and returns the part of its
# <meta name="title"> content before the first "|", stripped.
# Returns "" when that value cannot be extracted.
def get_title(lang)
  page = Nokogiri::HTML(get_html("http://www.ted.com/talks/lang/#{lang}/" + @url_basename))
  begin
    meta = page.xpath("//meta[@name='title']").first
    meta.attribute("content").value.split("|").first.strip
  rescue StandardError
    ""
  end
end
|
165
|
+
|
166
|
+
# Fetches the talk page for +lang+ and returns its <meta name="description">
# content with a leading "TED Talks" prefix removed.
#
# Returns the unmodified content when the prefix is absent, and "" when the
# page has no description meta tag. The earlier `$1 rescue temp ""` never
# fell back, because a failed match raises nothing and $1 is simply nil.
def get_description(lang)
  lang_url = "http://www.ted.com/talks/lang/#{lang}/" + @url_basename
  lang_doc = Nokogiri::HTML(get_html(lang_url))
  meta = lang_doc.xpath("//meta[@name='description']").first
  return "" unless meta

  content = meta["content"].to_s.strip
  # /m lets the prefix be stripped from multi-line descriptions as well.
  /\ATED Talks\s*(.+)\z/m =~ content ? $1 : content
end
|
174
|
+
|
175
|
+
# Downloads the captions of the talk in +lang+, writes them to
# "<outdir>/<basename>-<lang>.txt" and stores them in @captions[lang].
#
# Returns the caption list: an array of hashes whose first element is an
# empty placeholder covering the time before the first caption. Returns
# false (after printing a notice) when +lang+ is not offered or the
# response carries no captions.
def get_captions(lang = "en")
  unless @available_langs.index(lang)
    puts "Caption in #{lang} is not available"
    return false
  end
  json_url = "http://www.ted.com/talks/subtitles/id/#{@ted_id}"
  json_url << "/lang/#{lang}" unless lang == "en"
  script_json = get_json(json_url)
  raw_captions = script_json && script_json["captions"]
  # A response without a "captions" entry would otherwise raise NoMethodError.
  unless raw_captions
    puts "Caption in #{lang} is not available"
    return false
  end

  num_digits = raw_captions.size.to_s.length
  # Every caption start time is shifted by the same fixed offset.
  offset = INTRO_DURATION - AD_DURATION + POST_AD_DURATION
  captions = [{:id => sprintf("%0#{num_digits}d", 0),
               :start_time_s => "00.00.00",
               :duration => nil,
               :content => "",
               :start_of_paragraph => false,
               :start_time => 0
              }]
  raw_captions.each_with_index do |caption, index|
    start_time = offset + caption["startTime"].to_i
    duration = caption["duration"].to_i
    # The placeholder lasts until the first real caption starts.
    captions[0][:duration] = start_time if index == 0
    captions << {
      :id => sprintf("%0#{num_digits}d", index + 1),
      :start_time => start_time,
      :start_time_s => format_time(start_time),
      :duration => duration,
      :content => caption["content"].gsub(/\s+/, " "),
      :end_time_s => format_time(start_time + duration),
      :start_of_paragraph => caption["startOfParagraph"]
    }
  end
  File.open(@outdir + "/" + @basename + "-" + lang + ".txt", "w") do |f|
    f.write format_captions(captions)
  end
  @captions[lang] = captions
  captions
end
|
214
|
+
|
215
|
+
# Builds a table mapping language codes to language names from the links on
# the TED translation languages page. The code is the last path segment of
# each link; the name is the link text with its first parenthesized part
# removed.
def list_langs
  page = Nokogiri::HTML(get_html("http://www.ted.com/translate/languages"))
  page.xpath("//div[@id='content'][1]//ul//a").each_with_object({}) do |anchor, table|
    code = anchor.attribute("href").value.split("/")[-1].strip
    table[code] = anchor.text.sub(/\(.+?\)/){""}.strip
  end
end
|
227
|
+
|
228
|
+
# Writes ID3v2 tags (artist, title, genre) and an unsynchronized-lyrics frame
# holding titles, descriptions and captions into the MP3 at +filepath+.
#
# Text in the second language is added only when @lang differs from "en".
# Previously the default "en" run wrote the title, the description and every
# caption twice, because the English data was also treated as the
# translation.
def write_info(filepath)
  puts "Writing captions to MP3"
  bilingual = @lang && @lang != "en"
  TagLib::MPEG::File.open(filepath) do |mp3|
    tag = mp3.id3v2_tag
    tag.artist = "TED Talk "
    title = @title.to_s
    title += " (with captions in #{@lang_name})" if @lang_name
    title += " [x#{@speed}]" if @speed && @speed != 1
    tag.title = title
    tag.genre = "Talk"

    caption_text = @titles["en"] + "\n"
    caption_text << @titles[@lang] + "\n" if bilingual && @titles[@lang]
    caption_text << "--------------------\n"
    caption_text << @descriptions["en"] + "\n"
    caption_text << @descriptions[@lang] + "\n" if bilingual && @descriptions[@lang]
    caption_text << "\n"
    translated = bilingual ? @captions[@lang] : nil
    @captions["en"].each_with_index do |c, index|
      caption_text << "--------------------\n\n" if c[:start_of_paragraph]
      next if c[:content] == ""
      caption_text << c[:content] + "\n"
      next unless translated
      # Translated captions are paired with English ones by position; a
      # missing counterpart is skipped.
      counterpart = translated[index]
      caption_text << counterpart[:content] + "\n\n" if counterpart && counterpart[:content]
    end

    uslt = TagLib::ID3v2::UnsynchronizedLyricsFrame.new
    uslt.language = "eng"
    uslt.text_encoding = TagLib::String::UTF8
    uslt.text = caption_text

    tag.add_frame(uslt)
    mp3.save
  end
end
|
263
|
+
|
264
|
+
# Renders +captions+ as the text of a caption file: a header with talk id,
# speaker, title (with the caption language, "English" when @lang_name is
# unset) and URL, followed by one zero-padded, 1-based numbered line per
# caption. A blank line precedes captions that start a paragraph.
def format_captions(captions)
  header = [
    "TED Talk ID: #{@ted_id}\n",
    "Speaker: #{@speaker}\n",
    "Title: #{@title} (with captions in #{@lang_name || "English"})\n",
    "URL: #{@url}\n\n"
  ].join
  width = captions.size.to_s.length
  captions.each_with_index.inject(header) do |text, (caption, position)|
    text << "\n" if caption[:start_of_paragraph]
    text << "#{(position + 1).to_s.rjust(width, "0")} #{caption[:content]} \n"
  end
end
|
279
|
+
|
280
|
+
# Formats a time given in milliseconds as "MM.SS.CC": minutes, seconds and
# hundredths of a second, each zero-padded to at least two digits. Minutes
# are not wrapped at 60.
def format_time(time)
  total_seconds, millis = time.divmod(1000)
  minutes, seconds = total_seconds.divmod(60)
  sprintf("%02d.%02d.%02d", minutes, seconds, millis / 10)
end
|
291
|
+
|
292
|
+
# Returns the sorted list of download.ted.com MP4 URLs found in +html+ whose
# file name starts with @basename.
#
# The host dots and @basename are matched literally; previously "." matched
# any character and regexp metacharacters in @basename were interpreted.
def get_video_urls(html)
  html.scan(/http:\/\/download\.ted\.com\/talks\/#{Regexp.escape(@basename.to_s)}.*?\.mp4/).sort
end
|
295
|
+
|
296
|
+
end # of class
|
297
|
+
end # of module
|
298
|
+
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require "rubygems"
|
5
|
+
require "progressbar"
|
6
|
+
|
7
|
+
$:.unshift File.expand_path(File.dirname(__FILE__))
|
8
|
+
require 'unix_tools'
|
9
|
+
|
10
|
+
module DownloadUtils
|
11
|
+
# Returns the body of +url+ (after following redirects), using a cache file
# named after the MD5 of the final URL.
#
# without_cache - when true, the cached copy is ignored and refreshed.
#
# Prints a message and exits when the response is not a success or the
# request raises.
def get_html(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.new.update(url).to_s
  # File.exists? was removed in Ruby 3.2.
  return File.read(cache_path) if File.exist?(cache_path) && !without_cache

  begin
    res = Net::HTTP.get_response(URI(url))
    unless res.is_a?(Net::HTTPSuccess)
      puts "HTML download error"
      exit
    end
    html = res.body
    File.open(cache_path, "w") do |f|
      f.write html
    end
    html
  rescue StandardError
    puts "Not able to download HTML"
    exit
  end
end
|
37
|
+
|
38
|
+
# Returns the parsed JSON document at +url+ (after following redirects),
# using a cache file named after the MD5 of the final URL.
#
# without_cache - when true, the cached copy is ignored and refreshed.
#
# A cache entry that no longer parses is ignored and downloaded again.
# Prints a message and exits when the response is not a success, is not
# valid JSON, or the request raises.
def get_json(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.new.update(url).to_s
  # File.exists? was removed in Ruby 3.2.
  if File.exist?(cache_path) && !without_cache
    begin
      return JSON.parse(File.read(cache_path))
    rescue JSON::ParserError
      # corrupt cache entry: fall through and fetch a fresh copy
    end
  end

  begin
    res = Net::HTTP.get_response(URI(url))
    unless res.is_a?(Net::HTTPSuccess)
      puts "JSON download error"
      exit
    end
    script = JSON.parse(res.body)
    File.open(cache_path, "w") do |f|
      f.write JSON.pretty_generate(script)
    end
    script
  rescue StandardError
    puts "Not able to download JSON"
    exit
  end
end
|
61
|
+
|
62
|
+
# Downloads the file at +url+ (after following redirects) into the cache
# directory, showing a progress bar, and returns its path.
#
# without_cache - when true, an existing cached file is downloaded again.
#
# Returns false when the response is not a success or the number of bytes
# written differs from the Content-Length header. The data is written to a
# ".part" file that is renamed only after a complete download, so an
# interrupted transfer is never reused as a cached file.
def get_binary(url, without_cache = false)
  url = get_final_location(url)
  basename = File.basename(url)
  filepath = CACHE_DIR + "/" + basename
  # File.exists? was removed in Ruby 3.2.
  return filepath if File.exist?(filepath) && !without_cache

  partial_path = filepath + ".part"
  file_size = 0
  succeeded = false
  uri = URI(url)
  puts "Downloading file: " + basename
  begin
    # Block form closes the handle even when the transfer raises.
    File.open(partial_path, "wb") do |file|
      Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
        http.request_get(uri.request_uri) do |res|
          # An error page must not be stored as if it were the video.
          next unless res.is_a?(Net::HTTPSuccess)
          succeeded = true
          file_size = res["content-length"].to_i
          bar = ProgressBar.new(basename, file_size)
          bar.file_transfer_mode
          res.read_body do |segment|
            bar.inc(segment.size)
            file.write(segment)
          end
        end
      end
    end
    print "\n"
    return false unless succeeded && download_successful?(partial_path, file_size)
    File.rename(partial_path, filepath)
    filepath
  ensure
    # Only present here when the download failed or was interrupted.
    File.delete(partial_path) if File.exist?(partial_path)
  end
end
|
86
|
+
|
87
|
+
# Converts the video at +video_filepath+ to a mono 44.1 kHz 16-bit WAV file
# in the cache directory using ffmpeg, and returns the WAV path. An existing
# WAV of the same name is reused.
#
# ffmpeg is started with an argument list rather than a shell command line,
# so paths containing spaces or shell metacharacters are passed through
# unchanged. Output left behind by a failed conversion is removed so it is
# not reused as a cached result.
def get_wav(video_filepath)
  ffmpeg = UnixTools::check_command(FFMPEG)
  basename = File.basename(video_filepath, ".*")
  filepath = CACHE_DIR + "/" + basename + ".wav"
  # File.exists? was removed in Ruby 3.2.
  return filepath if File.exist? filepath
  puts "Converting to audio: #{basename}.wav"
  converted = system(ffmpeg, "-loglevel", "panic", "-i", video_filepath,
                     "-ac", "1", "-vn", "-acodec", "pcm_s16le", "-ar", "44100", filepath)
  unless converted
    puts "Not able to convert to audio: #{basename}.wav"
    File.delete(filepath) if File.exist?(filepath)
  end
  return filepath
end
|
96
|
+
|
97
|
+
# Follows "Location" headers starting at +url+ and returns the last URL
# reached.
#
# limit - maximum number of further hops to follow (guards against redirect
#         loops); once it is used up the current URL is returned.
#
# Relative Location values are resolved against the current URL. On any
# StandardError a notice is printed and the current URL is returned.
def get_final_location(url, limit = 10)
  return url if limit <= 0
  begin
    # Block form: the headers are available before the body is read, and
    # returning from the block avoids downloading the body.
    Net::HTTP.get_response(URI(url)) do |res|
      location = res["location"]
      return url if location.nil?
      return get_final_location(URI.join(url, location).to_s, limit - 1)
    end
  rescue StandardError
    puts "Not able to reach at the final location"
    return url
  end
end
|
109
|
+
|
110
|
+
# True when +full_file_path+ exists and its size in bytes equals +file_size+.
def download_successful?(full_file_path, file_size)
  return false unless File.exist?(full_file_path)
  File.size(full_file_path) == file_size
end
|
113
|
+
|
114
|
+
module_function :get_html
|
115
|
+
module_function :get_json
|
116
|
+
module_function :get_binary
|
117
|
+
module_function :get_wav
|
118
|
+
module_function :get_final_location
|
119
|
+
module_function :download_successful?
|
120
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
$:.unshift File.expand_path(File.dirname(__FILE__))
|
5
|
+
|
6
|
+
module UnixTools
|
7
|
+
|
8
|
+
# Recursively deletes +directory_path+; a plain file is deleted directly.
#
# Symbolic links are removed as links and never followed, so a link that
# points at a directory outside the tree leaves that directory untouched.
# Only the "." and ".." entries are skipped. Failures to remove an entry are
# ignored (best effort).
def delete_dir(directory_path)
  if !File.symlink?(directory_path) && FileTest.directory?(directory_path)
    Dir.foreach(directory_path) do |file|
      next if file == "." || file == ".."
      delete_dir(directory_path.sub(/\/+$/,"") + "/" + file )
    end
    begin
      Dir.rmdir(directory_path)
    rescue SystemCallError
      nil
    end
  else
    begin
      File.delete(directory_path)
    rescue SystemCallError
      nil
    end
  end
end
|
19
|
+
|
20
|
+
# Locates an executable with `which`, trying +command+ as given first and
# then its base name, and returns the stripped path that was found.
#
# Prints the result of the check; prints a notice and exits when neither
# lookup succeeds. `which` is started with an argument list, so the command
# name is never interpreted by a shell.
def check_command(command)
  basename = File.basename(command)
  print "Checking #{basename} command: "
  [command, basename].each do |candidate|
    path = begin
      IO.popen(["which", candidate], :err => File::NULL) { |io| io.gets }
    rescue SystemCallError
      # `which` itself could not be started
      nil
    end
    next unless path
    puts "detected at #{path}"
    return path.strip
  end
  puts "not installed to the system"
  exit
end
|
35
|
+
|
36
|
+
module_function :check_command
|
37
|
+
module_function :delete_dir
|
38
|
+
end
|
data/ted_talk.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'ted_talk/version'
|
5
|
+
|
6
|
+
# Gem specification for ted_talk. The file list is taken from git, and every
# file under bin/ is installed as an executable.
Gem::Specification.new do |gem|
  gem.name          = "ted_talk"
  gem.version       = TedTalk::VERSION
  gem.authors       = ["Yoichiro Hasebe"]
  gem.email         = ["yohasebe@gmail.com"]
  gem.description   = "TedTalk helps download TED talk video "
  gem.description  += "and convert it to a slowed down MP3 with pauses that is useful for English learning"
  gem.summary       = "TED talk downloader and converter for English learners"
  gem.homepage      = "http://github.com/yohasebe/ted_talk"
  # LICENSE.txt ships the MIT License; declare it so it appears in the gem metadata.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_development_dependency "minitest"
  gem.add_runtime_dependency "progressbar"
  gem.add_runtime_dependency "json"
  gem.add_runtime_dependency "taglib-ruby"
  gem.add_runtime_dependency "nokogiri"
  gem.add_runtime_dependency "speak_slow"
  gem.add_runtime_dependency "trollop"
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'ted_talk'
|
3
|
+
|
4
|
+
# Smoke tests that run the converter against a live TED talk page. They
# need network access and the external tools used by the gem, and they only
# check that the calls complete.
class TestTedTalk < MiniTest::Unit::TestCase

  def setup
    @source_url = "http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html"
    @outdir = File.expand_path(File.dirname(__FILE__)) + "/temp"
    # `rm -rf #{@outdir}` if File.exist? @outdir
    # Dir.mkdir avoids a shell call with an interpolated path; File.exists?
    # was removed in Ruby 3.2.
    Dir.mkdir(@outdir) unless File.exist?(@outdir)
    @tedtalk = TedTalk::Converter.new(@source_url)
  end

  def test_description
    @tedtalk.desc_talk("ja")
  end

  def test_execution
    speed = 0.8
    silence = 3
    language = "ja"
    @tedtalk.execute(@outdir, language, speed, silence)
  end

  def teardown
    # `rm -rf #{@outdir}`
  end

end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ted_talk
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Yoichiro Hasebe
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: minitest
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: progressbar
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: json
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: taglib-ruby
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: nokogiri
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: speak_slow
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: trollop
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
description: TedTalk helps download TED talk video and covert it to a slowed down
|
127
|
+
MP3 with pauses that is useful for English learning
|
128
|
+
email:
|
129
|
+
- yohasebe@gmail.com
|
130
|
+
executables:
|
131
|
+
- ted_talk
|
132
|
+
extensions: []
|
133
|
+
extra_rdoc_files: []
|
134
|
+
files:
|
135
|
+
- .gitignore
|
136
|
+
- Gemfile
|
137
|
+
- LICENSE.txt
|
138
|
+
- README.md
|
139
|
+
- Rakefile
|
140
|
+
- bin/ted_talk
|
141
|
+
- lib/ted_talk.rb
|
142
|
+
- lib/ted_talk/download_utils.rb
|
143
|
+
- lib/ted_talk/unix_tools.rb
|
144
|
+
- lib/ted_talk/version.rb
|
145
|
+
- ted_talk.gemspec
|
146
|
+
- test/ted_talk_test.rb
|
147
|
+
homepage: http://github.com/yohasebe/ted_talk
|
148
|
+
licenses: []
|
149
|
+
post_install_message:
|
150
|
+
rdoc_options: []
|
151
|
+
require_paths:
|
152
|
+
- lib
|
153
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
154
|
+
none: false
|
155
|
+
requirements:
|
156
|
+
- - ! '>='
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
|
+
none: false
|
161
|
+
requirements:
|
162
|
+
- - ! '>='
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
requirements: []
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 1.8.24
|
168
|
+
signing_key:
|
169
|
+
specification_version: 3
|
170
|
+
summary: TED talk downloader and converter for English learners
|
171
|
+
test_files:
|
172
|
+
- test/ted_talk_test.rb
|