ted_talk 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +20 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +57 -0
- data/Rakefile +8 -0
- data/bin/ted_talk +110 -0
- data/lib/ted_talk.rb +298 -0
- data/lib/ted_talk/download_utils.rb +120 -0
- data/lib/ted_talk/unix_tools.rb +38 -0
- data/lib/ted_talk/version.rb +3 -0
- data/ted_talk.gemspec +27 -0
- data/test/ted_talk_test.rb +29 -0
- metadata +172 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Yoichiro Hasebe
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# TedTalk
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
TedTalk helps download a TED talk video and convert it to a slowed-down MP3 with pauses, which is useful for English learning
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
TedTalk requires [FFmpeg](http://ffmpeg.org/) and [SoX](http://sox.sourceforge.net/) with LAME support, as well as the [TagLib](http://taglib.github.com/) audio meta-data library, to be installed on the system
|
10
|
+
|
11
|
+
$ gem install ted_talk
|
12
|
+
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
Basic usage: ted_talk desc <option> - show TED Talk description(s)
|
17
|
+
ted_talk exec <option> - download and convert a TED Talk video
|
18
|
+
ted_talk delete - delete cache folder
|
19
|
+
|
20
|
+
For details about <option>, type:
|
21
|
+
ted_talk desc -h
|
22
|
+
or ted_talk exec -h
|
23
|
+
|
24
|
+
[global options]:
|
25
|
+
--version, -v: Print version and exit
|
26
|
+
--help, -h: Show this message
|
27
|
+
|
28
|
+
### desc
|
29
|
+
|
30
|
+
ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS
|
31
|
+
feed or the URL of a specific talk
|
32
|
+
|
33
|
+
Usage: ted_talk desc <options>
|
34
|
+
where <options> are:
|
35
|
+
|
36
|
+
[desc options]:
|
37
|
+
--lang, -l <s>: Language of description (default: en)
|
38
|
+
--rss, -r: Show descriptions of the newest talks from TED Talk RSS
|
39
|
+
--url, -u <s>: URL of a specific TED Talk
|
40
|
+
--help, -h: Show this message
|
41
|
+
|
42
|
+
### exec
|
43
|
+
|
44
|
+
ted_talk exec subcommand downloads a TED Talk video and converts it to an MP3 file
|
45
|
+
that is modified in a specified fashion
|
46
|
+
|
47
|
+
Usage: ted_talk exec <options>
|
48
|
+
where <options> are:
|
49
|
+
|
50
|
+
[exec options]
|
51
|
+
--url, -u <s>: URL of a specific TED Talk
|
52
|
+
--lang, -l <s>: Language of (bilingual) transcripts (default: en)
|
53
|
+
--outdir, -o <s>: Directory for file output (default: ./)
|
54
|
+
--speed, -s <f>: Speed of output file [0.1 - 100] (default: 1.0)
|
55
|
+
--silence, -i <f>: Length (seconds) of a pause added to each utterance
|
56
|
+
[0 - 120] (default: 0.0)
|
57
|
+
--help, -h: Show this message
|
data/Rakefile
ADDED
data/bin/ted_talk
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
|
5
|
+
require 'rubygems'
|
6
|
+
require 'trollop'
|
7
|
+
require 'ted_talk'
|
8
|
+
|
9
|
+
# http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html
|
10
|
+
# http://www.ted.com/talks/jonathan_haidt_on_the_moral_mind.html
|
11
|
+
# http://www.ted.com/talks/susan_cain_the_power_of_introverts.html
|
12
|
+
# http://www.ted.com/talks/amy_cuddy_your_body_language_shapes_who_you_are.html
|
13
|
+
# http://www.ted.com/talks/ken_robinson_says_schools_kill_creativity.html
|
14
|
+
# http://www.ted.com/talks/pranav_mistry_the_thrilling_potential_of_sixthsense_technology.html
|
15
|
+
# http://www.ted.com/talks/rives_reinventing_the_encyclopedia_game.html
|
16
|
+
# http://www.ted.com/talks/hans_rosling_and_the_magic_washing_machine.html
|
17
|
+
# http://www.ted.com/talks/israel_and_iran_a_love_story.html
|
18
|
+
# http://www.ted.com/talks/lemon_andersen_performs_please_don_t_take_my_air_jordans.html
|
19
|
+
# http://www.ted.com/talks/j_j_abrams_mystery_box.html
|
20
|
+
# http://www.ted.com/talks/robert_gupta_between_music_and_medicine.html
|
21
|
+
|
22
|
+
|
23
|
+
# Command-line front end of the ted_talk gem.
#
# Three sub-commands are dispatched below:
#   desc   - print the description of one talk (--url) or of the newest
#            talks (--rss)
#   exec   - download a talk and convert it to an MP3
#   delete - remove the cache folder

# Words at which parsing of the global options stops.
SUB_COMMANDS = ["desc", "exec"]

global_opts = Trollop::options do
  version TedTalk::VERSION
  banner <<-EOS
TedTalk helps download TED talk video and covert it to a slowed down MP3 with pauses that is helpful for English learning

Basic usage: ted_talk desc <option> - show TED Talk description(s)
             ted_talk exec <option> - download and convert a TED Talk video
             ted_talk delete        - delete cache folder

For details about <option>, type:
             ted_talk desc -h
          or ted_talk exec -h

[global options]:
EOS

  stop_on SUB_COMMANDS
end

cmd = ARGV.shift # get the subcommand

case cmd
when "desc" # parse desc options
  desc_opts = Trollop::options do
    banner <<-EOS
ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS feed or the URL of a specific talk

Usage: ted_talk desc <options>
where <options> are:

[desc options]:
EOS

    opt :lang, "Language of description", :default => "en", :type => :string
    opt :rss, "Show descriptions of the newest talks from TED Talk RSS", :default => false
    opt :url, "URL of a specific TED Talk", :type => :string
  end

  if desc_opts[:url] && /http\:\/\/www\.ted\.com\/talks\// !~ desc_opts[:url]
    Trollop::die :url, "must include 'http://www.ted.com/talks/'"
  end

  lang = desc_opts[:lang]

  # --url takes precedence over --rss when both are given.
  if (source_url = desc_opts[:url])
    tedtalk = TedTalk::Converter.new(source_url)
    tedtalk.desc_talk(lang)
  elsif desc_opts[:rss]
    TedTalk.desc_talks_rss(lang)
  else
    Trollop::die "invalid options"
  end
when "exec" # parse exec options
  exec_opts = Trollop::options do
    banner <<-EOS
ted_talk exec subcommand download TED Talk video and convert it to an MP3 file that is modified in a specified fashion

Usage: ted_talk exec <options>
where <options> are:

[exec options]
EOS

    opt :url, "URL of a specific TED Talk", :type => :string
    opt :lang, "Language of (bilingual) transcripts", :default => "en", :type => :string
    opt :outdir, "Directory for file output", :default => "./"
    opt :speed, "Speed of output file [0.1 - 100]", :default => 1.0
    opt :silence, "Length (secondes) of a pause added to each utterance [0 - 120]", :default => 0.0
  end

  # Without a URL there is nothing to download; say so explicitly.
  Trollop::die :url, "must be specified" unless exec_opts[:url]
  # File.directory? returns false for a missing path, whereas File.ftype
  # raises Errno::ENOENT before the message below could be shown.
  Trollop::die :outdir, "must be an existing directory" unless File.directory?(exec_opts[:outdir])
  Trollop::die :speed, "must between 0.1 to 100" unless exec_opts[:speed] >= 0.1 && exec_opts[:speed] <= 100
  Trollop::die :silence, "must be 0 to 120" unless exec_opts[:silence] >= 0 && exec_opts[:silence] <= 120

  source_url = exec_opts[:url]
  lang       = exec_opts[:lang]
  outdir     = exec_opts[:outdir]
  speed      = exec_opts[:speed]
  silence    = exec_opts[:silence]

  tedtalk = TedTalk::Converter.new(source_url)
  tedtalk.execute(outdir, lang, speed, silence)
when "delete"
  TedTalk.delete_cache
else
  Trollop::die "unknown subcommand #{cmd.inspect}"
end
|
data/lib/ted_talk.rb
ADDED
@@ -0,0 +1,298 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
$:.unshift "~/Dropbox/code/speak_slow/lib"
|
5
|
+
|
6
|
+
require 'speak_slow'
|
7
|
+
require 'json'
|
8
|
+
require 'net/http'
|
9
|
+
require 'digest/md5'
|
10
|
+
|
11
|
+
require 'rubygems'
|
12
|
+
require 'progressbar'
|
13
|
+
require 'taglib'
|
14
|
+
require 'nokogiri'
|
15
|
+
|
16
|
+
require 'ted_talk/version'
|
17
|
+
require 'ted_talk/download_utils'
|
18
|
+
require 'ted_talk/unix_tools'
|
19
|
+
|
20
|
+
# Preferred location of the ffmpeg executable; DownloadUtils#get_wav passes
# it to UnixTools.check_command to resolve the binary actually used.
FFMPEG = "/usr/local/bin/ffmpeg"

# Folder (next to lib/) where downloaded pages, JSON and media are cached.
CACHE_DIR = File.expand_path(File.dirname(__FILE__)) + "/../cache"

# Offsets combined with each caption's startTime in Converter#get_captions.
# NOTE(review): presumably milliseconds (format_time divides by 1000) - confirm.
INTRO_DURATION = 16500
AD_DURATION = 4000
POST_AD_DURATION = 2000

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
Dir.mkdir(CACHE_DIR) unless File.exist?(CACHE_DIR)
|
29
|
+
|
30
|
+
module TedTalk
|
31
|
+
|
32
|
+
# Deletes the cache folder (CACHE_DIR) through UnixTools.delete_dir and
# reports it on standard output. Always returns true.
def self.delete_cache
  UnixTools.delete_dir(CACHE_DIR)
  puts "Cache folder has been deleted"
  true
end
|
37
|
+
|
38
|
+
# Prints a list of talks to standard output.
#
# For "en" the official RSS feed is read and title, publication date,
# category, original link and description of at most +num+ items are
# printed. For any other language the translation listing page of that
# language is read and the title and absolute URL of every linked talk
# are printed. Both pages are fetched bypassing the cache.
#
# lang - language code as a String
# num  - number of RSS items to print (only used for "en")
def self.desc_talks_rss(lang, num = 12)
  if lang == "en"
    feed = Nokogiri::XML(DownloadUtils.get_html("http://feeds.feedburner.com/tedtalks_video", true))
    items = feed.xpath("//item")
    puts "--------------------------------------------------"
    items.each_with_index do |item, position|
      puts item.xpath("title").text
      puts item.xpath("pubDate").text
      puts item.xpath("category").text
      puts item.xpath("feedburner:origLink").text
      puts item.xpath("description").text
      puts "--------------------------------------------------"
      break if position + 1 == num
    end
  else
    page = Nokogiri::HTML(DownloadUtils.get_html("http://www.ted.com/translate/languages/#{lang}", true))
    puts "--------------------------------------------------"
    page.xpath("//div[@id='list']//dd//a[1]").each do |anchor|
      puts anchor.attribute("title")
      # Site-relative links are turned into absolute ones.
      puts anchor.attribute("href").text.sub(/\A\//, "http://www.ted.com/")
      puts "--------------------------------------------------"
    end
  end
end
|
65
|
+
|
66
|
+
class Converter
|
67
|
+
include DownloadUtils
|
68
|
+
include UnixTools
|
69
|
+
|
70
|
+
# Loads the talk page behind +url+ and extracts everything later steps need:
# talk id, video URL, title, speaker, available caption languages, and the
# English title and description.
#
# The URL is normalised to "http://www.ted.com/talks/<name>.html". The
# process prints a message and exits when the URL has the wrong shape or the
# page cannot be interpreted.
def initialize(url)
  begin
    unless /(?:http\:\/\/)?(?:www\.)?ted\.com\/talks\/(?:lang\/[^\/]+\/)?(.+\.html)/ =~ url
      puts "The specified URL does not seem to be a valid one"
      exit
    end
    @url = "http://www.ted.com/talks/" + Regexp.last_match(1)

    page_source = get_html(@url)
    unless page_source
      puts "The specified URL does not respond with a TED Talk content"
      exit
    end
    @html = page_source

    @url_basename = File.basename(@url)
    ted_doc = Nokogiri::HTML(@html)
    @ted_id = ted_doc.xpath("//div[@id='share_and_save']").first.attribute("data-id").value
    @video_url = ted_doc.xpath("//a[@id='no-flash-video-download']").attribute("href").value
    @basename = File.basename(@video_url, ".*")
    @captions = {}

    # Title and speaker are optional: any failure leaves an empty string.
    @title = begin
      ted_doc.xpath("//h1[1]").text.strip
    rescue StandardError
      ""
    end
    @speaker = begin
      @title.split(":", 2).first.strip
    rescue StandardError
      ""
    end

    @available_langs = []
    ted_doc.xpath("//select[@id='languageCode'][1]/option").each do |option|
      code = option.attributes["value"].value.strip
      @available_langs.push(code) unless code == ""
    end
    @available_langs.sort!

    @titles = { "en" => get_title("en") }
    @descriptions = { "en" => get_description("en") }
    @language_hash = list_langs
  rescue StandardError
    # SystemExit raised by the exit calls above is not a StandardError and
    # therefore passes through untouched.
    puts "The specified URL does not seem to contain a regular TED Talk contents"
    exit
  end
end
|
109
|
+
|
110
|
+
# Selects +lang+ as the working language.
#
# Returns false (after printing a notice) when the talk offers no such
# language. Otherwise @lang is set; for any language other than "en" the
# translated title and description are fetched and @lang_name is looked up
# in @language_hash.
def setup_lang(lang)
  if !@available_langs.include?(lang)
    puts "Description in #{lang} is not available"
    return false
  end

  @lang = lang
  return nil if lang == "en"

  @titles[lang] = get_title(lang)
  @descriptions[lang] = get_description(lang)
  @lang_name = @language_hash[@lang]
end
|
122
|
+
|
123
|
+
# Prints title, description and the available caption languages of the talk.
#
# lang - language code; when no description exists in that language the
#        English one is shown instead.
#
# Interpolation is used instead of String#+ so that a missing description or
# a language code without a display name in @language_hash no longer raises
# TypeError; such a code is listed under the code itself.
def desc_talk(lang = "en")
  setup_lang(lang)
  lang = "en" unless @descriptions[lang]

  puts "\nTitle:\n#{@titles["en"]}"
  puts @titles[lang] if lang != "en"
  puts ""
  puts "Description:\n#{@descriptions[lang]}"
  puts ""
  puts "Available Languages: "
  @available_langs.each do |lang_code|
    lang_name = @language_hash[lang_code] || lang_code
    puts " #{lang_name}: #{lang_code}"
  end
end
|
139
|
+
|
140
|
+
# Runs the whole pipeline for the talk: writes caption text files, downloads
# the video, extracts the audio, lets SpeakSlow produce the MP3, and finally
# stores title and captions in the MP3 tags.
#
# outdir  - existing directory; a "<id>-<basename>" folder is created in it
# lang    - language of the second (bilingual) caption set
# speed   - passed to SpeakSlow::Converter#execute
# silence - passed to SpeakSlow::Converter#execute
def execute(outdir = "./", lang = "en", speed = 1, silence = 0)
  puts "TedTalk is prepararing for the process"
  @outdir = File.join(outdir, @ted_id + "-" + @basename)
  # File.exists? was removed in Ruby 3.2.
  Dir.mkdir(@outdir) unless File.exist?(@outdir)

  @speed = speed
  @silence = silence
  @lang = lang
  get_captions("en")
  setup_lang(lang)
  # The English captions were just fetched; only fetch a second set when a
  # different language was requested.
  get_captions(lang) unless lang == "en"

  video_filepath = get_binary(@video_url)
  # get_binary returns false when the download is incomplete.
  unless video_filepath
    puts "Not able to download the video file"
    exit
  end

  wav_filepath = get_wav(video_filepath)
  outfile = @outdir + "/" + @basename + "-result.mp3"
  speakslow = SpeakSlow::Converter.new(wav_filepath, outfile)
  speakslow.execute(speed, silence)
  write_info(outfile)
end
|
158
|
+
|
159
|
+
# Fetches the talk page in +lang+ and returns the part of its
# <meta name="title"> content that precedes the first "|", stripped.
# Returns "" when that value cannot be extracted.
def get_title(lang)
  page = Nokogiri::HTML(get_html("http://www.ted.com/talks/lang/#{lang}/" + @url_basename))
  begin
    page.xpath("//meta[@name='title']").first.attribute("content").value.split("|").first.strip
  rescue StandardError
    ""
  end
end
|
165
|
+
|
166
|
+
# Fetches the talk page in +lang+ and returns the content of its
# <meta name="description"> tag with a leading "TED Talks" marker removed.
#
# Returns "" when the page has no such tag. Text that does not start with
# the marker, or that spans several lines, is returned as it is; previously
# those cases produced nil because the intended fallback never ran.
def get_description(lang)
  lang_url = "http://www.ted.com/talks/lang/#{lang}/" + @url_basename
  lang_doc = Nokogiri::HTML(get_html(lang_url))
  meta = lang_doc.xpath("//meta[@name='description']").first
  return "" unless meta

  meta["content"].to_s.strip.sub(/\ATED Talks\s*/, "")
end
|
174
|
+
|
175
|
+
# Downloads the captions of the talk in +lang+, writes them as a text file
# to "<outdir>/<basename>-<lang>.txt" and remembers them in @captions[lang].
#
# Returns the caption list: an Array of Hashes with the keys :id,
# :start_time, :start_time_s, :duration, :content, :end_time_s and
# :start_of_paragraph. Returns false after printing a notice when the
# language is not offered or the response carries no captions.
#
# The list starts with an empty pseudo caption (id 0) that covers the time
# before the first real caption.
def get_captions(lang = "en")
  unless @available_langs.index(lang)
    puts "Caption in #{lang} is not available"
    return false
  end

  json_url = "http://www.ted.com/talks/subtitles/id/#{@ted_id}"
  json_url << "/lang/#{lang}" unless lang == "en"
  script_json = get_json(json_url)
  raw_captions = script_json.is_a?(Hash) ? script_json["captions"] : nil
  unless raw_captions.is_a?(Array)
    puts "Caption in #{lang} is not available"
    return false
  end

  num_digits = raw_captions.size.to_s.size
  # Constant shift applied to every caption start time.
  offset = INTRO_DURATION - AD_DURATION + POST_AD_DURATION

  captions = [{ :id => sprintf("%0#{num_digits}d", 0),
                :start_time_s => "00.00.00",
                :duration => nil,
                :content => "",
                :start_of_paragraph => false,
                :start_time => 0 }]

  raw_captions.each_with_index do |caption, index|
    start_time = offset + caption["startTime"].to_i
    duration = caption["duration"].to_i
    captions << { :id => sprintf("%0#{num_digits}d", index + 1),
                  :start_time => start_time,
                  :start_time_s => format_time(start_time),
                  :duration => duration,
                  :content => caption["content"].gsub(/\s+/, " "),
                  :end_time_s => format_time(start_time + duration),
                  :start_of_paragraph => caption["startOfParagraph"] }
  end

  # The pseudo caption lasts until the first real caption begins.
  captions[0][:duration] = captions[1][:start_time] if captions.size > 1

  File.open(@outdir + "/" + @basename + "-" + lang + ".txt", "w") do |f|
    f.write format_captions(captions)
  end
  @captions[lang] = captions
  captions
end
|
214
|
+
|
215
|
+
# Reads TED's translation language index and returns a Hash that maps each
# language code (last path segment of the link) to the language name with
# its first parenthesised part removed.
def list_langs
  page = Nokogiri::HTML(get_html("http://www.ted.com/translate/languages"))
  page.xpath("//div[@id='content'][1]//ul//a").each_with_object({}) do |anchor, names|
    code = anchor.attribute("href").value.split("/")[-1].strip
    names[code] = anchor.text.sub(/\(.+?\)/){""}.strip
  end
end
|
227
|
+
|
228
|
+
# Writes artist, title, genre and the full caption text (as an unsynchronised
# lyrics frame) into the ID3v2 tag of the MP3 at +filepath+.
#
# The caption text holds the English title and description followed by the
# English captions. When a language other than English is selected, the
# translated title, description and captions are added alongside. For "en"
# nothing is added any more: the old code treated English as its own
# translation and wrote every line twice.
def write_info(filepath)
  puts "Writing captions to MP3"
  bilingual = @lang && @lang != "en"

  TagLib::MPEG::File.open(filepath) do |mp3|
    tag = mp3.id3v2_tag
    tag.artist = "TED Talk "

    title = "#{@title}"
    title << " (with captions in #{@lang_name})" if @lang_name
    title << " [x#{@speed}]" if @speed && @speed != 1
    tag.title = title
    tag.genre = "Talk"

    caption_text = "#{@titles["en"]}\n"
    caption_text << "#{@titles[@lang]}\n" if bilingual && @titles[@lang]
    caption_text << "--------------------\n"
    caption_text << "#{@descriptions["en"]}\n"
    caption_text << "#{@descriptions[@lang]}\n" if bilingual && @descriptions[@lang]
    caption_text << "\n"

    translated = bilingual ? @captions[@lang] : nil
    # @captions["en"] is missing when the English captions were unavailable.
    (@captions["en"] || []).each_with_index do |c, index|
      caption_text << "--------------------\n\n" if c[:start_of_paragraph]
      next if c[:content] == ""

      caption_text << c[:content] + "\n"
      next unless translated

      # The translation may have fewer entries than the English captions.
      counterpart = translated[index]
      caption_text << counterpart[:content] + "\n\n" if counterpart && counterpart[:content]
    end

    uslt = TagLib::ID3v2::UnsynchronizedLyricsFrame.new
    uslt.language = "eng"
    uslt.text_encoding = TagLib::String::UTF8
    uslt.text = caption_text

    tag.add_frame(uslt)
    mp3.save
  end
end
|
263
|
+
|
264
|
+
# Renders +captions+ as plain text: a header with talk id, speaker, title
# (with the caption language, "English" when @lang_name is unset) and URL,
# followed by one zero-padded, 1-based numbered line per caption. A blank
# line precedes every caption that starts a paragraph.
def format_captions(captions)
  language = @lang_name || "English"
  width = captions.size.to_s.length
  header = [
    "TED Talk ID: #{@ted_id}\n",
    "Speaker: #{@speaker}\n",
    "Title: #{@title} (with captions in #{language})\n",
    "URL: #{@url}\n\n"
  ].join

  captions.each_with_index.inject(header) do |text, (caption, position)|
    text << "\n" if caption[:start_of_paragraph]
    text << "#{format("%0#{width}d", position + 1)} #{caption[:content]} \n"
  end
end
|
279
|
+
|
280
|
+
# Formats a duration given in milliseconds as "MM.SS.CC" (minutes, seconds,
# hundredths of a second), each zero-padded to two digits. Minutes are not
# wrapped at 60.
#
# The value is truncated to an Integer first: with a Float the old
# arithmetic cancelled the seconds out (1500.0 became "00.00.50").
def format_time(time)
  total_seconds, millis = time.to_i.divmod(1000)
  minutes, seconds = total_seconds.divmod(60)
  format("%02d.%02d.%02d", minutes, seconds, millis / 10)
end
|
291
|
+
|
292
|
+
# Returns the sorted list of download.ted.com MP4 URLs in +html+ whose file
# name starts with @basename.
#
# Host dots and @basename are matched literally, so look-alike hosts and
# regex metacharacters in the basename no longer alter the match.
def get_video_urls(html)
  html.scan(/http:\/\/download\.ted\.com\/talks\/#{Regexp.escape(@basename)}.*?\.mp4/).sort
end
|
295
|
+
|
296
|
+
end # of class
|
297
|
+
end # of module
|
298
|
+
|
@@ -0,0 +1,120 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require "rubygems"
|
5
|
+
require "progressbar"
|
6
|
+
|
7
|
+
$:.unshift File.expand_path(File.dirname(__FILE__))
|
8
|
+
require 'unix_tools'
|
9
|
+
|
10
|
+
module DownloadUtils
|
11
|
+
# Returns the body of the page at +url+ after following redirects.
#
# Bodies are cached in CACHE_DIR under the MD5 hex digest of the final URL.
# The cached copy is returned when it exists unless +without_cache+ is true;
# a fresh download always refreshes the cache. The process prints a message
# and exits when the download fails or the response is not a success.
def get_html(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.new.update(url).to_s
  # File.exists? was removed in Ruby 3.2.
  return File.read(cache_path) if !without_cache && File.exist?(cache_path)

  begin
    res = Net::HTTP.get_response(URI(url))
    unless res.is_a?(Net::HTTPSuccess)
      puts "HTML download error"
      exit
    end
    html = res.body
    File.open(cache_path, "w") { |f| f.write html }
    html
  rescue StandardError
    puts "Not able to download HTML"
    exit
  end
end
|
37
|
+
|
38
|
+
# Returns the parsed JSON document at +url+ after following redirects.
#
# Documents are cached (pretty-printed) in CACHE_DIR under the MD5 hex digest
# of the final URL. The cached copy is used when it exists unless
# +without_cache+ is true. The process prints a message and exits when the
# download fails, the response is not a success, or the body is not valid
# JSON.
def get_json(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.new.update(url).to_s
  # File.exists? was removed in Ruby 3.2.
  return JSON.parse(File.read(cache_path)) if !without_cache && File.exist?(cache_path)

  begin
    res = Net::HTTP.get_response(URI(url))
    unless res.is_a?(Net::HTTPSuccess)
      puts "JSON download error"
      exit
    end
    script = JSON.parse(res.body)
    File.open(cache_path, "w") { |f| f.write JSON.pretty_generate(script) }
    script
  rescue StandardError
    puts "Not able to download JSON"
    exit
  end
end
|
61
|
+
|
62
|
+
# Downloads the file at +url+ (after following redirects) into CACHE_DIR,
# showing a progress bar, and returns its local path.
#
# An existing cached file is returned as it is unless +without_cache+ is
# true. Returns false when the response is not a success or the number of
# bytes written differs from the Content-Length header. In that case, and
# when an error interrupts the transfer, the partial file is removed so a
# later call cannot mistake it for a complete download.
def get_binary(url, without_cache = false)
  url = get_final_location(url)
  basename = File.basename(url)
  filepath = CACHE_DIR + "/" + basename
  # File.exists? was removed in Ruby 3.2.
  return filepath if File.exist?(filepath) && !without_cache

  uri = URI(url)
  expected_size = nil
  puts "Downloading file: " + basename
  begin
    # Block form closes the file even when the transfer raises.
    File.open(filepath, "wb") do |file|
      Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == "https") do |http|
        http.request_get(uri.request_uri) do |res|
          # An error page must not be stored as the video file.
          next unless res.is_a?(Net::HTTPSuccess)

          expected_size = res["content-length"].to_i
          bar = ProgressBar.new(basename, expected_size)
          bar.file_transfer_mode
          res.read_body do |segment|
            bar.inc(segment.size)
            file.write(segment)
          end
        end
      end
    end
  rescue StandardError
    File.delete(filepath) if File.exist?(filepath)
    raise
  end
  print "\n"

  return filepath if download_successful?(filepath, expected_size)

  File.delete(filepath) if File.exist?(filepath)
  false
end
|
86
|
+
|
87
|
+
# Extracts the audio track of +video_filepath+ into a mono, 44.1 kHz,
# 16-bit PCM WAV file inside CACHE_DIR and returns the path of that file.
# An existing WAV of the same base name is reused.
#
# ffmpeg is started with an argument list instead of an interpolated shell
# command, so paths with spaces or shell metacharacters are passed through
# unchanged. When ffmpeg fails, the partial output is removed and the
# process prints a message and exits.
def get_wav(video_filepath)
  ffmpeg = UnixTools::check_command(FFMPEG)
  basename = File.basename(video_filepath, ".*")
  filepath = CACHE_DIR + "/" + basename + ".wav"
  # File.exists? was removed in Ruby 3.2.
  return filepath if File.exist?(filepath)

  puts "Converting to audio: #{basename}.wav"
  converted = system(ffmpeg, "-loglevel", "panic", "-i", video_filepath,
                     "-ac", "1", "-vn", "-acodec", "pcm_s16le", "-ar", "44100", filepath)
  unless converted
    File.delete(filepath) if File.exist?(filepath)
    puts "Not able to convert to audio"
    exit
  end
  filepath
end
|
96
|
+
|
97
|
+
# Follows HTTP redirects starting at +url+ and returns the URL that finally
# answers without redirecting.
#
# url   - URL to resolve
# limit - maximum number of redirects to follow; when it is used up the URL
#         reached so far is returned, so a redirect loop cannot recurse
#         without end
#
# Relative Location headers are resolved against the current URL. When the
# request fails, a notice is printed and +url+ is returned unchanged.
def get_final_location(url, limit = 10)
  return url if limit <= 0

  begin
    Net::HTTP.get_response(URI(url)) do |res|
      location = res["location"]
      return url if location.nil? || !res.is_a?(Net::HTTPRedirection)
      return get_final_location(URI.join(url, location).to_s, limit - 1)
    end
  rescue StandardError
    puts "Not able to reach at the final location"
    return url
  end
end
|
109
|
+
|
110
|
+
# True when the file at +full_file_path+ exists and its size in bytes
# equals +file_size+.
def download_successful?(full_file_path, file_size)
  return false unless File.exist?(full_file_path)

  File.size(full_file_path) == file_size
end
|
113
|
+
|
114
|
+
module_function :get_html
|
115
|
+
module_function :get_json
|
116
|
+
module_function :get_binary
|
117
|
+
module_function :get_wav
|
118
|
+
module_function :get_final_location
|
119
|
+
module_function :download_successful?
|
120
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
$:.unshift File.expand_path(File.dirname(__FILE__))
|
5
|
+
|
6
|
+
module UnixTools
|
7
|
+
|
8
|
+
# Recursively deletes +directory_path+ (a directory or a single file).
#
# Deletion stays best effort: entries that cannot be removed are skipped
# silently. A symbolic link to a directory is removed as a link and its
# target is left untouched. Only "." and ".." are skipped while walking a
# directory, so entries such as "..." are deleted as well.
def delete_dir(directory_path)
  if File.directory?(directory_path) && !File.symlink?(directory_path)
    Dir.foreach(directory_path) do |entry|
      next if entry == "." || entry == ".."

      delete_dir(directory_path.sub(/\/+$/, "") + "/" + entry)
    end
    begin
      Dir.rmdir(directory_path)
    rescue SystemCallError
      nil
    end
  else
    begin
      File.delete(directory_path)
    rescue SystemCallError
      nil
    end
  end
end
|
19
|
+
|
20
|
+
# Locates an executable and returns its path.
#
# command - path or bare name of the program. A value containing "/" is
#           tried as given first; otherwise, or when that path is not an
#           executable file, the directories in PATH are searched for the
#           base name.
#
# Progress is printed to standard output. When nothing is found the process
# prints a notice and exits. The lookup is done in Ruby, so no shell is
# started and +command+ is never interpreted as shell input.
def check_command(command)
  basename = File.basename(command)
  print "Checking #{basename} command: "

  executable = lambda { |candidate| File.file?(candidate) && File.executable?(candidate) }
  path = command if command.include?("/") && executable.call(command)
  path ||= ENV["PATH"].to_s.split(File::PATH_SEPARATOR)
                      .reject(&:empty?)
                      .map { |dir| File.join(dir, basename) }
                      .find(&executable)

  if path
    puts "detected at #{path}"
    return path
  end

  puts "not installed to the system"
  exit
end
|
35
|
+
|
36
|
+
module_function :check_command
|
37
|
+
module_function :delete_dir
|
38
|
+
end
|
data/ted_talk.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'ted_talk/version'
|
5
|
+
|
6
|
+
# Gem specification of ted_talk. The packaged files are taken from the git
# index; executables are all files under bin/ and test files all files
# under test/, spec/ or features/.
Gem::Specification.new do |gem|
  gem.name          = "ted_talk"
  gem.version       = TedTalk::VERSION
  gem.authors       = ["Yoichiro Hasebe"]
  gem.email         = ["yohasebe@gmail.com"]
  gem.description   = "TedTalk helps download TED talk video "
  gem.description  += "and covert it to a slowed down MP3 with pauses that is useful for English learning"
  gem.summary       = "TED talk downloader and converter for English learners"
  gem.homepage      = "http://github.com/yohasebe/ted_talk"
  # LICENSE.txt ships the MIT license; declare it so the registry shows it.
  gem.license       = "MIT"

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_development_dependency "minitest"
  gem.add_runtime_dependency "progressbar"
  gem.add_runtime_dependency "json"
  gem.add_runtime_dependency "taglib-ruby"
  gem.add_runtime_dependency "nokogiri"
  gem.add_runtime_dependency "speak_slow"
  gem.add_runtime_dependency "trollop"
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'ted_talk'
|
3
|
+
|
4
|
+
# Smoke tests for TedTalk::Converter. They talk to ted.com and run the full
# conversion, so network access and the external tools are required.
#
# Minitest 5 renamed the base class to Minitest::Test; the older name is
# used only when the new one is not available.
class TestTedTalk < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)

  def setup
    @source_url = "http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html"
    @outdir = File.expand_path(File.dirname(__FILE__)) + "/temp"
    # `rm -rf #{@outdir}` if File.exists? @outdir
    # Create the folder from Ruby rather than through a shell command.
    Dir.mkdir(@outdir) unless File.exist?(@outdir)
    @tedtalk = TedTalk::Converter.new(@source_url)
  end

  # Passes when printing the Japanese description raises nothing.
  def test_description
    @tedtalk.desc_talk("ja")
  end

  # Passes when the complete download and conversion raises nothing.
  def test_execution
    speed = 0.8
    silence = 3
    language = "ja"
    @tedtalk.execute(@outdir, language, speed, silence)
  end

  def teardown
    # `rm -rf #{@outdir}`
  end

end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ted_talk
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Yoichiro Hasebe
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-24 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: minitest
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: progressbar
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: json
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: taglib-ruby
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: nokogiri
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: speak_slow
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :runtime
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: trollop
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
description: TedTalk helps download TED talk video and covert it to a slowed down
|
127
|
+
MP3 with pauses that is useful for English learning
|
128
|
+
email:
|
129
|
+
- yohasebe@gmail.com
|
130
|
+
executables:
|
131
|
+
- ted_talk
|
132
|
+
extensions: []
|
133
|
+
extra_rdoc_files: []
|
134
|
+
files:
|
135
|
+
- .gitignore
|
136
|
+
- Gemfile
|
137
|
+
- LICENSE.txt
|
138
|
+
- README.md
|
139
|
+
- Rakefile
|
140
|
+
- bin/ted_talk
|
141
|
+
- lib/ted_talk.rb
|
142
|
+
- lib/ted_talk/download_utils.rb
|
143
|
+
- lib/ted_talk/unix_tools.rb
|
144
|
+
- lib/ted_talk/version.rb
|
145
|
+
- ted_talk.gemspec
|
146
|
+
- test/ted_talk_test.rb
|
147
|
+
homepage: http://github.com/yohasebe/ted_talk
|
148
|
+
licenses: []
|
149
|
+
post_install_message:
|
150
|
+
rdoc_options: []
|
151
|
+
require_paths:
|
152
|
+
- lib
|
153
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
154
|
+
none: false
|
155
|
+
requirements:
|
156
|
+
- - ! '>='
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0'
|
159
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
160
|
+
none: false
|
161
|
+
requirements:
|
162
|
+
- - ! '>='
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
requirements: []
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 1.8.24
|
168
|
+
signing_key:
|
169
|
+
specification_version: 3
|
170
|
+
summary: TED talk downloader and converter for English learners
|
171
|
+
test_files:
|
172
|
+
- test/ted_talk_test.rb
|