ted_talk 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/temp
17
+ test/version_tmp
18
+ tmp
19
+ cache/*
20
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in ted_talk.gemspec
4
+ gemspec
5
+
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Yoichiro Hasebe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,57 @@
1
+ # TedTalk
2
+
3
+ ## Description
4
+
5
+ TedTalk helps download a TED talk video and convert it to a slowed-down MP3 with pauses, which is useful for English learning
6
+
7
+ ## Installation
8
+
9
+ TedTalk requires [FFmpeg](http://ffmpeg.org/) and [SoX](http://sox.sourceforge.net/) with LAME support, as well as the [TagLib](http://taglib.github.com/) audio meta-data library, to be installed on the system
10
+
11
+ $ gem install ted_talk
12
+
13
+
14
+ ## Usage
15
+
16
+ Basic usage: tedtalk desc <option> - show TED Talk description(s)
17
+ tedtalk exec <option> - download and convert a TED Talk video
18
+ tedtalk delete - delete cache folder
19
+
20
+ For details about <option>, type:
21
+ tedtalk desc -h
22
+ or tedtalk exec -h
23
+
24
+ [global options]:
25
+ --version, -v: Print version and exit
26
+ --help, -h: Show this message
27
+
28
+ ### desc
29
+
30
+ ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS
31
+ feed or the URL of a specific talk
32
+
33
+ Usage: ted_talk desc <options>
34
+ where <options> are:
35
+
36
+ [desc options]:
37
+ --lang, -l <s>: Language of description (default: en)
38
+ --rss, -r: Show descriptions of the newest talks from TED Talk RSS
39
+ --url, -u <s>: URL of a specific TED Talk
40
+ --help, -h: Show this message
41
+
42
+ ### exec
43
+
44
+ ted_talk exec subcommand download TED Talk video and convert it to an MP3 file
45
+ that is modified in a specified fashion
46
+
47
+ Usage: ted_talk exec <options>
48
+ where <options> are:
49
+
50
+ [exec options]
51
+ --url, -u <s>: URL of a specific TED Talk
52
+ --lang, -l <s>: Language of (bilingual) transcripts (default: en)
53
+ --outdir, -o <s>: Directory for file output (default: ./)
54
+ --speed, -s <f>: Speed of output file [0.1 - 100] (default: 1.0)
55
+ --silence, -i <f>: Length (secondes) of a pause added to each utterance
56
+ [0.1 - 120] (default: 0.0)
57
+ --help, -h: Show this message
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.libs.push "lib"
6
+ t.test_files = FileList['test/*_test.rb']
7
+ t.verbose = true
8
+ end
@@ -0,0 +1,110 @@
1
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

# Command-line entry point for the ted_talk gem.
#
# Parses the global options with Trollop, then dispatches on the first
# remaining argument: "desc" prints talk descriptions, "exec" downloads and
# converts a talk, and "delete" removes the cache folder.

$:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
require 'rubygems'
require 'trollop'
require 'ted_talk'

# http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html
# http://www.ted.com/talks/jonathan_haidt_on_the_moral_mind.html
# http://www.ted.com/talks/susan_cain_the_power_of_introverts.html
# http://www.ted.com/talks/amy_cuddy_your_body_language_shapes_who_you_are.html
# http://www.ted.com/talks/ken_robinson_says_schools_kill_creativity.html
# http://www.ted.com/talks/pranav_mistry_the_thrilling_potential_of_sixthsense_technology.html
# http://www.ted.com/talks/rives_reinventing_the_encyclopedia_game.html
# http://www.ted.com/talks/hans_rosling_and_the_magic_washing_machine.html
# http://www.ted.com/talks/israel_and_iran_a_love_story.html
# http://www.ted.com/talks/lemon_andersen_performs_please_don_t_take_my_air_jordans.html
# http://www.ted.com/talks/j_j_abrams_mystery_box.html
# http://www.ted.com/talks/robert_gupta_between_music_and_medicine.html

# Global option parsing stops at the first of these words so that the
# subcommand's own options are left in ARGV for the second parse.
SUB_COMMANDS = ["desc", "exec", "delete"]
Trollop::options do
  version TedTalk::VERSION
  banner <<-EOS
TedTalk helps download TED talk video and covert it to a slowed down MP3 with pauses that is helpful for English learning

Basic usage: tedtalk desc <option> - show TED Talk description(s)
             tedtalk exec <option> - download and convert a TED Talk video
             tedtalk delete        - delete cache folder

For details about <option>, type:
   tedtalk desc -h
or tedtalk exec -h

[global options]:
EOS

  stop_on SUB_COMMANDS
end

cmd = ARGV.shift # get the subcommand
case cmd

when "desc" # parse desc options
  desc_opts = Trollop::options do
    banner <<-EOS
ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS feed or the URL of a specific talk

Usage: ted_talk desc <options>
where <options> are:

[desc options]:
EOS

    opt :lang, "Language of description", :default => "en", :type => :string
    opt :rss, "Show descriptions of the newest talks from TED Talk RSS", :default => false
    opt :url, "URL of a specific TED Talk", :type => :string
  end

  if desc_opts[:url] && /http\:\/\/www\.ted\.com\/talks\// !~ desc_opts[:url]
    Trollop::die :url, "must include 'http://www.ted.com/talks/'"
  end

  lang = desc_opts[:lang]

  if desc_opts[:url]
    tedtalk = TedTalk::Converter.new(desc_opts[:url])
    tedtalk.desc_talk(lang)
  elsif desc_opts[:rss]
    TedTalk.desc_talks_rss(lang)
  else
    Trollop::die "invalid options"
  end
when "exec" # parse exec options
  exec_opts = Trollop::options do
    banner <<-EOS
ted_talk exec subcommand download TED Talk video and convert it to an MP3 file that is modified in a specified fashion

Usage: ted_talk exec <options>
where <options> are:

[exec options]
EOS

    opt :url, "URL of a specific TED Talk", :type => :string
    opt :lang, "Language of (bilingual) transcripts", :default => "en", :type => :string
    opt :outdir, "Directory for file output", :default=> "./"
    opt :speed, "Speed of output file [0.1 - 100]", :default => 1.0
    opt :silence, "Length (secondes) of a pause added to each utterance [0.1 - 120]", :default => 0.0
  end

  # Without a URL the converter could only report a confusing "not valid" message.
  Trollop::die :url, "must be specified" unless exec_opts[:url]
  # File.directory? is false for a missing path, whereas File::ftype would raise.
  Trollop::die :outdir, "must be an existing directory" unless File.directory?(exec_opts[:outdir])
  Trollop::die :speed, "must between 0.1 to 100" unless exec_opts[:speed] >= 0.1 && exec_opts[:speed] <= 100
  Trollop::die :silence, "must be 0 to 120" unless exec_opts[:silence] >= 0 && exec_opts[:silence] <= 120

  tedtalk = TedTalk::Converter.new(exec_opts[:url])
  tedtalk.execute(exec_opts[:outdir], exec_opts[:lang], exec_opts[:speed], exec_opts[:silence])
when "delete"
  TedTalk.delete_cache
else
  Trollop::die "unknown subcommand #{cmd.inspect}"
end
@@ -0,0 +1,298 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ $:.unshift "~/Dropbox/code/speak_slow/lib"
5
+
6
+ require 'speak_slow'
7
+ require 'json'
8
+ require 'net/http'
9
+ require 'digest/md5'
10
+
11
+ require 'rubygems'
12
+ require 'progressbar'
13
+ require 'taglib'
14
+ require 'nokogiri'
15
+
16
+ require 'ted_talk/version'
17
+ require 'ted_talk/download_utils'
18
+ require 'ted_talk/unix_tools'
19
+
20
+ FFMPEG = "/usr/local/bin/ffmpeg"
21
+
22
+ CACHE_DIR = File.expand_path(File.dirname(__FILE__)) + "/../cache"
23
+
24
+ INTRO_DURATION = 16500
25
+ AD_DURATION = 4000
26
+ POST_AD_DURATION = 2000
27
+
28
# Make sure the cache folder exists before anything tries to read or write it.
# File.exists? was removed in Ruby 3.2, so test for the directory directly.
Dir.mkdir(CACHE_DIR) unless File.directory?(CACHE_DIR)
29
+
30
+ module TedTalk
31
+
32
# Wipes the cache folder (CACHE_DIR) via UnixTools.delete_dir and reports it
# on standard output.
#
# @return [true] always
def self.delete_cache
  UnixTools.delete_dir(CACHE_DIR)
  puts "Cache folder has been deleted"
  true
end
37
+
38
# Prints a list of talks to standard output.
#
# For English the feedburner RSS feed is read and up to +num+ items are shown
# (title, publication date, category, original link and description). For any
# other language the ted.com translation listing page for that language is
# scraped and every linked talk is printed as title plus absolute URL.
#
# @param lang [String] language code; "en" selects the RSS feed
# @param num [Integer] maximum number of RSS items to print (English only)
def self.desc_talks_rss(lang, num = 12)
  separator = "--------------------------------------------------"
  if lang != "en"
    html = DownloadUtils.get_html("http://www.ted.com/translate/languages/#{lang}", true)
    html_doc = Nokogiri::HTML(html)
    puts separator
    html_doc.xpath("//div[@id='list']//dd//a[1]").each do |link|
      puts link.attribute("title")
      puts link.attribute("href").text.sub(/\A\//, "http://www.ted.com/")
      puts separator
    end
  else
    rss_html = DownloadUtils.get_html("http://feeds.feedburner.com/tedtalks_video", true)
    rss_doc = Nokogiri::XML(rss_html)
    puts separator
    rss_doc.xpath("//item").each_with_index do |talk, index|
      puts talk.xpath("title").text
      puts talk.xpath("pubDate").text
      puts talk.xpath("category").text
      # puts source_url = DownloadUtils.get_final_location(talk.xpath("link").text).sub(/\?.+\z/, "")
      puts talk.xpath("feedburner:origLink").text
      puts talk.xpath("description").text
      puts separator
      break if index + 1 == num
    end
  end
end
65
+
66
+ class Converter
67
+ include DownloadUtils
68
+ include UnixTools
69
+
70
# Loads a talk page and extracts everything later steps rely on: the
# normalized URL, talk id, video URL, title, speaker, available caption
# languages, the English title/description and the language-name table.
#
# Prints a message and exits the process when the URL does not look like a
# TED talk URL or when the page cannot be parsed as expected.
#
# @param url [String] URL of a talk page on ted.com
def initialize(url)
  talk_pattern = /(?:http\:\/\/)?(?:www\.)?ted\.com\/talks\/(?:lang\/[^\/]+\/)?(.+\.html)/
  begin
    matched = talk_pattern.match(url)
    unless matched
      puts "The specified URL does not seem to be a valid one"
      exit
    end
    @url = "http://www.ted.com/talks/" + matched[1]

    @html = get_html(@url)
    unless @html
      puts "The specified URL does not respond with a TED Talk content"
      exit
    end

    @url_basename = File.basename(@url)
    ted_doc = Nokogiri::HTML(@html)
    share_box = ted_doc.xpath("//div[@id='share_and_save']").first
    @ted_id = share_box.attribute("data-id").value
    @video_url = ted_doc.xpath("//a[@id='no-flash-video-download']").attribute("href").value
    @basename = File.basename(@video_url, ".*")
    @captions = {}
    @title = ted_doc.xpath("//h1[1]").text.strip rescue ""
    @speaker = @title.split(":", 2).first.strip rescue ""

    # Language codes come from the page's language selector; blank entries are dropped.
    option_values = ted_doc.xpath("//select[@id='languageCode'][1]/option").map do |option|
      option.attributes["value"].value.strip
    end
    @available_langs = option_values.reject { |value| value == "" }.sort

    @titles = { "en" => get_title("en") }
    @descriptions = { "en" => get_description("en") }
    @language_hash = list_langs
  rescue
    puts "The specified URL does not seem to contain a regular TED Talk contents"
    exit
  end
end
109
+
110
# Selects +lang+ as the working language.
#
# For a non-English language the translated title and description are fetched
# and the language's display name is looked up in @language_hash.
#
# @param lang [String] language code
# @return [false] when +lang+ is not among the talk's available languages
def setup_lang(lang)
  unless @available_langs.include?(lang)
    puts "Description in #{lang} is not available"
    return false
  end

  @lang = lang
  return if lang == "en"

  @titles[lang] = get_title(lang)
  @descriptions[lang] = get_description(lang)
  @lang_name = @language_hash[@lang]
end
122
+
123
# Prints the talk's title, description and list of available languages.
#
# The English title is always shown; the title in +lang+ follows it when a
# description in +lang+ could be loaded. Otherwise the output falls back to
# English.
#
# @param lang [String] language code of the description to show
def desc_talk(lang = "en")
  setup_lang(lang)
  lang = "en" unless @descriptions[lang]
  # to_s guards the concatenations against a missing title or description.
  puts "\nTitle:\n" + @titles["en"].to_s
  puts @titles[lang] if lang != "en"
  puts ""
  puts "Description:\n" + @descriptions[lang].to_s
  puts ""
  puts "Available Languages: "
  @available_langs.each do |lang_code|
    # The language table may not know every code offered by the talk page;
    # fall back to the bare code so the concatenation cannot fail on nil.
    lang_name = @language_hash[lang_code] || lang_code
    puts " " + lang_name + ": " + lang_code
  end
end
139
+
140
# Runs the whole pipeline for the talk: writes caption text files, downloads
# the video, extracts a WAV, lets SpeakSlow produce the MP3 and finally
# writes the tags and captions into it.
#
# Output goes to a sub-folder of +outdir+ named "<talk id>-<video basename>".
#
# @param outdir [String] existing folder under which the output folder is created
# @param lang [String] language code for the second (bilingual) caption set
# @param speed [Numeric] speed passed on to SpeakSlow
# @param silence [Numeric] pause length passed on to SpeakSlow
def execute(outdir = "./", lang = "en", speed = 1, silence = 0)
  puts "TedTalk is prepararing for the process"
  @outdir = File.join(outdir, @ted_id + "-" + @basename)
  # File.exists? was removed in Ruby 3.2.
  Dir.mkdir(@outdir) unless File.directory?(@outdir)

  @speed = speed
  @silence = silence
  @lang = lang
  get_captions("en")
  setup_lang(lang)
  # The English captions were written just above; do not fetch them twice.
  get_captions(lang) unless lang == "en"

  video_filepath = get_binary(@video_url)
  unless video_filepath
    # get_binary returns false for an incomplete download; stop here rather
    # than hand a non-path to the converter.
    puts "Not able to download the video file"
    exit
  end

  wav_filepath = get_wav(video_filepath)
  outfile = @outdir + "/" + @basename + "-result.mp3"
  speakslow = SpeakSlow::Converter.new(wav_filepath, outfile)
  speakslow.execute(speed, silence)
  write_info(outfile)
end
158
+
159
# Reads the talk title for +lang+ from the "title" meta tag of the talk's
# language-specific page, keeping only the part before the first "|".
#
# @param lang [String] language code
# @return [String] the title, or "" when it cannot be extracted from the page
def get_title(lang)
  page = Nokogiri::HTML(get_html("http://www.ted.com/talks/lang/#{lang}/" + @url_basename))
  begin
    meta = page.xpath("//meta[@name='title']").first
    meta.attribute("content").value.split("|").first.strip
  rescue StandardError
    ""
  end
end
165
+
166
# Reads the talk description for +lang+ from the "description" meta tag of
# the talk's language-specific page.
#
# A leading "TED Talks" prefix is stripped when present. When the text does
# not have that shape the whole meta content is returned instead of nil,
# which callers would otherwise concatenate into strings.
#
# @param lang [String] language code
# @return [String] the description text
def get_description(lang)
  lang_url = "http://www.ted.com/talks/lang/#{lang}/" + @url_basename
  lang_doc = Nokogiri::HTML(get_html(lang_url))
  content = lang_doc.xpath("//meta[@name='description']").first.attribute("content").value.strip
  content[/\ATED Talks\s*(.+)\z/, 1] || content
end
174
+
175
# Downloads the captions for +lang+, shifts their start times by the intro
# offset, writes them as a text file into the output folder and stores them
# in @captions[lang].
#
# The returned list starts with an empty placeholder entry covering the time
# before the first caption; its duration is the first caption's start time.
#
# @param lang [String] language code
# @return [Array<Hash>, false] the caption entries, or false when captions in
#   +lang+ are not available
def get_captions(lang = "en")
  unless @available_langs.index(lang)
    puts "Caption in #{lang} is not available"
    return false
  end

  json_url = "http://www.ted.com/talks/subtitles/id/#{@ted_id}"
  json_url << "/lang/#{lang}" unless lang == "en"
  raw_captions = get_json(json_url)["captions"]
  unless raw_captions
    # The response parsed as JSON but carries no caption list.
    puts "Caption in #{lang} is not available"
    return false
  end

  num_digits = raw_captions.size.to_s.length
  offset = INTRO_DURATION - AD_DURATION + POST_AD_DURATION
  captions = [{:id => sprintf("%0#{num_digits}d", 0),
               :start_time_s => "00.00.00",
               :duration => nil,
               :content => "",
               :start_of_paragraph => false,
               :start_time => 0}]

  raw_captions.each_with_index do |caption, index|
    start_time = offset + caption["startTime"].to_i
    duration = caption["duration"].to_i
    # The placeholder lasts until the first real caption starts.
    captions.first[:duration] = start_time if index == 0
    captions << {
      :id => sprintf("%0#{num_digits}d", index + 1),
      :start_time => start_time,
      :start_time_s => format_time(start_time),
      :duration => duration,
      :content => caption["content"].gsub(/\s+/, " "),
      :end_time_s => format_time(start_time + duration),
      :start_of_paragraph => caption["startOfParagraph"]
    }
  end

  File.open(@outdir + "/" + @basename + "-" + lang + ".txt", "w") do |f|
    f.write format_captions(captions)
  end
  @captions[lang] = captions
  captions
end
214
+
215
# Builds a table of language names from the ted.com translation languages
# page: each link's last URL segment becomes the key and its text, with the
# first parenthesized part removed, becomes the value.
#
# @return [Hash{String => String}] language code => language name
def list_langs
  page = Nokogiri::HTML(get_html("http://www.ted.com/translate/languages"))
  page.xpath("//div[@id='content'][1]//ul//a").each_with_object({}) do |anchor, names|
    code = anchor.attribute("href").value.split("/")[-1].strip
    names[code] = anchor.text.sub(/\(.+?\)/, "").strip
  end
end
227
+
228
# Writes ID3v2 tags into the MP3 at +filepath+: artist, title (with the
# caption language and speed appended when set), genre, and an unsynchronized
# lyrics frame holding titles, descriptions and captions.
#
# Text in @lang is interleaved with the English text only when @lang is not
# English, so an English-only run no longer lists every line twice.
#
# @param filepath [String] path of the MP3 file to tag
def write_info(filepath)
  puts "Writing captions to MP3"
  bilingual = @lang && @lang != "en"
  TagLib::MPEG::File.open(filepath) do |mp3|
    # Ask TagLib to create the ID3v2 tag when the file has none yet.
    tag = mp3.id3v2_tag(true)
    tag.artist = "TED Talk "
    title = @title.to_s.dup
    title << " (with captions in #{@lang_name})" if @lang_name
    title << " [x#{@speed}]" if @speed && @speed != 1
    tag.title = title
    tag.genre = "Talk"

    caption_text = @titles["en"].to_s + "\n"
    caption_text << @titles[@lang] + "\n" if bilingual && @titles[@lang]
    caption_text << "--------------------\n"
    caption_text << @descriptions["en"].to_s + "\n"
    caption_text << @descriptions[@lang] + "\n" if bilingual && @descriptions[@lang]
    caption_text << "\n"

    translated = bilingual ? @captions[@lang] : nil
    @captions["en"].each_with_index do |c, index|
      caption_text << "--------------------\n\n" if c[:start_of_paragraph]
      next if c[:content] == ""
      caption_text << c[:content] + "\n"
      next unless translated
      # The translated list can be shorter than the English one.
      counterpart = translated[index]
      caption_text << counterpart[:content] + "\n\n" if counterpart && counterpart[:content]
    end

    uslt = TagLib::ID3v2::UnsynchronizedLyricsFrame.new
    uslt.language = "eng"
    uslt.text_encoding = TagLib::String::UTF8
    uslt.text = caption_text

    tag.add_frame(uslt)
    mp3.save
  end
end
263
+
264
# Renders caption entries as plain text: a header with talk id, speaker,
# title and URL, followed by one numbered line per caption. A blank line is
# inserted before every caption that starts a paragraph.
#
# @param captions [Array<Hash>] entries with :content and :start_of_paragraph
# @return [String] the formatted text
def format_captions(captions)
  shown_language = @lang_name || "English"
  header = "TED Talk ID: #{@ted_id}\n" \
           "Speaker: #{@speaker}\n" \
           "Title: #{@title} (with captions in #{shown_language})\n" \
           "URL: #{@url}\n\n"

  width = captions.size.to_s.length
  lines = captions.each_with_index.map do |caption, position|
    number = format("%0#{width}d", position + 1)
    paragraph_break = caption[:start_of_paragraph] ? "\n" : ""
    # "#{number} #{caption[:start_time_s]} #{caption[:content]} \n"
    "#{paragraph_break}#{number} #{caption[:content]} \n"
  end

  header + lines.join
end
279
+
280
# Formats a millisecond count as "MM.SS.CC": minutes, seconds and hundredths
# of a second, each zero-padded to two digits. Minutes are not wrapped into
# hours.
#
# @param time [Integer] time in milliseconds
# @return [String] for example "02.05.43" for 125430
def format_time(time)
  hundredths = time % 1000 / 10
  total_seconds = time / 1000
  minutes = total_seconds / 60
  seconds = total_seconds - minutes * 60
  sprintf("%02d.%02d.%02d", minutes, seconds, hundredths)
end
291
+
292
# Collects the download.ted.com MP4 URLs in +html+ whose file name starts
# with this talk's video basename.
#
# The host's dots and the basename are matched literally, so look-alike
# hosts and regex metacharacters in the basename cannot produce false matches.
#
# @param html [String] page source to scan
# @return [Array<String>] matching URLs, sorted
def get_video_urls(html)
  html.scan(%r{http://download\.ted\.com/talks/#{Regexp.escape(@basename.to_s)}.*?\.mp4}).sort
end
295
+
296
+ end # of class
297
+ end # of module
298
+
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ require "rubygems"
5
+ require "progressbar"
6
+
7
+ $:.unshift File.expand_path(File.dirname(__FILE__))
8
+ require 'unix_tools'
9
+
10
+ module DownloadUtils
11
# Returns the response body for +url+ after following redirects, using a
# cache file in CACHE_DIR named after the MD5 of the final URL.
#
# Prints a message and exits the process when the response is not a success
# or the download fails.
#
# @param url [String] address to fetch
# @param without_cache [Boolean] when true, skip the cached copy and download again
# @return [String] the response body
def get_html(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.new.update(url).to_s
  # File.exists? was removed in Ruby 3.2.
  return File.read(cache_path) if File.exist?(cache_path) && !without_cache

  begin
    res = Net::HTTP.get_response(URI(url))
    unless res.is_a?(Net::HTTPSuccess)
      puts "HTML download error"
      exit
    end
    html = res.body
    File.open(cache_path, "w") { |f| f.write html }
    html
  rescue StandardError
    puts "Not able to download HTML"
    exit
  end
end
37
+
38
# Returns the parsed JSON document at +url+ after following redirects, using
# a cache file in CACHE_DIR named after the MD5 of the final URL.
#
# A cache file that no longer parses is ignored and downloaded again. Prints
# a message and exits the process when the download or the parsing fails.
#
# @param url [String] address to fetch
# @param without_cache [Boolean] when true, skip the cached copy and download again
# @return [Object] the parsed JSON value
def get_json(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.new.update(url).to_s

  # File.exists? was removed in Ruby 3.2.
  if File.exist?(cache_path) && !without_cache
    begin
      return JSON.parse(File.read(cache_path))
    rescue JSON::ParserError
      # Unreadable cache entry: fall through and fetch a fresh copy.
    end
  end

  begin
    res = Net::HTTP.get_response(URI(url))
    script = JSON.parse(res.body)
    File.open(cache_path, "w") { |f| f.write JSON.pretty_generate(script) }
    script
  rescue StandardError
    puts "Not able to download JSON"
    exit
  end
end
61
+
62
# Downloads the file at +url+ into CACHE_DIR, showing a progress bar, and
# returns the path of the cached copy.
#
# The data is first written to a ".part" file and only renamed to its final
# name once the size matches the Content-Length header, so an interrupted
# download is never mistaken for a cached file on the next run.
#
# @param url [String] address of the file
# @param without_cache [Boolean] when true, download even if a cached copy exists
# @return [String, false] path of the downloaded file, or false when the
#   download failed or was incomplete
def get_binary(url, without_cache = false)
  url = get_final_location(url)
  basename = File.basename(url)
  filepath = CACHE_DIR + "/" + basename
  # File.exists? was removed in Ruby 3.2.
  return filepath if File.exist?(filepath) && !without_cache

  partial_path = filepath + ".part"
  file_size = 0
  succeeded = false
  uri = URI(url)
  puts "Downloading file: " + basename
  begin
    File.open(partial_path, "wb") do |file|
      Net::HTTP.start(uri.host, uri.port) do |http|
        http.request_get(uri.request_uri) do |res|
          # Do not store an error page as if it were the requested file.
          succeeded = res.is_a?(Net::HTTPSuccess)
          next unless succeeded
          file_size = res["content-length"].to_i
          bar = ProgressBar.new(basename, file_size)
          bar.file_transfer_mode
          res.read_body do |segment|
            bar.inc(segment.size)
            file.write(segment)
          end
        end
      end
    end
    print "\n"

    if succeeded && download_successful?(partial_path, file_size)
      File.rename(partial_path, filepath)
      filepath
    else
      false
    end
  ensure
    # Remove whatever is left of a failed or incomplete transfer.
    File.delete(partial_path) if File.exist?(partial_path)
  end
end
86
+
87
# Extracts the audio track of +video_filepath+ with ffmpeg into a mono,
# 44.1 kHz, 16-bit PCM WAV file in CACHE_DIR and returns that file's path.
# An existing WAV of the same name is reused.
#
# ffmpeg is started with an argument list rather than through a shell, so
# paths containing spaces or shell metacharacters are passed through intact.
#
# @param video_filepath [String] path of the downloaded video
# @return [String] path of the WAV file
def get_wav(video_filepath)
  ffmpeg = UnixTools::check_command(FFMPEG)
  basename = File.basename(video_filepath, ".*")
  filepath = CACHE_DIR + "/" + basename + ".wav"
  # File.exists? was removed in Ruby 3.2.
  return filepath if File.exist?(filepath)

  puts "Converting to audio: #{basename}.wav"
  converted = system(ffmpeg, "-loglevel", "panic", "-i", video_filepath,
                     "-ac", "1", "-vn", "-acodec", "pcm_s16le", "-ar", "44100", filepath)
  # A half-written file would otherwise be picked up as a cache hit next time.
  File.delete(filepath) if !converted && File.exist?(filepath)
  filepath
end
96
+
97
# Follows HTTP redirects starting at +url+ and returns the first address
# whose response carries no Location header.
#
# At most +limit+ redirects are followed, so a redirect loop ends instead of
# recursing without bound. A relative Location header is resolved against
# the address that issued it. On any error a message is printed and the
# address reached so far is returned.
#
# @param url [String] starting address
# @param limit [Integer] maximum number of redirects to follow
# @return [String] the final address
def get_final_location(url, limit = 10)
  return url if limit <= 0

  begin
    Net::HTTP.get_response(URI(url)) do |res|
      location = res["location"]
      return url if location.nil?
      return get_final_location(URI.join(url, location).to_s, limit - 1)
    end
  rescue StandardError
    puts "Not able to reach at the final location"
    return url
  end
end
109
+
110
# Tells whether a download is complete: the file must exist and its size on
# disk must equal +file_size+.
#
# @param full_file_path [String] path of the downloaded file
# @param file_size [Integer] expected size in bytes
# @return [Boolean]
def download_successful?(full_file_path, file_size)
  return false unless File.exist?(full_file_path)

  File.size(full_file_path) == file_size
end
113
+
114
+ module_function :get_html
115
+ module_function :get_json
116
+ module_function :get_binary
117
+ module_function :get_wav
118
+ module_function :get_final_location
119
+ module_function :download_successful?
120
+ end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ $:.unshift File.expand_path(File.dirname(__FILE__))
5
+
6
+ module UnixTools
7
+
8
# Recursively deletes +directory_path+ and everything below it. A path that
# is not a directory is deleted as a single file.
#
# Symbolic links are removed as links and never followed, so the contents of
# a linked directory outside the tree are left alone. Only the "." and ".."
# entries are skipped. Entries that cannot be removed are skipped silently.
#
# @param directory_path [String] folder or file to delete
def delete_dir(directory_path)
  if File.directory?(directory_path) && !File.symlink?(directory_path)
    Dir.foreach(directory_path) do |entry|
      next if entry == "." || entry == ".."
      delete_dir(File.join(directory_path, entry))
    end
    begin
      Dir.rmdir(directory_path)
    rescue SystemCallError
      # Best effort: e.g. the folder is not empty because an entry could not be removed.
    end
  else
    begin
      File.delete(directory_path)
    rescue SystemCallError
      # Best effort: the path is missing or not removable.
    end
  end
end
19
+
20
# Locates an executable and returns its path, reporting progress on standard
# output. +command+ itself is tried first, then its basename is looked up in
# the folders listed in ENV["PATH"].
#
# The lookup is done in Ruby, so nothing is passed to a shell. When nothing
# usable is found, a message is printed and the process exits.
#
# @param command [String] command name or path, e.g. "/usr/local/bin/ffmpeg"
# @return [String] path of the executable found
def check_command(command)
  basename = File.basename(command)
  print "Checking #{basename} command: "

  usable = lambda { |file| File.file?(file) && File.executable?(file) }
  resolve = lambda do |name|
    if name.include?(File::SEPARATOR)
      # A name with a folder part is taken as given, not searched in PATH.
      usable.call(name) ? name : nil
    else
      ENV.fetch("PATH", "").split(File::PATH_SEPARATOR).reject(&:empty?)
         .map { |dir| File.join(dir, name) }
         .find { |candidate| usable.call(candidate) }
    end
  end

  path = resolve.call(command) || resolve.call(basename)
  unless path
    puts "not installed to the system"
    exit
  end

  puts "detected at #{path}"
  path
end
35
+
36
+ module_function :check_command
37
+ module_function :delete_dir
38
+ end
@@ -0,0 +1,3 @@
1
+ module TedTalk
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'ted_talk/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "ted_talk"
8
+ gem.version = TedTalk::VERSION
9
+ gem.authors = ["Yoichiro Hasebe"]
10
+ gem.email = ["yohasebe@gmail.com"]
11
+ gem.description = "TedTalk helps download TED talk video "
12
+ gem.description += "and covert it to a slowed down MP3 with pauses that is useful for English learning"
13
+ gem.summary = "TED talk downloader and converter for English learners"
14
+ gem.homepage = "http://github.com/yohasebe/ted_talk"
15
+
16
+ gem.files = `git ls-files`.split($/)
17
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
18
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
+ gem.require_paths = ["lib"]
20
+ gem.add_development_dependency "minitest"
21
+ gem.add_runtime_dependency "progressbar"
22
+ gem.add_runtime_dependency "json"
23
+ gem.add_runtime_dependency "taglib-ruby"
24
+ gem.add_runtime_dependency "nokogiri"
25
+ gem.add_runtime_dependency "speak_slow"
26
+ gem.add_runtime_dependency "trollop"
27
+ end
@@ -0,0 +1,29 @@
1
+ require 'minitest/autorun'
2
+ require 'ted_talk'
3
+
4
# Smoke tests that run the converter against a real talk page. They need
# network access and the external tools the library calls.
#
# Minitest 5 renamed the base class to Minitest::Test; the old name is used
# only where the new one is not defined.
class TestTedTalk < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)

  def setup
    @source_url = "http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html"
    @outdir = File.expand_path(File.dirname(__FILE__)) + "/temp"
    # `rm -rf #{@outdir}` if File.exists? @outdir
    # File.exists? was removed in Ruby 3.2; Dir.mkdir also avoids a shell call.
    Dir.mkdir(@outdir) unless File.directory?(@outdir)
    @tedtalk = TedTalk::Converter.new(@source_url)
  end

  # Prints the Japanese description of the sample talk.
  def test_description
    @tedtalk.desc_talk("ja")
  end

  # Runs the full download-and-convert pipeline with Japanese captions.
  def test_execution
    speed = 0.8
    silence = 3
    language = "ja"
    @tedtalk.execute(@outdir, language, speed, silence)
  end

  def teardown
    # `rm -rf #{@outdir}`
  end

end
metadata ADDED
@@ -0,0 +1,172 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ted_talk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Yoichiro Hasebe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: minitest
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: progressbar
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: json
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: taglib-ruby
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: nokogiri
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: speak_slow
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: trollop
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description: TedTalk helps download TED talk video and covert it to a slowed down
127
+ MP3 with pauses that is useful for English learning
128
+ email:
129
+ - yohasebe@gmail.com
130
+ executables:
131
+ - ted_talk
132
+ extensions: []
133
+ extra_rdoc_files: []
134
+ files:
135
+ - .gitignore
136
+ - Gemfile
137
+ - LICENSE.txt
138
+ - README.md
139
+ - Rakefile
140
+ - bin/ted_talk
141
+ - lib/ted_talk.rb
142
+ - lib/ted_talk/download_utils.rb
143
+ - lib/ted_talk/unix_tools.rb
144
+ - lib/ted_talk/version.rb
145
+ - ted_talk.gemspec
146
+ - test/ted_talk_test.rb
147
+ homepage: http://github.com/yohasebe/ted_talk
148
+ licenses: []
149
+ post_install_message:
150
+ rdoc_options: []
151
+ require_paths:
152
+ - lib
153
+ required_ruby_version: !ruby/object:Gem::Requirement
154
+ none: false
155
+ requirements:
156
+ - - ! '>='
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ required_rubygems_version: !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ! '>='
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ requirements: []
166
+ rubyforge_project:
167
+ rubygems_version: 1.8.24
168
+ signing_key:
169
+ specification_version: 3
170
+ summary: TED talk downloader and converter for English learners
171
+ test_files:
172
+ - test/ted_talk_test.rb