ted_talk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/temp
17
+ test/version_tmp
18
+ tmp
19
+ cache/*
20
+ .DS_Store
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in ted_talk.gemspec
4
+ gemspec
5
+
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Yoichiro Hasebe
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,57 @@
1
+ # TedTalk
2
+
3
+ ## Description
4
+
5
+ TedTalk helps download a TED talk video and convert it to a slowed-down MP3 with pauses, which is useful for English learning
6
+
7
+ ## Installation
8
+
9
+ TedTalk requires [FFmpeg](http://ffmpeg.org/) and [SoX](http://sox.sourceforge.net/) with LAME support, as well as [TagLib](http://taglib.github.com/) audio meta-data library installed to the system
10
+
11
+ $ gem install ted_talk
12
+
13
+
14
+ ## Usage
15
+
16
+ Basic usage: ted_talk desc <option> - show TED Talk description(s)
17
+ ted_talk exec <option> - download and convert a TED Talk video
18
+ ted_talk delete - delete cache folder
19
+
20
+ For details about <option>, type:
21
+ ted_talk desc -h
22
+ or ted_talk exec -h
23
+
24
+ [global options]:
25
+ --version, -v: Print version and exit
26
+ --help, -h: Show this message
27
+
28
+ ### desc
29
+
30
+ ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS
31
+ feed or the URL of a specific talk
32
+
33
+ Usage: ted_talk desc <options>
34
+ where <options> are:
35
+
36
+ [desc options]:
37
+ --lang, -l <s>: Language of description (default: en)
38
+ --rss, -r: Show descriptions of the newest talks from TED Talk RSS
39
+ --url, -u <s>: URL of a specific TED Talk
40
+ --help, -h: Show this message
41
+
42
+ ### exec
43
+
44
+ ted_talk exec subcommand downloads a TED Talk video and converts it to an MP3 file
45
+ that is modified in a specified fashion
46
+
47
+ Usage: ted_talk exec <options>
48
+ where <options> are:
49
+
50
+ [exec options]
51
+ --url, -u <s>: URL of a specific TED Talk
52
+ --lang, -l <s>: Language of (bilingual) transcripts (default: en)
53
+ --outdir, -o <s>: Directory for file output (default: ./)
54
+ --speed, -s <f>: Speed of output file [0.1 - 100] (default: 1.0)
55
+ --silence, -i <f>: Length (seconds) of a pause added to each utterance
56
+ [0.1 - 120] (default: 0.0)
57
+ --help, -h: Show this message
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new do |t|
5
+ t.libs.push "lib"
6
+ t.test_files = FileList['test/*_test.rb']
7
+ t.verbose = true
8
+ end
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ $:.unshift File.join(File.dirname(__FILE__), '..', 'lib')
5
+ require 'rubygems'
6
+ require 'trollop'
7
+ require 'ted_talk'
8
+
9
+ # http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html
10
+ # http://www.ted.com/talks/jonathan_haidt_on_the_moral_mind.html
11
+ # http://www.ted.com/talks/susan_cain_the_power_of_introverts.html
12
+ # http://www.ted.com/talks/amy_cuddy_your_body_language_shapes_who_you_are.html
13
+ # http://www.ted.com/talks/ken_robinson_says_schools_kill_creativity.html
14
+ # http://www.ted.com/talks/pranav_mistry_the_thrilling_potential_of_sixthsense_technology.html
15
+ # http://www.ted.com/talks/rives_reinventing_the_encyclopedia_game.html
16
+ # http://www.ted.com/talks/hans_rosling_and_the_magic_washing_machine.html
17
+ # http://www.ted.com/talks/israel_and_iran_a_love_story.html
18
+ # http://www.ted.com/talks/lemon_andersen_performs_please_don_t_take_my_air_jordans.html
19
+ # http://www.ted.com/talks/j_j_abrams_mystery_box.html
20
+ # http://www.ted.com/talks/robert_gupta_between_music_and_medicine.html
21
+
22
+
23
+ SUB_COMMANDS = ["desc", "exec"]
24
+ global_opts = Trollop::options do
25
+ version TedTalk::VERSION
26
+ banner <<-EOS
27
+ TedTalk helps download TED talk video and covert it to a slowed down MP3 with pauses that is helpful for English learning
28
+
29
+ Basic usage: tedtalk desc <option> - show TED Talk description(s)
30
+ tedtalk exec <option> - download and convert a TED Talk video
31
+ tedtalk delete - delete cache folder
32
+
33
+ For details about <option>, type:
34
+ tedtalk desc -h
35
+ or tedtalk exec -h
36
+
37
+ [global options]:
38
+ EOS
39
+
40
+ stop_on SUB_COMMANDS
41
+ end
42
+
43
+ cmd = ARGV.shift # get the subcommand
44
+ cmd_opts = case cmd
45
+
46
+ when "desc" # parse delete options
47
+ desc_opts = Trollop::options do
48
+ banner <<-EOS
49
+ ted_talk desc subcommand shows TED Talk descriptions in the newest official RSS feed or the URL of a specific talk
50
+
51
+ Usage: ted_talk desc <options>
52
+ where <options> are:
53
+
54
+ [desc options]:
55
+ EOS
56
+
57
+ opt :lang, "Language of description", :default => "en", :type => :string
58
+ opt :rss, "Show descriptions of the newest talks from TED Talk RSS", :default => false
59
+ opt :url, "URL of a specific TED Talk", :type => :string
60
+ end
61
+
62
+ if desc_opts[:url] and /http\:\/\/www\.ted\.com\/talks\// !~ desc_opts[:url]
63
+ Trollop::die :url, "must include 'http://www.ted.com/talks/'"
64
+ end
65
+
66
+ lang = desc_opts[:lang]
67
+
68
+ if source_url = desc_opts[:url]
69
+ tedtalk = TedTalk::Converter.new(source_url)
70
+ tedtalk.desc_talk(lang)
71
+ elsif desc_opts[:rss]
72
+ TedTalk.desc_talks_rss(lang)
73
+ else
74
+ Trollop::die "invalid options"
75
+ end
76
+ when "exec" # parse exec options
77
+ exec_opts = Trollop::options do
78
+ banner <<-EOS
79
+ ted_talk exec subcommand download TED Talk video and convert it to an MP3 file that is modified in a specified fashion
80
+
81
+ Usage: ted_talk exec <options>
82
+ where <options> are:
83
+
84
+ [exec options]
85
+ EOS
86
+
87
+ opt :url, "URL of a specific TED Talk", :type => :string
88
+ opt :lang, "Language of (bilingual) transcripts", :default => "en", :type => :string
89
+ opt :outdir, "Directory for file output", :default=> "./"
90
+ opt :speed, "Speed of output file [0.1 - 100]", :default => 1.0
91
+ opt :silence, "Length (secondes) of a pause added to each utterance [0.1 - 120]", :default => 0.0
92
+ end
93
+
94
# Validate the exec options before any download starts.
# File.directory? returns false for a missing path, whereas File::ftype raises
# Errno::ENOENT and would bypass the friendly Trollop error message.
Trollop::die :outdir, "must be an existing directory" unless File.directory?(exec_opts[:outdir])
Trollop::die :speed, "must between 0.1 to 100" unless exec_opts[:speed] >= 0.1 && exec_opts[:speed] <= 100
Trollop::die :silence, "must be 0 to 120" unless exec_opts[:silence] >= 0 && exec_opts[:silence] <= 120
97
+
98
+ source_url = exec_opts[:url]
99
+ lang = exec_opts[:lang]
100
+ outdir = exec_opts[:outdir]
101
+ speed = exec_opts[:speed]
102
+ silence = exec_opts[:silence]
103
+
104
+ tedtalk = TedTalk::Converter.new(source_url)
105
+ tedtalk.execute(outdir, lang, speed, silence)
106
+ when "delete"
107
+ TedTalk.delete_cache
108
+ else
109
+ Trollop::die "unknown subcommand #{cmd.inspect}"
110
+ end
@@ -0,0 +1,298 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ $:.unshift "~/Dropbox/code/speak_slow/lib"
5
+
6
+ require 'speak_slow'
7
+ require 'json'
8
+ require 'net/http'
9
+ require 'digest/md5'
10
+
11
+ require 'rubygems'
12
+ require 'progressbar'
13
+ require 'taglib'
14
+ require 'nokogiri'
15
+
16
+ require 'ted_talk/version'
17
+ require 'ted_talk/download_utils'
18
+ require 'ted_talk/unix_tools'
19
+
20
# Preferred location of the ffmpeg binary. It is handed to
# UnixTools.check_command, which also looks up the bare command name.
FFMPEG = "/usr/local/bin/ffmpeg"

# Directory in which downloaded pages, subtitle JSON, videos and WAV files
# are cached (a "cache" folder next to the lib directory).
CACHE_DIR = File.expand_path(File.dirname(__FILE__)) + "/../cache"

# Offsets combined in Converter#get_captions to shift every caption start time.
# NOTE(review): presumably milliseconds describing the intro/ad segments of a
# TED video -- confirm against the subtitle feed.
INTRO_DURATION = 16500
AD_DURATION = 4000
POST_AD_DURATION = 2000

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
Dir.mkdir(CACHE_DIR) unless File.exist?(CACHE_DIR)
29
+
30
+ module TedTalk
31
+
32
# Deletes the cache directory through UnixTools.delete_dir, reports the
# deletion on stdout and returns true.
def self.delete_cache
  UnixTools.delete_dir(CACHE_DIR)
  puts "Cache folder has been deleted"
  true
end
37
+
38
# Prints a list of talks to stdout, always bypassing the page cache.
#
# For "en" the feedburner RSS feed is read and, for each item, the title,
# publication date, category, original link and description are printed;
# printing stops after +num+ items. For any other language the TED
# translation page for that language is read and the title and absolute URL
# of each linked talk are printed.
#
# lang - language code as a String.
# num  - number of RSS items to print (only used for "en").
def self.desc_talks_rss(lang, num = 12)
  rule = "--------------------------------------------------"
  if lang == "en"
    feed = Nokogiri::XML(DownloadUtils.get_html("http://feeds.feedburner.com/tedtalks_video", true))
    items = feed.xpath("//item")
    puts rule
    items.each_with_index do |item, position|
      %w[title pubDate category feedburner:origLink description].each do |field|
        puts item.xpath(field).text
      end
      puts rule
      break if position + 1 == num
    end
  else
    page = Nokogiri::HTML(DownloadUtils.get_html("http://www.ted.com/translate/languages/#{lang}", true))
    puts rule
    page.xpath("//div[@id='list']//dd//a[1]").each do |anchor|
      puts anchor.attribute("title")
      # Site-relative links are turned into absolute ones.
      puts anchor.attribute("href").text.sub(/\A\//, "http://www.ted.com/")
      puts rule
    end
  end
end
65
+
66
+ class Converter
67
+ include DownloadUtils
68
+ include UnixTools
69
+
70
# Loads a talk page and extracts everything later steps need: the talk id,
# the video download URL, the title and speaker, the available language
# codes, the English title/description and the language-name table.
#
# url - address of a talk; it is normalised to
#       "http://www.ted.com/talks/<name>.html".
#
# Prints a message and exits the process when the URL does not match, when
# the page cannot be fetched, or when any extraction step raises.
def initialize(url)
  unless /(?:http\:\/\/)?(?:www\.)?ted\.com\/talks\/(?:lang\/[^\/]+\/)?(.+\.html)/ =~ url
    puts "The specified URL does not seem to be a valid one"
    exit
  end
  @url = "http://www.ted.com/talks/" + Regexp.last_match(1)

  page = get_html(@url)
  unless page
    puts "The specified URL does not respond with a TED Talk content"
    exit
  end
  @html = page

  @url_basename = File.basename(@url)
  doc = Nokogiri::HTML(@html)
  share_node = doc.xpath("//div[@id='share_and_save']").first
  @ted_id = share_node.attribute("data-id").value
  @video_url = doc.xpath("//a[@id='no-flash-video-download']").attribute("href").value
  @basename = File.basename(@video_url, ".*")
  @captions = {}
  @title = doc.xpath("//h1[1]").text.strip rescue ""
  # The speaker is whatever precedes the first colon of the title.
  @speaker = @title.split(":", 2).first.strip rescue ""

  @available_langs = []
  doc.xpath("//select[@id='languageCode'][1]/option").each do |option|
    code = option.attributes["value"].value.strip
    @available_langs << code unless code == ""
  end
  @available_langs.sort!

  @titles = {}
  @titles["en"] = get_title("en")
  @descriptions = {}
  @descriptions["en"] = get_description("en")
  @language_hash = list_langs
rescue StandardError
  # `exit` raises SystemExit, which is not a StandardError, so the explicit
  # exits above are not intercepted here.
  puts "The specified URL does not seem to contain a regular TED Talk contents"
  exit
end
109
+
110
# Selects the language used for titles, descriptions and captions.
#
# Returns false (after printing a notice) when +lang+ is not among the
# languages offered by the talk. Otherwise records it in @lang and, for
# anything other than "en", also fetches the translated title and
# description and looks up the language's display name.
def setup_lang(lang)
  unless @available_langs.include?(lang)
    puts "Description in #{lang} is not available"
    return false
  end

  @lang = lang
  return if lang == "en"

  @titles[lang] = get_title(lang)
  @descriptions[lang] = get_description(lang)
  @lang_name = @language_hash[@lang]
end
122
+
123
# Prints the talk's title, description and available languages to stdout.
#
# lang - language code for the description; when no description is stored
#        for it, the English one is shown instead.
#
# Strings are built with interpolation so that a language code missing from
# the language-name table, or a nil description, no longer raises TypeError;
# a code without a known name is listed under the code itself.
def desc_talk(lang = "en")
  setup_lang(lang)
  lang = "en" unless @descriptions[lang]

  puts "\nTitle:\n#{@titles["en"]}"
  puts @titles[lang] if lang != "en"
  puts ""
  puts "Description:\n#{@descriptions[lang]}"
  puts ""
  puts "Available Languages: "
  @available_langs.each do |lang_code|
    lang_name = @language_hash[lang_code] || lang_code
    puts " #{lang_name}: #{lang_code}"
  end
end
139
+
140
# Runs the whole conversion: writes the caption text files, downloads the
# video, extracts a WAV track, lets SpeakSlow produce the final MP3 and
# finally tags that MP3 with the captions.
#
# outdir  - directory in which a "<ted id>-<basename>" folder is created.
# lang    - language code for the additional captions.
# speed   - speed factor passed to SpeakSlow.
# silence - pause length passed to SpeakSlow.
#
# Returns false when the video could not be downloaded; otherwise returns
# the result of write_info.
def execute(outdir = "./", lang = "en", speed = 1, silence = 0)
  puts "TedTalk is prepararing for the process"
  @outdir = File.join(outdir, @ted_id + "-" + @basename)
  # File.exists? was removed in Ruby 3.2.
  Dir.mkdir(@outdir) unless File.exist?(@outdir)

  @speed = speed
  @silence = silence
  @lang = lang
  get_captions("en")
  setup_lang(lang)
  get_captions(lang)

  video_filepath = get_binary(@video_url)
  # get_binary returns false for an incomplete download; stop here instead
  # of failing later with a TypeError inside get_wav.
  unless video_filepath
    puts "Not able to download the video file"
    return false
  end

  wav_filepath = get_wav(video_filepath)
  outfile = @outdir + "/" + @basename + "-result.mp3"
  speakslow = SpeakSlow::Converter.new(wav_filepath, outfile)
  speakslow.execute(speed, silence)
  write_info(outfile)
end
158
+
159
# Fetches the talk page for +lang+ and returns the part of its
# <meta name="title"> content that precedes the first "|", stripped.
# Returns "" when that value cannot be extracted from the parsed page.
def get_title(lang)
  page = get_html("http://www.ted.com/talks/lang/#{lang}/" + @url_basename)
  doc = Nokogiri::HTML(page)
  begin
    doc.xpath("//meta[@name='title']").first.attribute("content").value.split("|").first.strip
  rescue StandardError
    ""
  end
end
165
+
166
# Fetches the talk page for +lang+ and returns the content of its
# <meta name="description"> tag with a leading "TED Talks" prefix removed.
#
# The previous implementation returned nil whenever the text did not start
# with that prefix or spanned several lines, and its `rescue` fallback could
# never run; the text is now returned in every case.
#
# Raises NoMethodError when the page has no description meta tag.
def get_description(lang)
  lang_url = "http://www.ted.com/talks/lang/#{lang}/" + @url_basename
  lang_doc = Nokogiri::HTML(get_html(lang_url))
  text = lang_doc.xpath("//meta[@name='description']").first.attribute("content").value.strip
  text.sub(/\ATED Talks\s*/, "")
end
174
+
175
# Downloads the subtitles of the talk in +lang+, converts them to a list of
# caption hashes, writes them as text to "<outdir>/<basename>-<lang>.txt"
# and remembers them in @captions[lang].
#
# The list starts with an empty placeholder caption whose duration is the
# start time of the first real caption. Every start time is shifted by
# INTRO_DURATION - AD_DURATION + POST_AD_DURATION.
#
# Returns the caption list, or false (after printing a notice) when the
# language is not available for this talk.
def get_captions(lang = "en")
  unless @available_langs.include?(lang)
    puts "Caption in #{lang} is not available"
    return false
  end

  json_url = "http://www.ted.com/talks/subtitles/id/#{@ted_id}"
  json_url += "/lang/#{lang}" if lang != "en"
  raw_captions = get_json(json_url)["captions"]

  # Ids are zero-padded to the width of the caption count.
  id_format = "%0#{raw_captions.size.to_s.length}d"
  offset = INTRO_DURATION - AD_DURATION + POST_AD_DURATION

  entries = raw_captions.each_with_index.map do |raw, position|
    start = offset + raw["startTime"].to_i
    {
      :id => format(id_format, position + 1),
      :start_time => start,
      :start_time_s => format_time(start),
      :duration => raw["duration"].to_i,
      :content => raw["content"].gsub(/\s+/, " "),
      :end_time_s => format_time(start + raw["duration"].to_i),
      :start_of_paragraph => raw["startOfParagraph"]
    }
  end

  lead_in = {
    :id => format(id_format, 0),
    :start_time_s => "00.00.00",
    :duration => entries.empty? ? nil : entries.first[:start_time],
    :content => "",
    :start_of_paragraph => false,
    :start_time => 0
  }
  captions = [lead_in] + entries

  File.open(@outdir + "/" + @basename + "-" + lang + ".txt", "w") do |f|
    f.write format_captions(captions)
  end
  @captions[lang] = captions
  captions
end
214
+
215
# Reads the TED translation languages page and returns a Hash that maps a
# language code (the last path segment of each link) to the link text with
# its first parenthesised part removed.
def list_langs
  doc = Nokogiri::HTML(get_html("http://www.ted.com/translate/languages"))
  doc.xpath("//div[@id='content'][1]//ul//a").each_with_object({}) do |anchor, names|
    label = anchor.text
    code = anchor.attribute("href").value.split("/")[-1].strip
    names[code] = label.sub(/\(.+?\)/, "").strip
  end
end
227
+
228
# Writes ID3v2 metadata into the MP3 at +filepath+: artist, title (with the
# caption language and speed factor appended when set), genre, and an
# unsynchronised-lyrics frame that holds the titles, the descriptions and
# every English caption, each followed by the caption at the same index in
# the selected language when one exists.
def write_info(filepath)
  puts "Writing captions to MP3"
  TagLib::MPEG::File.open(filepath) do |mp3|
    tag = mp3.id3v2_tag
    tag.artist = "TED Talk "
    tag.title = @title
    tag.title += " (with captions in #{@lang_name})" if @lang_name
    tag.title += " [x#{@speed}]" if @speed && @speed != 1
    tag.genre = "Talk"

    divider = "--------------------\n"
    lyrics = @titles["en"] + "\n"
    lyrics << @titles[@lang] + "\n" if @titles[@lang]
    lyrics << divider
    lyrics << @descriptions["en"] + "\n"
    lyrics << @descriptions[@lang] + "\n" if @descriptions[@lang]
    lyrics << "\n"

    translated = @captions[@lang]
    @captions["en"].each_with_index do |caption, position|
      # A divider followed by a blank line marks the start of a paragraph.
      lyrics << divider + "\n" if caption[:start_of_paragraph]
      next if caption[:content] == ""
      lyrics << caption[:content] + "\n"
      next unless translated
      counterpart = begin
        translated[position][:content] + "\n\n"
      rescue StandardError
        # No caption at this index in the other language.
        ""
      end
      lyrics << counterpart
    end

    frame = TagLib::ID3v2::UnsynchronizedLyricsFrame.new
    frame.language = "eng"
    frame.text_encoding = TagLib::String::UTF8
    frame.text = lyrics

    tag.add_frame(frame)
    mp3.save
  end
end
263
+
264
# Renders a caption list as plain text: a header with the talk id, speaker,
# title (plus caption language, "English" when none is selected) and URL,
# followed by one numbered line per caption. Numbers are zero-padded to the
# width of the caption count, and a blank line precedes every caption that
# starts a paragraph.
def format_captions(captions)
  shown_lang = @lang_name || "English"
  header = "TED Talk ID: #{@ted_id}\n" \
           "Speaker: #{@speaker}\n" \
           "Title: #{@title} (with captions in #{shown_lang})\n" \
           "URL: #{@url}\n\n"
  width = captions.size.to_s.length
  captions.each_with_index.inject(header) do |text, (caption, position)|
    text << "\n" if caption[:start_of_paragraph]
    text << "#{(position + 1).to_s.rjust(width, "0")} #{caption[:content]} \n"
  end
end
279
+
280
# Formats a time given in milliseconds as "MM.SS.hh": whole minutes,
# remaining seconds and hundredths of a second, each zero-padded to two
# digits. Minutes are not wrapped at 60.
def format_time(time)
  total_seconds = time / 1000
  minutes = total_seconds / 60
  seconds = total_seconds - minutes * 60
  hundredths = time % 1000 / 10
  format("%02d.%02d.%02d", minutes, seconds, hundredths)
end
291
+
292
# Returns the sorted list of download.ted.com MP4 URLs found in +html+
# whose file name starts with this talk's basename.
#
# The host dots and the basename are escaped so they only match literally;
# previously "." matched any character and regex metacharacters in the
# basename were interpreted as pattern syntax.
def get_video_urls(html)
  pattern = /http:\/\/download\.ted\.com\/talks\/#{Regexp.escape(@basename.to_s)}.*?\.mp4/
  html.scan(pattern).sort
end
295
+
296
+ end # of class
297
+ end # of module
298
+
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ require "rubygems"
5
+ require "progressbar"
6
+
7
+ $:.unshift File.expand_path(File.dirname(__FILE__))
8
+ require 'unix_tools'
9
+
10
+ module DownloadUtils
11
# Returns the body of the page at +url+ after following redirects.
#
# The body is cached in CACHE_DIR under the MD5 hex digest of the final URL.
# A cached copy is returned unless +without_cache+ is true; a freshly
# downloaded body always overwrites the cache entry.
#
# Prints a message and exits the process when the response is not a success
# or when the download raises.
def get_html(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.hexdigest(url)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return File.read(cache_path) if !without_cache && File.exist?(cache_path)

  begin
    res = Net::HTTP.get_response(URI(url))
    unless res.is_a?(Net::HTTPSuccess)
      puts "HTML download error"
      exit
    end
    html = res.body
    File.open(cache_path, "w") { |f| f.write html }
  rescue StandardError
    puts "Not able to download HTML"
    exit
  end
  html
end
37
+
38
# Returns the parsed JSON document at +url+ after following redirects.
#
# The document is cached (pretty-printed) in CACHE_DIR under the MD5 hex
# digest of the final URL. A cached copy is parsed and returned unless
# +without_cache+ is true.
#
# Prints a message and exits the process when the download or the parsing
# of the downloaded body raises.
def get_json(url, without_cache = false)
  url = get_final_location(url)
  cache_path = CACHE_DIR + "/" + Digest::MD5.hexdigest(url)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return JSON.parse(File.read(cache_path)) if !without_cache && File.exist?(cache_path)

  begin
    res = Net::HTTP.get_response(URI(url))
    script = JSON.parse(res.body)
    File.open(cache_path, "w") { |f| f.write JSON.pretty_generate(script) }
  rescue StandardError
    # The message used to say "HTML", copied from get_html.
    puts "Not able to download JSON"
    exit
  end
  script
end
61
+
62
# Downloads the file at +url+ into CACHE_DIR (named after the URL's
# basename) while showing a progress bar, and returns the local path.
#
# An already cached file is returned immediately unless +without_cache+ is
# true. Returns false when the downloaded size does not match the
# Content-Length header.
#
# The file handle is closed even when the transfer raises, and a truncated
# download is removed so that it is not served as a cache hit on the next
# run. When no Content-Length was sent the file is left in place, because
# its completeness cannot be judged.
def get_binary(url, without_cache = false)
  url = get_final_location(url)
  basename = File.basename(url)
  filepath = CACHE_DIR + "/" + basename
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return filepath if !without_cache && File.exist?(filepath)

  file_size = 0
  uri = URI(url)
  puts "Downloading file: " + basename
  begin
    File.open(filepath, "wb") do |file|
      Net::HTTP.start(uri.host, uri.port) do |http|
        http.request_get(uri.request_uri) do |res|
          file_size = res.read_header["content-length"].to_i
          bar = ProgressBar.new(basename, file_size)
          bar.file_transfer_mode
          res.read_body do |segment|
            bar.inc(segment.size)
            file.write(segment)
          end
        end
      end
    end
  rescue StandardError
    File.delete(filepath) if File.exist?(filepath)
    raise
  end
  print "\n"

  return filepath if download_successful?(filepath, file_size)
  File.delete(filepath) if file_size > 0 && File.exist?(filepath)
  false
end
86
+
87
# Extracts the audio of +video_filepath+ into a mono, 44.1 kHz, 16-bit PCM
# WAV file inside CACHE_DIR and returns that file's path. An existing WAV
# with the same basename is reused.
#
# ffmpeg is started with an argument list rather than through a shell
# string, so paths containing spaces or shell metacharacters are passed
# through unchanged.
def get_wav(video_filepath)
  ffmpeg = UnixTools::check_command(FFMPEG)
  basename = File.basename(video_filepath, ".*")
  filepath = CACHE_DIR + "/" + basename + ".wav"
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  return filepath if File.exist?(filepath)

  puts "Converting to audio: #{basename}.wav"
  system(ffmpeg, "-loglevel", "panic", "-i", video_filepath,
         "-ac", "1", "-vn", "-acodec", "pcm_s16le", "-ar", "44100", filepath)
  filepath
end
96
+
97
# Follows HTTP redirects starting at +url+ and returns the first URL whose
# response carries no Location header.
#
# limit - maximum number of redirects to follow (default 10). When it is
#         used up the URL reached so far is returned, so a redirect loop
#         can no longer recurse without end.
#
# Relative Location headers are resolved against the current URL. On any
# error a message is printed and the current URL is returned.
def get_final_location(url, limit = 10)
  return url if limit <= 0

  Net::HTTP.get_response(URI(url)) do |res|
    location = res["location"]
    return url if location.nil?
    return get_final_location(URI.join(url, location).to_s, limit - 1)
  end
rescue StandardError
  puts "Not able to reach at the final location"
  url
end
109
+
110
# True when +full_file_path+ exists and its size in bytes equals
# +file_size+.
def download_successful?(full_file_path, file_size)
  return false unless File.exist?(full_file_path)

  File.size(full_file_path) == file_size
end
113
+
114
+ module_function :get_html
115
+ module_function :get_json
116
+ module_function :get_binary
117
+ module_function :get_wav
118
+ module_function :get_final_location
119
+ module_function :download_successful?
120
+ end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+
4
+ $:.unshift File.expand_path(File.dirname(__FILE__))
5
+
6
+ module UnixTools
7
+
8
# Recursively deletes +directory_path+; a plain file path is deleted too.
# Deletion is best effort: entries that cannot be removed are skipped.
#
# Symbolic links are removed as links and never followed, so the contents
# of a directory that is merely linked from inside the tree are left alone.
# Only "." and ".." are skipped; entries such as "..." are deleted like any
# other. The entry list is read before anything is removed, so the
# directory is not modified while it is being iterated.
def delete_dir(directory_path)
  if !File.symlink?(directory_path) && FileTest.directory?(directory_path)
    (Dir.entries(directory_path) - %w[. ..]).each do |entry|
      delete_dir(File.join(directory_path, entry))
    end
    begin
      Dir.rmdir(directory_path)
    rescue SystemCallError
      ""
    end
  else
    begin
      File.delete(directory_path)
    rescue SystemCallError
      ""
    end
  end
end
19
+
20
# Locates an external command with `which`, trying +command+ as given first
# and its basename second, and returns the stripped path that was found.
# Progress is reported on stdout. When neither lookup succeeds a message is
# printed and the process exits.
#
# `which` is started with an argument list instead of a shell pipe string,
# so the command name is never interpreted by a shell.
def check_command(command)
  basename = File.basename(command)
  print "Checking #{basename} command: "
  [command, basename].each do |candidate|
    path = begin
      IO.popen(["which", candidate], :err => File::NULL) { |io| io.gets }
    rescue SystemCallError
      # `which` itself could not be started.
      nil
    end
    next unless path

    puts "detected at #{path}"
    return path.strip
  end
  puts "not installed to the system"
  exit
end
35
+
36
+ module_function :check_command
37
+ module_function :delete_dir
38
+ end
@@ -0,0 +1,3 @@
1
+ module TedTalk
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'ted_talk/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "ted_talk"
8
+ gem.version = TedTalk::VERSION
9
+ gem.authors = ["Yoichiro Hasebe"]
10
+ gem.email = ["yohasebe@gmail.com"]
11
+ gem.description = "TedTalk helps download TED talk video "
12
+ gem.description += "and covert it to a slowed down MP3 with pauses that is useful for English learning"
13
+ gem.summary = "TED talk downloader and converter for English learners"
14
+ gem.homepage = "http://github.com/yohasebe/ted_talk"
15
+
16
+ gem.files = `git ls-files`.split($/)
17
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
18
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
19
+ gem.require_paths = ["lib"]
20
+ gem.add_development_dependency "minitest"
21
+ gem.add_runtime_dependency "progressbar"
22
+ gem.add_runtime_dependency "json"
23
+ gem.add_runtime_dependency "taglib-ruby"
24
+ gem.add_runtime_dependency "nokogiri"
25
+ gem.add_runtime_dependency "speak_slow"
26
+ gem.add_runtime_dependency "trollop"
27
+ end
@@ -0,0 +1,29 @@
1
+ require 'minitest/autorun'
2
+ require 'ted_talk'
3
+
4
+ class TestTedTalk < MiniTest::Unit::TestCase
5
+
6
+ def setup
7
+ @source_url = "http://www.ted.com/talks/steven_addis_a_father_daughter_bond_one_photo_at_a_time.html"
8
+ @outdir = File.expand_path(File.dirname(__FILE__)) + "/temp"
9
+ # `rm -rf #{@outdir}` if File.exists? @outdir
10
+ `mkdir #{@outdir}` unless File.exists? @outdir
11
+ @tedtalk = TedTalk::Converter.new(@source_url)
12
+ end
13
+
14
+ def test_description
15
+ @tedtalk.desc_talk("ja")
16
+ end
17
+
18
+ def test_execution
19
+ speed = 0.8
20
+ silence = 3
21
+ language = "ja"
22
+ @tedtalk.execute(@outdir, language, speed, silence)
23
+ end
24
+
25
+ def teardown
26
+ # `rm -rf #{@outdir}`
27
+ end
28
+
29
+ end
metadata ADDED
@@ -0,0 +1,172 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ted_talk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Yoichiro Hasebe
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: minitest
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: progressbar
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: json
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: taglib-ruby
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: nokogiri
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: speak_slow
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: trollop
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description: TedTalk helps download TED talk video and covert it to a slowed down
127
+ MP3 with pauses that is useful for English learning
128
+ email:
129
+ - yohasebe@gmail.com
130
+ executables:
131
+ - ted_talk
132
+ extensions: []
133
+ extra_rdoc_files: []
134
+ files:
135
+ - .gitignore
136
+ - Gemfile
137
+ - LICENSE.txt
138
+ - README.md
139
+ - Rakefile
140
+ - bin/ted_talk
141
+ - lib/ted_talk.rb
142
+ - lib/ted_talk/download_utils.rb
143
+ - lib/ted_talk/unix_tools.rb
144
+ - lib/ted_talk/version.rb
145
+ - ted_talk.gemspec
146
+ - test/ted_talk_test.rb
147
+ homepage: http://github.com/yohasebe/ted_talk
148
+ licenses: []
149
+ post_install_message:
150
+ rdoc_options: []
151
+ require_paths:
152
+ - lib
153
+ required_ruby_version: !ruby/object:Gem::Requirement
154
+ none: false
155
+ requirements:
156
+ - - ! '>='
157
+ - !ruby/object:Gem::Version
158
+ version: '0'
159
+ required_rubygems_version: !ruby/object:Gem::Requirement
160
+ none: false
161
+ requirements:
162
+ - - ! '>='
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ requirements: []
166
+ rubyforge_project:
167
+ rubygems_version: 1.8.24
168
+ signing_key:
169
+ specification_version: 3
170
+ summary: TED talk downloader and converter for English learners
171
+ test_files:
172
+ - test/ted_talk_test.rb