mp3scrape 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/README +43 -0
  2. data/bin/mp3scrape +154 -0
  3. data/gemspec.rb +35 -0
  4. data/install.rb +214 -0
  5. metadata +57 -0
data/README ADDED
@@ -0,0 +1,43 @@
1
+ NAME
2
+ mp3scrape
3
+
4
+ SYNOPSIS
5
+ mp3scrape uri [options]+
6
+
7
+ DESCRIPTION
8
+ mp3scrape will scour any url for its mp3 content - the script mirrors,
9
+ never downloading the same file twice. it does not, however, crawl a
10
+ website for links, it simply scrapes all the songs from a single page.
11
+
12
+ PARAMETERS
13
+ uri (1 -> uri(uri))
14
+ the uri to scrape
15
+ --pattern=pattern, -p (0 ~> pattern=['"](http://[^\s]+[^/\s]+.mp3)["'])
16
+ specify the mp3 pattern
17
+ --basedir=basedir, -b (0 ~> basedir=/Users/ahoward/mp3)
18
+ specify the base download dir - default(/Users/ahoward/mp3)
19
+ --destination=destination, -d (0 ~> destination)
20
+ specify the absolute download dir -
21
+ default(/Users/ahoward/mp3/auto-based-on-uri)
22
+ --list
23
+ only list the mp3s that would be scraped
24
+ --threads=threads, -t (0 ~> integer(threads=8))
25
+ specify the number of threads to download with in parallel -
26
+ default(8)
27
+ --noop, -n
28
+ show the downloads that would be performed
29
+ --help, -h
30
+
31
+ INSTALL
32
+ sudo gem install mp3scrape
33
+
34
+ URI
35
+ http://codeforpeople.com
36
+
37
+ EXAMPLES
38
+ 1) get a bunch of xmas tunes
39
+ mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html
40
+
41
+ 2) get a bunch of tunes
42
+ mp3scrape http://troubledsoulsunite.blogspot.com/
43
+
@@ -0,0 +1,154 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ Main {
4
# Built-in defaults: downloads go under ~/mp3 and use 8 parallel threads unless
# overridden by the options declared below.
Home = File.expand_path(ENV["HOME"] || ENV["USERPROFILE"] || "~")
Basedir = File.join(Home, "mp3")
Threads = 8

description <<-txt
  mp3scrape will scour any url for its mp3 content - the script mirrors,
  never downloading the same file twice. it does not, however, crawl a
  website for links, it simply scrapes all the songs from a single page.
txt

usage['INSTALL'] = 'sudo gem install mp3scrape'

usage['URI'] = 'http://codeforpeople.com'

example <<-txt
  1) get a bunch of xmas tunes
    mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html

  2) get a bunch of tunes
    mp3scrape http://troubledsoulsunite.blogspot.com/
txt

# The page to scrape (required positional argument).
argument("uri"){
  description "the uri to scrape"
  cast :uri
}

# Regular expression source used to find mp3 links; the first capture group is
# the link itself. The dot before "mp3" is escaped so that only a literal
# ".mp3" suffix matches (previously any character was accepted there).
option("pattern", "p"){
  description "specify the mp3 pattern"
  argument_required
  default %|['"](http://[^\\s]+[^/\\s]+\\.mp3)["']|
}

option("basedir", "b"){
  description "specify the base download dir - default(#{ Basedir })"
  argument_required
  default Basedir
}

option("destination", "d"){
  description "specify the absolute download dir - default(#{ File.join Basedir, 'auto-based-on-uri' })"
  argument_required
}

option("list"){
  description "only list the mp3s that would be scraped"
}

option("threads", "t"){
  description "specify the number of threads to download with in parallel - default(#{ Threads })"
  argument_required
  default Threads
  cast :integer
}

option("noop", "n"){
  description "show the downloads that would be performed"
}
62
+
63
# Fetches the page named by the "uri" parameter, extracts every link matching
# the "pattern" parameter and then, depending on the flags given:
#   --list : prints the links and exits
#   --noop : prints each "source -> destination" pair without downloading
#   else   : mirrors every link to its destination
def run
  uri = param["uri"].value
  pattern = %r/#{ param["pattern"].value }/

  # Block form closes the handle as soon as the page body has been read.
  page = open(uri.to_s){|fd| fd.read}

  # uniq: a page often links the same file more than once; without it two
  # threads would download the same source into the same destination.
  srcs = page.scan(pattern).flatten.compact.uniq

  if param["list"].given?
    puts srcs
    exit
  end

  dsts = destinations_for srcs, param["destination"].value
  spec = srcs.zip dsts

  if param["noop"].given?
    spec.each{|src, dst| puts "#{ src } -> #{ dst }"}
  else
    mirror spec
  end
end
79
+
80
# Downloads every [src, dst] pair in +spec+, using as many parallel workers as
# the "threads" parameter requests (via the threadify gem).
#
# A source is skipped when its Last-Modified time is not newer than the local
# file's mtime; after a download the local mtime is set to the remote
# Last-Modified so the next run can make the same comparison. When the server
# sends no Last-Modified header the current time is used, so such files are
# always fetched again.
#
# spec - array of [source_url, destination_path] pairs
def mirror spec
  spec.threadify(params["threads"].value) do |src, dst|
    begin
      FileUtils.mkdir_p(File.dirname(dst))
      mtime = File.exist?(dst) ? File.mtime(dst) : Time.at(0)

      # NOTE(review): open-uri's Kernel#open is used on a scraped string; with
      # a custom --pattern that does not force an http:// prefix this could
      # open something other than a URL - confirm before loosening the pattern.
      open src do |remote|
        last_modified = remote.last_modified || Time.now

        unless last_modified > mtime
          # One write including the newline keeps progress lines from
          # different threads from running together.
          print "#{ src } == #{ dst }\n"
          break
        end

        data = remote.read
        open(dst, "wb"){|local| local.write data}
        File.utime last_modified, last_modified, dst
        print "#{ src } -> #{ dst }\n"
      end
    # Deliberately broad (kept from the original): one failed download is
    # reported and must not stop the remaining ones.
    rescue Object => e
      STDERR.puts "#{ e.message } (#{ e.class })"
    end
  end
end
101
+
102
# Maps each source url to the absolute local path it should be saved under.
#
# srcs        - array of url strings
# destination - when given, every file is placed directly in this directory
#               under its cleaned basename
# basedir     - root used when +destination+ is nil; defaults to the value of
#               the "basedir" parameter so that --basedir is honoured
#
# Without +destination+ the path is <basedir>/<host>/<url path...>/<basename>.
# Empty, "." and ".." components are dropped so that a crafted url cannot
# climb out of +basedir+.
#
# Returns an array of expanded paths, in the same order as +srcs+.
def destinations_for srcs, destination = nil, basedir = nil
  # Only needed (and only looked up) when paths are derived from the url.
  basedir ||= param["basedir"].value unless destination

  srcs.map do |src|
    File.expand_path(
      if destination
        File.join destination, clean(File.basename(src))
      else
        uri = URI.parse src.to_s
        segments = uri.path.split("/").map{|segment| clean segment}
        basename = segments.pop
        dirs = [ uri.host, *segments ].compact.reject do |part|
          part.empty? || part == "." || part == ".."
        end
        [ basedir, dirs, basename ].flatten.compact.join(File::SEPARATOR)
      end
    )
  end
end
118
+
119
# Turns a url path component into a safe file name: percent-escapes are
# decoded, every character outside [0-9a-zA-Z_@)(~.-] becomes "_", and runs of
# underscores collapse to a single one.
def clean basename
  decoded = CGI.unescape(basename.to_s)
  sanitized = decoded.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_')
  sanitized.squeeze('_')
end
122
+ }
123
+
124
# Runs before the rest of the script so every library is loaded by the time
# the Main block above executes.
BEGIN {
  %w[ yaml uri open-uri fileutils cgi ].each{|lib| require lib}

  # rubygems is optional: carry on without it when it cannot be loaded.
  begin
    require "rubygems"
  rescue LoadError
    42
  end

  # These two gems are mandatory: print the install command and give up.
  %w[ main threadify ].each do |gem_name|
    begin
      require gem_name
    rescue LoadError
      STDERR.puts "gem install #{ gem_name }"
      exit 1
    end
  end

  # Unbuffered output so progress lines appear immediately.
  STDERR.sync = STDOUT.sync = true

  # Ctrl-C terminates at once, without running exit handlers.
  trap("INT"){ exit! }
}
@@ -0,0 +1,35 @@
1
# Gem name and version are read from the name of the directory holding this
# file, split at the first "-" (e.g. "mp3scrape-0.0.1" -> "mp3scrape", "0.0.1").
lib, version = File::basename(File::dirname(File::expand_path(__FILE__))).split %r/-/, 2

require 'rubygems'

Gem::Specification::new do |spec|
  $VERBOSE = nil

  # Removes version-control and temp entries from a file list (in place) and
  # returns the list.
  shiteless = lambda do |list|
    list.delete_if do |file|
      file =~ %r/\.svn/ or
      file =~ %r/\.tmp/
    end
  end

  spec.name = lib
  spec.version = version
  spec.platform = Gem::Platform::RUBY
  spec.summary = lib

  spec.files = shiteless[Dir::glob("**/**")]
  spec.executables = shiteless[Dir::glob("bin/*")].map{|exe| File::basename exe}

  spec.require_path = "lib"

  # The next setters do not exist in every RubyGems release, so each one is
  # applied only where the running RubyGems still defines it.
  spec.has_rdoc = File::exist? "doc" if spec.respond_to? :has_rdoc=
  if File::directory?("test") and spec.respond_to?(:test_suite_file=)
    spec.test_suite_file = "test/#{ lib }.rb"
  end
  #spec.add_dependency 'lib', '>= version'

  # File::exist? (not the removed File::exists?) matches the has_rdoc check above.
  spec.extensions << "extconf.rb" if File::exist? "extconf.rb"

  spec.rubyforge_project = 'codeforpeople' if spec.respond_to? :rubyforge_project=
  spec.author = "Ara T. Howard"
  spec.email = "ara.t.howard@gmail.com"
  spec.homepage = "http://codeforpeople.com/lib/ruby/#{ lib }/"
end
@@ -0,0 +1,214 @@
1
+ #!/usr/bin/env ruby
2
require 'fileutils'
require 'find'
require 'rbconfig'
require 'tempfile'

# ftools left the standard library in Ruby 1.9; load it only where it exists.
begin
  require 'ftools'
rescue LoadError
  nil
end
6
# Brings CONFIG (the interpreter's build settings) into scope. RbConfig is the
# supported module name; the old Config alias no longer exists in current Ruby.
include RbConfig

# Package-relative source directories and the permission bits used on install.
LIBDIR = "lib"
LIBDIR_MODE = 0644

BINDIR = "bin"
BINDIR_MODE = 0755


# Install locations derived from the running interpreter.
$srcdir = CONFIG["srcdir"]
$version = CONFIG["MAJOR"]+"."+CONFIG["MINOR"]
$libdir = File.join(CONFIG["libdir"], "ruby", $version)
$archdir = File.join($libdir, CONFIG["arch"])
# First load-path entry that ends in "site_ruby", if there is one.
$site_libdir = $:.find {|x| x =~ /site_ruby$/}
$bindir = CONFIG["bindir"] || CONFIG['BINDIR']
$ruby_install_name = CONFIG['ruby_install_name'] || CONFIG['RUBY_INSTALL_NAME'] || 'ruby'
$ruby_ext = CONFIG['EXEEXT'] || ''
# Full path of the interpreter; shebangify writes it into installed scripts.
$ruby = File.join($bindir, ($ruby_install_name + $ruby_ext))

if !$site_libdir
  $site_libdir = File.join($libdir, "site_ruby")
elsif $site_libdir !~ %r/#{Regexp.quote($version)}/
  # Make the site directory version specific when the path is not already.
  $site_libdir = File.join($site_libdir, $version)
end
30
+
31
# Copies every regular file below +srcdir+ to the same relative path below
# +destdir+.
#
# srcdir  - directory to walk (must be given despite the nil default)
# destdir - directory to install into; it must already exist, sub-directories
#           are created as needed
# mode    - permission bits applied to each installed file (nil leaves them)
# bin     - when truthy each file is passed through shebangify first, so a
#           ruby "#!" line points at the interpreter in $ruby
#
# Never installed: anything under CVS or .svn, ".lnk" and ".swp" files,
# "~" backups and dot-files. Progress is reported on $stderr.
#
# Uses FileUtils rather than the File::makedirs / File::install helpers, which
# only existed while ftools was part of the standard library.
def install_rb(srcdir=nil, destdir=nil, mode=nil, bin=nil)
  files = []
  dirs = []

  Find.find(srcdir) do |found|
    next unless FileTest.file?(found)
    relative = found[srcdir.length+1..-1]
    next if relative.nil?
    next if File.dirname(relative) =~ /CVS$/
    next if relative =~ %r/\.(?:svn|lnk|swp)/
    files << relative
    dirs |= [File.dirname(relative)]
  end

  dirs.each do |dir|
    next if dir == "." || dir == "CVS"
    FileUtils.mkdir_p(File.join(destdir, dir))
  end

  files.each do |relative|
    next if relative =~ /\~$/
    next if File.basename(relative) =~ /^\./

    from = File.join(srcdir, relative)
    to = File.join(destdir, relative)
    # When +to+ is an existing directory the file lands inside it.
    target = File.directory?(to) ? File.join(to, File.basename(from)) : to

    $stderr.print from, " -> ", target, "\n"
    $stderr.printf "chmod %04o %s\n", mode, to if mode

    if bin
      shebangify(from) do |script|
        FileUtils.install(script, to, :mode => mode)
      end
    else
      FileUtils.install(from, to, :mode => mode)
    end
  end
end
69
# Yields the path of a version of +f+ whose first line is "#!<$ruby>".
#
# Only the first 42 bytes are inspected: when they contain a "#!...ruby" line,
# a temporary copy is written that starts with the new interpreter line and is
# followed by the complete original content, and the temporary path is
# yielded. The temporary file is removed once the block returns. Any other
# file is yielded untouched.
#
# Returns whatever the block returns.
def shebangify f
  open(f) do |script|
    header = script.read 42
    next yield(f) unless header =~ %r/^\s*#\s*!.*ruby/o

    rewritten = Tempfile::new("#{ $$ }_#{ File::basename(f) }")
    begin
      script.rewind
      rewritten.puts "#!#{ $ruby }"
      rewritten.write script.read
      rewritten.close
      yield rewritten.path
    ensure
      rewritten.close!
    end
  end
end
92
# Removes and returns the next switch from ARGV, or returns nil when the next
# argument is not a switch.
#
#   "--"       is consumed and ends switch parsing (nil)
#   "--name"   is returned whole
#   "-abc"     returns "-a" and puts "-bc" back for the next call
#   other text is put back untouched (nil)
#
# Raises RuntimeError when a bundled short switch contains another "-".
def ARGV.switch
  return nil if empty?

  candidate = shift
  return nil if candidate == '--'

  parsed = /^-(.)(.*)/.match(candidate)
  unless parsed
    unshift candidate
    return nil
  end

  flag, rest = parsed[1], parsed[2]
  return candidate if flag == '-'
  raise 'unknown switch "-"' if rest.index('-')

  unshift "-#{rest}" unless rest.empty?
  "-#{flag}"
end
108
# Removes and returns the next element of ARGV as the value of the switch just
# parsed. Raises RuntimeError ("missing argument") when nothing is left.
def ARGV.req_arg
  value = shift
  raise 'missing argument' unless value
  value
end
113
# Gives every versioned file in directory +d+ (name matching
# "*-N.N.N.rb") an unversioned copy, e.g. "foo-1.2.3.rb" -> "foo.rb".
#
# d      - directory to scan; anything that is not a directory is ignored
# linked - array that collects the paths of the copies created
#
# When the unversioned name is already a symlink it is moved aside to
# "<name>.lnk" first and moved back by an at_exit handler. An existing
# unversioned file is left alone. Each action is echoed to stdout.
#
# Copies with FileUtils.cp; File::copy only existed while ftools was part of
# the standard library.
#
# Returns +linked+.
def linkify d, linked = []
  return linked unless File.directory?(d)

  Dir[ File::join(d, "*-[0-9].[0-9].[0-9].rb") ].each do |src|
    dst = src.gsub(%r/\-[\d\.]+\.rb$/, '.rb')
    lnk = nil
    begin
      if File.symlink?(dst)
        lnk = "#{ dst }.lnk"
        puts "#{ dst } -> #{ lnk }"
        File::rename dst, lnk
      end
      unless File.exist?(dst)
        puts "#{ src } -> #{ dst }"
        FileUtils.cp src, dst
        linked << dst
      end
    ensure
      # Registered even when the copy failed, so a moved symlink is restored.
      if lnk
        at_exit do
          puts "#{ lnk } -> #{ dst }"
          File::rename lnk, dst
        end
      end
    end
  end

  linked
end
144
+
145
+
146
+ #
147
+ # main program
148
+ #
149
+
150
# Defaults; each can be overridden by a command-line switch below.
libdir = $site_libdir
bindir = $bindir
no_linkify = false
linked = nil
help = false
# Command prefix for the privileged steps; stays nil unless -s/--sudo is given.
sudo = nil

usage = <<-usage
  #{ File::basename $0 }
    -d, --destdir <destdir>
    -l, --libdir <libdir>
    -b, --bindir <bindir>
    -r, --ruby <ruby>
    -n, --no_linkify
    -s, --sudo
    -h, --help
usage

begin
  while switch = ARGV.switch
    case switch
    # NOTE(review): --destdir sets the library directory, exactly like
    # --libdir - presumably an alias; confirm before changing.
    when '-d', '--destdir'
      libdir = ARGV.req_arg
    when '-l', '--libdir'
      libdir = ARGV.req_arg
    when '-b', '--bindir'
      bindir = ARGV.req_arg
    when '-r', '--ruby'
      $ruby = ARGV.req_arg
    when '-n', '--no_linkify'
      no_linkify = true
    when '-s', '--sudo'
      sudo = 'sudo'
    when '-h', '--help'
      help = true
    else
      raise "unknown switch #{switch.dump}"
    end
  end
rescue
  STDERR.puts $!.to_s
  STDERR.puts usage
  exit 1
end

if help
  STDOUT.puts usage
  exit
end

# Optional hook, run only when the script exists and is not empty.
system "#{ sudo } #{ $ruby } pre-install.rb" if test(?s, 'pre-install.rb')

unless no_linkify
  linked = linkify('lib') + linkify('bin')
end

# Build and install a native extension when the package ships one.
system "#{ $ruby } extconf.rb && make && #{ sudo } make install" if test(?s, 'extconf.rb')

install_rb(LIBDIR, libdir, LIBDIR_MODE)
install_rb(BINDIR, bindir, BINDIR_MODE, true)

# Remove the temporary unversioned copies that linkify created.
# FileUtils.rm_f replaces File::rm_f, which only existed with ftools.
if linked
  linked.each{|path| FileUtils.rm_f path}
end

system "#{ sudo } #{ $ruby } post-install.rb" if test(?s, 'post-install.rb')
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mp3scrape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ara T. Howard
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-31 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: ara.t.howard@gmail.com
18
+ executables:
19
+ - mp3scrape
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - bin
26
+ - bin/mp3scrape
27
+ - gemspec.rb
28
+ - install.rb
29
+ - README
30
+ has_rdoc: false
31
+ homepage: http://codeforpeople.com/lib/ruby/mp3scrape/
32
+ post_install_message:
33
+ rdoc_options: []
34
+
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ version:
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ requirements: []
50
+
51
+ rubyforge_project: codeforpeople
52
+ rubygems_version: 1.2.0
53
+ signing_key:
54
+ specification_version: 2
55
+ summary: mp3scrape
56
+ test_files: []
57
+