mp3scrape 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +43 -0
- data/bin/mp3scrape +154 -0
- data/gemspec.rb +35 -0
- data/install.rb +214 -0
- metadata +57 -0
data/README
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
NAME
|
2
|
+
mp3scrape
|
3
|
+
|
4
|
+
SYNOPSIS
|
5
|
+
mp3scrape uri [options]+
|
6
|
+
|
7
|
+
DESCRIPTION
|
8
|
+
mp3scrape will scour any url for its mp3 content - the script mirrors,
|
9
|
+
never downloading the same file twice. it does not, however, crawl a
|
10
|
+
website for links, it simply scrapes all the songs from a single page.
|
11
|
+
|
12
|
+
PARAMETERS
|
13
|
+
uri (1 -> uri(uri))
|
14
|
+
the uri to scrape
|
15
|
+
--pattern=pattern, -p (0 ~> pattern=['"](http://[^\s]+[^/\s]+.mp3)["'])
|
16
|
+
specify the mp3 pattern
|
17
|
+
--basedir=basedir, -b (0 ~> basedir=/Users/ahoward/mp3)
|
18
|
+
specify the base download dir - default(/Users/ahoward/mp3)
|
19
|
+
--destination=destination, -d (0 ~> destination)
|
20
|
+
specify the absolute download dir -
|
21
|
+
default(/Users/ahoward/mp3/auto-based-on-uri)
|
22
|
+
--list
|
23
|
+
only list the mp3s that would be scraped
|
24
|
+
--threads=threads, -t (0 ~> integer(threads=8))
|
25
|
+
specify the number of threads to download with in parallel -
|
26
|
+
default(8)
|
27
|
+
--noop, -n
|
28
|
+
show the downloads that would be performed
|
29
|
+
--help, -h
|
30
|
+
|
31
|
+
INSTALL
|
32
|
+
sudo gem install mp3scrape
|
33
|
+
|
34
|
+
URI
|
35
|
+
http://codeforpeople.com
|
36
|
+
|
37
|
+
EXAMPLES
|
38
|
+
1) get a bunch of xmas tunes
|
39
|
+
mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html
|
40
|
+
|
41
|
+
2) get a bunch of tunes
|
42
|
+
mp3scrape http://troubledsoulsunite.blogspot.com/
|
43
|
+
|
data/bin/mp3scrape
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
Main {
|
4
|
+
# Defaults shared by the option declarations below.
Home = File.expand_path(ENV["HOME"] || ENV["USERPROFILE"] || "~")
Basedir = File.join(Home, "mp3")
Threads = 8

# Help text shown in the generated usage message.
description <<-txt
  mp3scrape will scour any url for its mp3 content - the script mirrors,
  never downloading the same file twice. it does not, however, crawl a
  website for links, it simply scrapes all the songs from a single page.
txt

usage['INSTALL'] = 'sudo gem install mp3scrape'

usage['URI'] = 'http://codeforpeople.com'

example <<-txt
  1) get a bunch of xmas tunes
    mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html

  2) get a bunch of tunes
    mp3scrape http://troubledsoulsunite.blogspot.com/
txt

# The single positional argument: the page to scrape.
argument("uri"){
  description "the uri to scrape"
  cast :uri
}

# Regular expression source used to pull mp3 links out of the page. The first
# capture group is the link. The dot before "mp3" is escaped so that only a
# literal ".mp3" suffix matches.
option("pattern", "p"){
  description "specify the mp3 pattern"
  argument_required
  default %|['"](http://[^\\s]+[^/\\s]+\\.mp3)["']|
}

option("basedir", "b"){
  description "specify the base download dir - default(#{ Basedir })"
  argument_required
  default Basedir
}

option("destination", "d"){
  description "specify the absolute download dir - default(#{ File.join Basedir, 'auto-based-on-uri' })"
  argument_required
}

option("list"){
  description "only list the mp3s that would be scraped"
}

option("threads", "t"){
  description "specify the number of threads to download with in parallel - default(#{ Threads })"
  argument_required
  default Threads
  cast :integer
}

option("noop", "n"){
  description "show the downloads that would be performed"
}
|
62
|
+
|
63
|
+
# Program entry point: fetch the page named by the "uri" argument, extract
# every match of the "pattern" option, then either list the matches (--list),
# print the planned downloads (--noop), or mirror them to disk.
def run
  uri = param["uri"].value
  pattern = %r/#{ param["pattern"].value }/

  # Read through the URI object (open-uri) instead of Kernel#open: Kernel#open
  # no longer fetches URLs on Ruby 3.0+. Anything that does not parse to a
  # fetchable URI is treated as a local file, as Kernel#open did.
  target = URI.parse(uri.to_s)
  page = target.respond_to?(:read) ? target.read : File.read(uri.to_s)

  # uniq: a link that appears several times on the page must be downloaded
  # once, otherwise parallel workers write the same destination file.
  srcs = page.scan(pattern).flatten.compact.uniq

  if param["list"].given?
    puts srcs
    exit
  end

  dsts = destinations_for srcs, param["destination"].value
  spec = srcs.zip dsts

  if param["noop"].given?
    spec.each{|src, dst| puts "#{ src } -> #{ dst }"}
  else
    mirror spec
  end
end
|
79
|
+
|
80
|
+
# Download every [src, dst] pair in +spec+, using the number of parallel
# workers given by the "threads" option.
#
# A file is fetched only when the remote Last-Modified time (or "now" when the
# server sends none) is newer than the local file's mtime. After a download
# the local mtime is set to that remote time, so an unchanged file is skipped
# on the next run. Failures are reported on STDERR per file and do not stop
# the other downloads.
def mirror spec
  spec.threadify(params["threads"].value) do |src, dst|
    begin
      FileUtils.mkdir_p(File.dirname(dst))
      # A missing or unreadable destination counts as infinitely old.
      mtime = File.stat(dst).mtime rescue Time.at(0)

      # src comes from a scraped page: open it strictly as a URI so a crafted
      # match (e.g. one starting with "|") can never reach Kernel#open.
      URI.parse(src.to_s).open do |remote|
        last_modified = remote.last_modified || Time.now

        if last_modified > mtime
          data = remote.read
          File.open(dst, "wb"){|out| out.write data}
          File.utime last_modified, last_modified, dst
          # A single print including the newline keeps each report on its own line.
          print "#{ src } -> #{ dst }\n"
        else
          print "#{ src } == #{ dst }\n"
        end
      end
    rescue Object => e
      # Deliberately broad: one bad link must not take down the whole run.
      STDERR.puts "#{ e.message } (#{ e.class })"
    end
  end
end
|
101
|
+
|
102
|
+
# Map each source url in +srcs+ to the absolute local path it is saved under.
#
# srcs        - list of source urls (anything responding to to_s)
# destination - when given, every file lands directly in this directory under
#               its cleaned basename
# basedir     - root of the mirror tree used when no destination is given;
#               defaults to the value of the "basedir" option, so that
#               --basedir is honoured instead of the built-in default
#
# Without a destination the path is <basedir>/<host>/<cleaned url path>.
# Returns an array of expanded paths, in the same order as +srcs+.
def destinations_for srcs, destination = nil, basedir = nil
  basedir ||= param["basedir"].value unless destination

  srcs.map do |src|
    target =
      if destination
        File.join destination, clean(File.basename(src))
      else
        uri = URI.parse src.to_s
        segments = uri.path.split("/").map{|segment| clean segment}
        # Empty segments (leading slash) collapse in File.expand_path below.
        [ basedir, uri.host, segments ].flatten.compact.join(File::SEPARATOR)
      end
    File.expand_path(target)
  end
end
|
118
|
+
|
119
|
+
# Turn a url path component into a filesystem-friendly name: url-decode it,
# replace every character outside [0-9a-zA-Z_@)(~.-] with "_", then squeeze
# runs of "_" into one. nil is treated as the empty string.
def clean basename
  decoded = CGI.unescape(basename.to_s)
  replaced = decoded.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_')
  replaced.gsub(%r/_+/,'_')
end
|
122
|
+
}
|
123
|
+
|
124
|
+
BEGIN {
|
125
|
+
require "yaml"
|
126
|
+
require "uri"
|
127
|
+
require "open-uri"
|
128
|
+
require "fileutils"
|
129
|
+
require "cgi"
|
130
|
+
|
131
|
+
begin
|
132
|
+
require "rubygems"
|
133
|
+
rescue LoadError
|
134
|
+
42
|
135
|
+
end
|
136
|
+
|
137
|
+
begin
|
138
|
+
require "main"
|
139
|
+
rescue LoadError
|
140
|
+
STDERR.puts "gem install main"
|
141
|
+
exit 1
|
142
|
+
end
|
143
|
+
|
144
|
+
begin
|
145
|
+
require "threadify"
|
146
|
+
rescue LoadError
|
147
|
+
STDERR.puts "gem install threadify"
|
148
|
+
exit 1
|
149
|
+
end
|
150
|
+
|
151
|
+
STDERR.sync = STDOUT.sync = true
|
152
|
+
|
153
|
+
trap("INT"){ exit! }
|
154
|
+
}
|
data/gemspec.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Gem specification. The gem name and version are taken from the name of the
# directory containing this file, split on the first "-" ("<lib>-<version>").
lib, version = File::basename(File::dirname(File::expand_path(__FILE__))).split %r/-/, 2

require 'rubygems'

Gem::Specification::new do |spec|
  $VERBOSE = nil

  # Drops version-control and temp files from a file list (mutates the list).
  without_cruft = lambda do |list|
    list.delete_if do |file|
      file =~ %r/\.svn/ or
      file =~ %r/\.tmp/
    end
  end

  spec.name = lib
  spec.version = version
  spec.platform = Gem::Platform::RUBY
  spec.summary = lib

  spec.files = without_cruft[Dir::glob("**/**")]
  spec.executables = without_cruft[Dir::glob("bin/*")].map{|exe| File::basename exe}

  spec.require_path = "lib"

  # These setters do not exist in every RubyGems release; only call the ones
  # the running RubyGems still provides so the spec loads everywhere.
  spec.has_rdoc = File::exist? "doc" if spec.respond_to?(:has_rdoc=)
  if File::directory?("test") and spec.respond_to?(:test_suite_file=)
    spec.test_suite_file = "test/#{ lib }.rb"
  end
  #spec.add_dependency 'lib', '>= version'

  # File::exist? rather than File::exists?, which newer Rubies no longer define.
  spec.extensions << "extconf.rb" if File::exist? "extconf.rb"

  spec.rubyforge_project = 'codeforpeople' if spec.respond_to?(:rubyforge_project=)
  spec.author = "Ara T. Howard"
  spec.email = "ara.t.howard@gmail.com"
  spec.homepage = "http://codeforpeople.com/lib/ruby/#{ lib }/"
end
|
data/install.rb
ADDED
@@ -0,0 +1,214 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rbconfig'
|
3
|
+
require 'find'
|
4
|
+
require 'ftools'
|
5
|
+
require 'tempfile'
|
6
|
+
# Bring CONFIG (the build-time settings hash) into scope. RbConfig is used
# because the top-level Config alias is not available on newer Rubies.
include RbConfig

# Source directories inside the package and the modes their files get.
LIBDIR = "lib"
LIBDIR_MODE = 0644

BINDIR = "bin"
BINDIR_MODE = 0755

# Install locations derived from the running interpreter's configuration.
$srcdir = CONFIG["srcdir"]
$version = CONFIG["MAJOR"]+"."+CONFIG["MINOR"]
$libdir = File.join(CONFIG["libdir"], "ruby", $version)
$archdir = File.join($libdir, CONFIG["arch"])
$site_libdir = $:.find {|x| x =~ /site_ruby$/}
$bindir = CONFIG["bindir"] || CONFIG['BINDIR']
$ruby_install_name = CONFIG['ruby_install_name'] || CONFIG['RUBY_INSTALL_NAME'] || 'ruby'
$ruby_ext = CONFIG['EXEEXT'] || ''
$ruby = File.join($bindir, ($ruby_install_name + $ruby_ext))

# Fall back to <libdir>/site_ruby when the load path has no site_ruby entry;
# otherwise make sure the chosen directory is the version-specific one.
if !$site_libdir
  $site_libdir = File.join($libdir, "site_ruby")
elsif $site_libdir !~ %r/#{Regexp.quote($version)}/
  $site_libdir = File.join($site_libdir, $version)
end
|
30
|
+
|
31
|
+
# Copy every regular file below +srcdir+ into the same relative place below
# +destdir+, creating sub-directories as needed and applying +mode+.
#
# Version-control, link, swap, backup (~) and dot files are skipped. When
# +bin+ is truthy each file is passed through shebangify first, so scripts get
# the configured interpreter on their first line, and the copy is reported on
# $deferr.
def install_rb(srcdir=nil, destdir=nil, mode=nil, bin=nil)
  files = []
  dirs = []

  Find.find(srcdir) do |found|
    next unless FileTest.file?(found)
    # Path relative to srcdir (drops "srcdir/").
    relative = found[srcdir.length+1..-1]
    next if relative == nil
    parent = File.dirname(relative)
    next if /CVS$/ =~ parent
    next if /\.svn/ =~ parent
    next if [%r/\.lnk/, %r/\.svn/, %r/\.swp/].any?{|junk| relative =~ junk}
    files.push relative
    dirs.push parent unless dirs.include?(parent)
  end

  dirs.each do |sub|
    next if sub == "." || sub == "CVS"
    File::makedirs(File.join(destdir, sub))
  end

  files.each do |relative|
    next if (/\~$/ =~ relative)
    next if (/^\./ =~ File.basename(relative))
    from = File.join(srcdir, relative)
    to = File.join(destdir, relative)
    if bin
      shebangify(from) do |sf|
        $deferr.print from, " -> ", File::catname(from, to), "\n"
        $deferr.printf "chmod %04o %s\n", mode, to
        File::install(sf, to, mode, false)
      end
    else
      File::install(from, to, mode, true)
    end
  end
end
|
69
|
+
# Yield the path of a version of file +f+ whose first line names the
# configured interpreter ($ruby).
#
# When the first 42 bytes of +f+ contain a ruby shebang, a temporary copy is
# written with "#!<$ruby>" as a new first line followed by the whole original
# content, and the temp file's path is yielded; the temp file is removed
# afterwards. Otherwise +f+ itself is yielded. Returns the block's result.
def shebangify f
  open(f) do |script|
    preamble = script.read 42
    next yield(f) unless preamble =~ %r/^\s*#\s*!.*ruby/o

    rewritten = Tempfile::new("#{ $$ }_#{ File::basename(f) }")
    begin
      script.rewind
      rewritten.puts "#!#{ $ruby }"
      while((chunk = script.read(8192)))
        rewritten.write chunk
      end
      rewritten.close
      yield rewritten.path
    ensure
      rewritten.close!
    end
  end
end
|
92
|
+
# Pop the next command-line switch off ARGV.
#
# Returns nil when ARGV is empty, when the next word is "--" (which is
# consumed), or when the next word is not a switch (it is left in place).
# A "--long" word is returned as is. A bundle such as "-ab" yields "-a" and
# pushes "-b" back for the next call. Raises when a short bundle contains "-".
def ARGV.switch
  return nil if empty?

  arg = shift
  return nil if arg == '--'

  match = /^-(.)(.*)/.match(arg)
  unless match
    unshift arg
    return nil
  end

  flag, rest = match[1], match[2]
  return arg if flag == '-'
  raise 'unknown switch "-"' if rest.index('-')
  unshift "-#{rest}" if rest.size > 0
  "-#{flag}"
end
|
108
|
+
# Pop and return the next word from ARGV; raises 'missing argument' when
# nothing is left.
def ARGV.req_arg
  value = shift
  raise('missing argument') unless value
  value
end
|
113
|
+
# For every "name-X.Y.Z.rb" file directly inside directory +d+, make sure an
# unversioned "name.rb" exists next to it for the duration of the install.
#
# If "name.rb" is a symlink it is renamed to "name.rb.lnk" and an at_exit hook
# renames it back. If "name.rb" then does not exist, the versioned file is
# copied to it and the copy's path is appended to +linked+.
#
# Returns +linked+ (unchanged when +d+ is not a directory).
def linkify d, linked = []
  return linked unless File.directory?(d)

  Dir[ File::join(d, "*-[0-9].[0-9].[0-9].rb") ].each do |src|
    dst = src.gsub(%r/\-[\d\.]+\.rb$/, '.rb')
    lnk = nil
    begin
      if File.symlink?(dst)
        lnk = "#{ dst }.lnk"
        puts "#{ dst } -> #{ lnk }"
        File::rename dst, lnk
      end
      unless File.exist?(dst)
        puts "#{ src } -> #{ dst }"
        File::copy src, dst
        linked << dst
      end
    ensure
      # Registered even when the copy failed, so the original link is restored.
      unless lnk.nil?
        at_exit do
          puts "#{ lnk } -> #{ dst }"
          File::rename lnk, dst
        end
      end
    end
  end

  linked
end
|
144
|
+
|
145
|
+
|
146
|
+
#
|
147
|
+
# main program
|
148
|
+
#
|
149
|
+
|
150
|
+
# Installer driver: parse the command line, run the optional pre-install
# script, create temporary unversioned copies of versioned files, build any
# extension, install lib/ and bin/, remove the temporary copies, then run the
# optional post-install script.
libdir = $site_libdir
bindir = $bindir
no_linkify = false
linked = nil
help = false
sudo = nil # stays nil (empty in the command strings) unless -s/--sudo is given

usage = <<-usage
  #{ File::basename $0 }
    -d, --destdir <destdir>
    -l, --libdir <libdir>
    -b, --bindir <bindir>
    -r, --ruby <ruby>
    -n, --no_linkify
    -s, --sudo
    -h, --help
usage

begin
  loop do
    switch = ARGV.switch
    break unless switch

    case switch
    when '-d', '--destdir', '-l', '--libdir'
      libdir = ARGV.req_arg
    when '-b', '--bindir'
      bindir = ARGV.req_arg
    when '-r', '--ruby'
      $ruby = ARGV.req_arg
    when '-n', '--no_linkify'
      no_linkify = true
    when '-s', '--sudo'
      sudo = 'sudo'
    when '-h', '--help'
      help = true
    else
      raise "unknown switch #{switch.dump}"
    end
  end
rescue => e
  # Any parse problem: report it, show the usage and fail.
  STDERR.puts e.to_s
  STDERR.puts usage
  exit 1
end

if help
  STDOUT.puts usage
  exit
end

# The hook scripts and extconf.rb only run when present and non-empty.
system "#{ sudo } #{ $ruby } pre-install.rb" if test(?s, 'pre-install.rb')

linked = linkify('lib') + linkify('bin') unless no_linkify

system "#{ $ruby } extconf.rb && make && #{ sudo } make install" if test(?s, 'extconf.rb')

install_rb(LIBDIR, libdir, LIBDIR_MODE)
install_rb(BINDIR, bindir, BINDIR_MODE, true)

# Remove the temporary unversioned copies linkify created.
linked.each{|path| File::rm_f path} if linked

system "#{ sudo } #{ $ruby } post-install.rb" if test(?s, 'post-install.rb')
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mp3scrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ara T. Howard
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-07-31 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: ara.t.howard@gmail.com
|
18
|
+
executables:
|
19
|
+
- mp3scrape
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- bin
|
26
|
+
- bin/mp3scrape
|
27
|
+
- gemspec.rb
|
28
|
+
- install.rb
|
29
|
+
- README
|
30
|
+
has_rdoc: false
|
31
|
+
homepage: http://codeforpeople.com/lib/ruby/mp3scrape/
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
|
35
|
+
require_paths:
|
36
|
+
- lib
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: "0"
|
42
|
+
version:
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
requirements: []
|
50
|
+
|
51
|
+
rubyforge_project: codeforpeople
|
52
|
+
rubygems_version: 1.2.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 2
|
55
|
+
summary: mp3scrape
|
56
|
+
test_files: []
|
57
|
+
|