mp3scrape 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +43 -0
- data/bin/mp3scrape +154 -0
- data/gemspec.rb +35 -0
- data/install.rb +214 -0
- metadata +57 -0
data/README
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
NAME
|
2
|
+
mp3scrape
|
3
|
+
|
4
|
+
SYNOPSIS
|
5
|
+
mp3scrape uri [options]+
|
6
|
+
|
7
|
+
DESCRIPTION
|
8
|
+
mp3scrape will scour any url for its mp3 content - the script mirrors,
|
9
|
+
never downloading the same file twice. it does not, however, crawl a
|
10
|
+
website for links; it simply scrapes all the songs from a single page.
|
11
|
+
|
12
|
+
PARAMETERS
|
13
|
+
uri (1 -> uri(uri))
|
14
|
+
the uri to scrape
|
15
|
+
--pattern=pattern, -p (0 ~> pattern=['"](http://[^\s]+[^/\s]+.mp3)["'])
|
16
|
+
specify the mp3 pattern
|
17
|
+
--basedir=basedir, -b (0 ~> basedir=/Users/ahoward/mp3)
|
18
|
+
specify the base download dir - default(/Users/ahoward/mp3)
|
19
|
+
--destination=destination, -d (0 ~> destination)
|
20
|
+
specify the absolute download dir -
|
21
|
+
default(/Users/ahoward/mp3/auto-based-on-uri)
|
22
|
+
--list
|
23
|
+
only list the mp3s that would be scraped
|
24
|
+
--threads=threads, -t (0 ~> integer(threads=8))
|
25
|
+
specify the number of threads to download with in parallel -
|
26
|
+
default(8)
|
27
|
+
--noop, -n
|
28
|
+
show the downloads that would be performed
|
29
|
+
--help, -h
|
30
|
+
|
31
|
+
INSTALL
|
32
|
+
sudo gem install mp3scrape
|
33
|
+
|
34
|
+
URI
|
35
|
+
http://codeforpeople.com
|
36
|
+
|
37
|
+
EXAMPLES
|
38
|
+
1) get a bunch of xmas tunes
|
39
|
+
mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html
|
40
|
+
|
41
|
+
2) get a bunch of tunes
|
42
|
+
mp3scrape http://troubledsoulsunite.blogspot.com/
|
43
|
+
|
data/bin/mp3scrape
ADDED
@@ -0,0 +1,154 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
|
3
|
+
Main {
|
4
|
+
Home = File.expand_path(ENV["HOME"] || ENV["USERPROFILE"] || "~")
|
5
|
+
Basedir = File.join(Home, "mp3")
|
6
|
+
Threads = 8
|
7
|
+
|
8
|
+
# Program description handed to the Main DSL (the same text appears under
# DESCRIPTION in the README).
# NOTE(review): leading whitespace inside this heredoc could not be recovered
# from this view of the file - confirm the indentation against the released
# script before applying.
description <<-txt
  mp3scrape will scour any url for its mp3 content - the script mirrors,
  never downloading the same file twice. it does not, however, crawl a
  website for links, it simply scrapes all the songs from a single page.
txt
|
13
|
+
|
14
|
+
usage['INSTALL'] = 'sudo gem install mp3scrape'
|
15
|
+
|
16
|
+
usage['URI'] = 'http://codeforpeople.com'
|
17
|
+
|
18
|
+
# Usage examples handed to the Main DSL (the same text appears under EXAMPLES
# in the README).  The second example previously invoked a non-existent
# "mp2scrape" command.
# NOTE(review): leading whitespace inside this heredoc could not be recovered
# from this view of the file - confirm the indentation against the released
# script before applying.
example <<-txt
  1) get a bunch of xmas tunes
    mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html

  2) get a bunch of tunes
    mp3scrape http://troubledsoulsunite.blogspot.com/
txt
|
25
|
+
|
26
|
+
# --- command line interface -------------------------------------------------

# Positional argument: the page to scrape, cast to a uri by Main.
argument("uri"){
  description "the uri to scrape"
  cast :uri
}

# Regex source used to pull mp3 links out of the page; `run` wraps it in a
# Regexp and collects the capture group(s).
option("pattern", "p"){
  description "specify the mp3 pattern"
  argument_required
  default %|['"](http://[^\\s]+[^/\\s]+.mp3)["']|
}

# Root directory under which per-host download directories are created.
option("basedir", "b"){
  description "specify the base download dir - default(#{ Basedir })"
  argument_required
  default Basedir
}

# Explicit download directory; when given, every file is saved directly in it.
option("destination", "d"){
  description "specify the absolute download dir - default(#{ File.join Basedir, 'auto-based-on-uri' })"
  argument_required
}

# Print the matched mp3 urls and exit without downloading.
option("list"){
  description "only list the mp3s that would be scraped"
}

# Number of parallel download threads handed to threadify.
option("threads", "t"){
  description "specify the number of threads to download with in parallel - default(#{ Threads })"
  argument_required
  default Threads
  cast :integer
}

# Print the src -> dst plan without downloading anything.
option("noop", "n"){
  description "show the downloads that would be performed"
}
|
62
|
+
|
63
|
+
# Program entry point.
#
# Reads the page named by the "uri" argument, collects every match of the
# --pattern regex, and then either
#   * prints the matches and exits (--list),
#   * prints the planned "src -> dst" pairs (--noop), or
#   * mirrors the files to disk.
def run
  uri = param["uri"].value
  pattern = %r/#{ param["pattern"].value }/

  # Block form so the page handle is always closed.  URI objects that know how
  # to open themselves (open-uri mixes #open into http/ftp uris) are asked
  # directly, since a bare Kernel#open no longer fetches urls on Ruby 3+;
  # anything else falls back to opening the argument as before.
  page =
    if uri.respond_to?(:open)
      uri.open{|fd| fd.read}
    else
      open(uri.to_s){|fd| fd.read}
    end

  srcs = page.scan(pattern).flatten.compact

  if param["list"].given?
    puts srcs
    exit
  end

  dsts = destinations_for srcs, param["destination"].value
  spec = srcs.zip dsts

  if param["noop"].given?
    spec.each{|src, dst| puts "#{ src } -> #{ dst }"}
  else
    mirror spec
  end
end
|
79
|
+
|
80
|
+
# Download every [src, dst] pair in +spec+, in parallel.
#
# spec - array of [source url, destination path] pairs.
#
# A file is (re)written only when the remote Last-Modified time is newer than
# the local file's mtime; the local mtime is then set to the remote time.
# When the server sends no Last-Modified header the current time is used, so
# such files are always rewritten.  Failures are reported on STDERR per file
# and do not stop the other downloads.
def mirror spec
  spec.threadify(params["threads"].value) do |src, dst|
    begin
      FileUtils.mkdir_p(File.dirname(dst))
      mtime = File.exist?(dst) ? File.stat(dst).mtime : Time.at(0)

      # Parse the scraped string as a uri and let open-uri open *that*: a bare
      # Kernel#open would treat a string such as "|cmd" as a command to run,
      # and no longer fetches urls at all on Ruby 3+.
      URI.parse(src.to_s).open do |fd|
        last_modified = fd.last_modified || Time.now

        if last_modified > mtime
          data = fd.read
          File.open(dst, "wb"){|out| out.write data}
          File.utime last_modified, last_modified, dst
          # single print call with an explicit newline: one line per file
          print "#{ src } -> #{ dst }\n"
        else
          print "#{ src } == #{ dst }\n"
        end
      end
    rescue StandardError => e
      # StandardError only, so exit requests and signals are not swallowed
      STDERR.puts "#{ e.message } (#{ e.class })"
    end
  end
end
|
101
|
+
|
102
|
+
# Map each source url to the absolute local path it should be saved to.
#
# srcs        - array of source url strings.
# destination - optional directory; when given every file is stored directly
#               inside it under its cleaned basename.
#
# Without +destination+ the path is built as <basedir>/<host>/<url path>,
# where <basedir> is the value of the --basedir option (previously the option
# was accepted but the Basedir constant was always used).
#
# Returns an array of expanded paths, in the same order as +srcs+.
def destinations_for srcs, destination = nil
  basedir = param["basedir"].value || Basedir

  srcs.map do |src|
    path =
      if destination
        File.join destination, clean(File.basename(src))
      else
        uri = URI.parse src.to_s
        segments = uri.path.split("/").map{|segment| clean segment}
        # urls come from a scraped page: drop empty and "."/".." segments so a
        # crafted path cannot climb out of the base directory
        segments = segments.reject{|segment| segment.empty? || segment == "." || segment == ".."}
        [ basedir, uri.host, segments ].flatten.compact.join(File::SEPARATOR)
      end

    File.expand_path(path)
  end
end
|
118
|
+
|
119
|
+
# Turn a url path component into a safe file name.
#
# The value is url-decoded, every character outside 0-9 a-z A-Z _ @ ) ( ~ . -
# is replaced by "_", and runs of "_" are collapsed into one.
#
# basename - anything responding to to_s (nil yields "").
#
# Returns the cleaned String.
def clean basename
  name = CGI.unescape(basename.to_s)

  # Percent-escapes can decode to bytes that are not valid UTF-8 (for example
  # Latin-1 "%E9"), on which a regex substitution raises ArgumentError.  Work
  # on raw bytes instead; because runs of "_" are collapsed the result for
  # valid multi-byte characters is unchanged.
  name = name.dup.force_encoding("BINARY") if name.respond_to?(:force_encoding)

  name = name.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_').gsub(%r/_+/, '_')

  # only ASCII remains at this point, so relabelling is always valid
  name.force_encoding("UTF-8") if name.respond_to?(:force_encoding)
  name
end
|
122
|
+
}
|
123
|
+
|
124
|
+
BEGIN {
  # standard library pieces used by the scraper
  %w[ yaml uri open-uri fileutils cgi ].each{|lib| require lib}

  # rubygems is optional: carry on without it when it cannot be loaded
  begin
    require "rubygems"
  rescue LoadError
    nil
  end

  # hard dependencies: tell the user how to get them and give up
  %w[ main threadify ].each do |dependency|
    begin
      require dependency
    rescue LoadError
      STDERR.puts "gem install #{ dependency }"
      exit 1
    end
  end

  # unbuffered output for both streams
  STDOUT.sync = true
  STDERR.sync = true

  # ctrl-c terminates immediately
  trap("INT"){ exit! }
}
|
data/gemspec.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Gem specification.  The gem name and version are derived from the name of
# the directory containing this file, which is expected to look like
# "<name>-<version>".
lib, version = File::basename(File::dirname(File::expand_path(__FILE__))).split %r/-/, 2

require 'rubygems'

Gem::Specification::new do |spec|
  $VERBOSE = nil

  # drops version-control and temp entries from a file list (in place)
  without_junk = lambda do |list|
    list.delete_if do |file|
      file =~ %r/\.svn/ or
      file =~ %r/\.tmp/
    end
  end

  spec.name = lib
  spec.version = version
  spec.platform = Gem::Platform::RUBY
  spec.summary = lib

  spec.files = without_junk[Dir::glob("**/**")]
  spec.executables = without_junk[Dir::glob("bin/*")].map{|exe| File::basename exe}

  spec.require_path = "lib"

  spec.has_rdoc = File::exist? "doc"
  spec.test_suite_file = "test/#{ lib }.rb" if File::directory? "test"
  #spec.add_dependency 'lib', '>= version'

  # File::exist? (not the removed File::exists? alias), matching the check above
  spec.extensions << "extconf.rb" if File::exist? "extconf.rb"

  spec.rubyforge_project = 'codeforpeople'
  spec.author = "Ara T. Howard"
  spec.email = "ara.t.howard@gmail.com"
  spec.homepage = "http://codeforpeople.com/lib/ruby/#{ lib }/"
end
|
data/install.rb
ADDED
@@ -0,0 +1,214 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rbconfig'
|
3
|
+
require 'find'
|
4
|
+
require 'ftools'
|
5
|
+
require 'tempfile'
|
6
|
+
# RbConfig is the supported name of the build configuration module; the old
# top-level `Config` constant no longer exists in current rubies.
include RbConfig

# source directory names inside the package and the modes files are installed with
LIBDIR = "lib"
LIBDIR_MODE = 0644

BINDIR = "bin"
BINDIR_MODE = 0755


# installation targets derived from the running interpreter's configuration
$srcdir = CONFIG["srcdir"]
$version = CONFIG["MAJOR"]+"."+CONFIG["MINOR"]
$libdir = File.join(CONFIG["libdir"], "ruby", $version)
$archdir = File.join($libdir, CONFIG["arch"])
$site_libdir = $:.find {|x| x =~ /site_ruby$/}
$bindir = CONFIG["bindir"] || CONFIG['BINDIR']
$ruby_install_name = CONFIG['ruby_install_name'] || CONFIG['RUBY_INSTALL_NAME'] || 'ruby'
$ruby_ext = CONFIG['EXEEXT'] || ''
$ruby = File.join($bindir, ($ruby_install_name + $ruby_ext))

# No site_ruby entry on the load path: fall back to <libdir>/site_ruby.
# Otherwise make sure the chosen directory is the version specific one.
if !$site_libdir
  $site_libdir = File.join($libdir, "site_ruby")
elsif $site_libdir !~ %r/#{Regexp.quote($version)}/
  $site_libdir = File.join($site_libdir, $version)
end
|
30
|
+
|
31
|
+
# Install every regular file found under +srcdir+ into +destdir+, keeping the
# relative directory layout.
#
# srcdir  - directory to copy from.
# destdir - directory to copy into; needed sub-directories are created.
# mode    - permission bits passed to File::install.
# bin     - when true each file goes through shebangify first, so scripts
#           with a ruby shebang are installed pointing at $ruby.
#
# Skipped: anything inside a directory whose name ends in CVS, any path
# containing .svn, .lnk or .swp, names ending in "~", and dot files.
def install_rb(srcdir=nil, destdir=nil, mode=nil, bin=nil)
#{{{
  files = []

  Find.find(srcdir) do |found|
    next unless FileTest.file?(found)
    relative = found[srcdir.length+1..-1]
    next if relative.nil?
    next if File.dirname(relative) =~ /CVS$/
    next if relative =~ %r/\.(?:lnk|svn|swp)/
    files << relative
  end

  # create the target directory tree, in first-seen order
  files.map{|relative| File.dirname(relative)}.uniq.each do |subdir|
    next if subdir == "."
    File::makedirs(File.join(destdir, subdir))
  end

  files.each do |relative|
    next if relative =~ /\~$/
    next if File.basename(relative) =~ /^\./

    from = File.join(srcdir, relative)
    to = File.join(destdir, relative)

    if bin
      shebangify(from) do |sf|
        # $stderr rather than the obsolete $deferr alias
        $stderr.print from, " -> ", File::catname(from, to), "\n"
        $stderr.printf "chmod %04o %s\n", mode, to
        File::install(sf, to, mode, false)
      end
    else
      File::install(from, to, mode, true)
    end
  end
#}}}
end
|
69
|
+
# Yield the path of a version of +f+ whose shebang points at $ruby.
#
# When a ruby shebang is found within the first 42 bytes of +f+, a temporary
# copy is written that starts with "#!<$ruby>" and its path is yielded; the
# temporary file is removed once the block returns.  If the first line of +f+
# is itself the shebang it is replaced rather than kept as a second, stale
# shebang line.  Files without a ruby shebang are yielded unchanged.
def shebangify f
#{{{
  shebang = %r/\s*#\s*!.*ruby/

  File.open(f) do |fd|
    head = fd.read 42

    if head =~ %r/^#{ shebang.source }/
      ftmp = Tempfile::new("#{ $$ }_#{ File::basename(f) }")
      begin
        fd.rewind
        ftmp.puts "#!#{ $ruby }"

        # drop the original shebang line, keep any other first line
        first_line = fd.gets
        ftmp.write first_line unless first_line.nil? || first_line =~ %r/\A#{ shebang.source }/

        while((buf = fd.read(8192)))
          ftmp.write buf
        end
        ftmp.close
        yield ftmp.path
      ensure
        ftmp.close!
      end
    else
      yield f
    end
  end
#}}}
end
|
92
|
+
# Pop the next command line switch off ARGV.
#
# Returns
#   * nil when ARGV is empty, when the next word is "--" (which is consumed),
#     or when the next word is not a switch (it is left in place);
#   * the whole word for long switches ("--name");
#   * "-x" for short switches.  Bundled short switches ("-abc") are split:
#     "-a" is returned and "-bc" is pushed back for the next call.
#
# Raises RuntimeError when a "-" appears inside a short switch bundle.
def ARGV.switch
#{{{
  return nil if empty?

  arg = shift
  return nil if arg == '--'

  # \A so that only the start of the word itself can introduce a switch
  match = %r/\A-(.)(.*)/.match(arg)
  unless match
    unshift arg
    return nil
  end

  flag, rest = match[1], match[2]
  return arg if flag == '-'

  raise 'unknown switch "-"' if rest.index('-')
  unshift "-#{ rest }" if rest.size > 0
  "-#{ flag }"
#}}}
end
|
108
|
+
# Pop and return the next word from ARGV as a switch's required value.
#
# Raises RuntimeError ("missing argument") when nothing is left.
def ARGV.req_arg
#{{{
  value = shift
  raise('missing argument') unless value
  value
#}}}
end
|
113
|
+
# Give every versioned ruby file in +d+ an unversioned copy next to it
# ("name-1.2.3.rb" -> "name.rb").
#
# d      - directory to scan; nothing happens if it is not a directory.
# linked - array the created copies are appended to.
#
# Version components may have any number of digits ("name-1.10.2.rb"); the
# previous glob only accepted single digits although the rename pattern
# already handled longer ones.
#
# An existing symlink at the unversioned name is moved aside to "<name>.lnk"
# and moved back by an at_exit handler.  An existing regular file is left
# alone.
#
# Returns +linked+.
def linkify d, linked = []
#--{{{
  return linked unless File.directory?(d)

  version_suffix = %r/-\d+\.\d+\.\d+\.rb\z/

  # the glob is a coarse pre-filter, the regex enforces the exact shape
  candidates = Dir[ File::join(d, "*-[0-9]*.[0-9]*.[0-9]*.rb") ]
  versioned = candidates.select{|path| path =~ version_suffix}

  versioned.each do |v|
    src, dst = v, v.sub(version_suffix, '.rb')
    lnk = nil
    begin
      if File.symlink?(dst)
        lnk = "#{ dst }.lnk"
        puts "#{ dst } -> #{ lnk }"
        File::rename dst, lnk
      end
      unless File.exist?(dst)
        puts "#{ src } -> #{ dst }"
        File::copy src, dst
        linked << dst
      end
    ensure
      if lnk
        # put the original symlink back when the installer exits
        at_exit do
          puts "#{ lnk } -> #{ dst }"
          File::rename lnk, dst
        end
      end
    end
  end

  linked
#--}}}
end
|
144
|
+
|
145
|
+
|
146
|
+
#
|
147
|
+
# main program
|
148
|
+
#
|
149
|
+
|
150
|
+
libdir = $site_libdir
|
151
|
+
bindir = $bindir
|
152
|
+
no_linkify = false
|
153
|
+
linked = nil
|
154
|
+
help = false
|
155
|
+
|
156
|
+
usage = <<-usage
|
157
|
+
#{ File::basename $0 }
|
158
|
+
-d, --destdir <destdir>
|
159
|
+
-l, --libdir <libdir>
|
160
|
+
-b, --bindir <bindir>
|
161
|
+
-r, --ruby <ruby>
|
162
|
+
-n, --no_linkify
|
163
|
+
-s, --sudo
|
164
|
+
-h, --help
|
165
|
+
usage
|
166
|
+
|
167
|
+
# Prefix for commands that need elevated rights; stays nil (interpolating to
# an empty string) unless -s/--sudo is given.  Initialised explicitly instead
# of relying on the assignment inside the option loop to define the local.
sudo = nil

# --- option parsing ---------------------------------------------------------
begin
  while switch = ARGV.switch
    case switch
    when '-d', '--destdir'
      # NOTE(review): -d sets the same variable as -l; confirm whether a
      # separate destination root was intended.
      libdir = ARGV.req_arg
    when '-l', '--libdir'
      libdir = ARGV.req_arg
    when '-b', '--bindir'
      bindir = ARGV.req_arg
    when '-r', '--ruby'
      $ruby = ARGV.req_arg
    when '-n', '--no_linkify'
      no_linkify = true
    when '-s', '--sudo'
      sudo = 'sudo'
    when '-h', '--help'
      help = true
    else
      raise "unknown switch #{switch.dump}"
    end
  end
rescue
  STDERR.puts $!.to_s
  STDERR.puts usage
  exit 1
end

if help
  STDOUT.puts usage
  exit
end

# --- installation -----------------------------------------------------------

# optional hook, run only when the script exists and is non-empty
system "#{ sudo } #{ $ruby } pre-install.rb" if test(?s, 'pre-install.rb')

# temporary unversioned copies of versioned files, removed again below
unless no_linkify
  linked = linkify('lib') + linkify('bin')
end

# build and install a C extension when the package ships one
system "#{ $ruby } extconf.rb && make && #{ sudo } make install" if test(?s, 'extconf.rb')

install_rb(LIBDIR, libdir, LIBDIR_MODE)
install_rb(BINDIR, bindir, BINDIR_MODE, true)

if linked
  linked.each{|path| File::rm_f path}
end

# optional hook, run only when the script exists and is non-empty
system "#{ sudo } #{ $ruby } post-install.rb" if test(?s, 'post-install.rb')
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mp3scrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ara T. Howard
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-07-31 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: ara.t.howard@gmail.com
|
18
|
+
executables:
|
19
|
+
- mp3scrape
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- bin
|
26
|
+
- bin/mp3scrape
|
27
|
+
- gemspec.rb
|
28
|
+
- install.rb
|
29
|
+
- README
|
30
|
+
has_rdoc: false
|
31
|
+
homepage: http://codeforpeople.com/lib/ruby/mp3scrape/
|
32
|
+
post_install_message:
|
33
|
+
rdoc_options: []
|
34
|
+
|
35
|
+
require_paths:
|
36
|
+
- lib
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: "0"
|
42
|
+
version:
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
requirements: []
|
50
|
+
|
51
|
+
rubyforge_project: codeforpeople
|
52
|
+
rubygems_version: 1.2.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 2
|
55
|
+
summary: mp3scrape
|
56
|
+
test_files: []
|
57
|
+
|