mp3scrape 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. data/README +43 -0
  2. data/bin/mp3scrape +154 -0
  3. data/gemspec.rb +35 -0
  4. data/install.rb +214 -0
  5. metadata +57 -0
data/README ADDED
@@ -0,0 +1,43 @@
1
+ NAME
2
+ mp3scrape
3
+
4
+ SYNOPSIS
5
+ mp3scrape uri [options]+
6
+
7
+ DESCRIPTION
8
+ mp3scrape will scour any url for its mp3 content - the script mirrors,
9
+ never downloading the same file twice. it does not, however, crawl a
10
+ website for links, it simply scrapes all the songs from a single page.
11
+
12
+ PARAMETERS
13
+ uri (1 -> uri(uri))
14
+ the uri to scrape
15
+ --pattern=pattern, -p (0 ~> pattern=['"](http://[^\s]+[^/\s]+.mp3)["'])
16
+ specify the mp3 pattern
17
+ --basedir=basedir, -b (0 ~> basedir=/Users/ahoward/mp3)
18
+ specify the base download dir - default(/Users/ahoward/mp3)
19
+ --destination=destination, -d (0 ~> destination)
20
+ specify the absolute download dir -
21
+ default(/Users/ahoward/mp3/auto-based-on-uri)
22
+ --list
23
+ only list the mp3s that would be scraped
24
+ --threads=threads, -t (0 ~> integer(threads=8))
25
+ specify the number of threads to download with in parallel -
26
+ default(8)
27
+ --noop, -n
28
+ show the downloads that would be performed
29
+ --help, -h
30
+
31
+ INSTALL
32
+ sudo gem install mp3scrape
33
+
34
+ URI
35
+ http://codeforpeople.com
36
+
37
+ EXAMPLES
38
+ 1) get a bunch of xmas tunes
39
+ mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html
40
+
41
+ 2) get a bunch of tunes
42
+ mp3scrape http://troubledsoulsunite.blogspot.com/
43
+
@@ -0,0 +1,154 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ Main {
4
# Defaults shared by the option declarations and by the methods below.
# Home falls back from $HOME to %USERPROFILE% to a literal "~".
Home = File.expand_path(ENV["HOME"] || ENV["USERPROFILE"] || "~")
Basedir = File.join(Home, "mp3")
Threads = 8

# NOTE(review): the leading whitespace of the heredoc bodies below could not
# be recovered from the source listing - confirm against the released gem.
description <<-txt
mp3scrape will scour any url for its mp3 content - the script mirrors,
never downloading the same file twice. it does not, however, crawl a
website for links, it simply scrapes all the songs from a single page.
txt

usage['INSTALL'] = 'sudo gem install mp3scrape'

usage['URI'] = 'http://codeforpeople.com'

example <<-txt
1) get a bunch of xmas tunes
mp3scrape http://fuelfriends.blogspot.com/2007/12/christmas-mixery.html

2) get a bunch of tunes
mp3scrape http://troubledsoulsunite.blogspot.com/
txt

# the page to scrape (required positional argument)
argument("uri"){
  description "the uri to scrape"
  cast :uri
}

# regular expression source; its capture group is what gets downloaded.
# NOTE(review): the "." before "mp3" is unescaped and only http:// links
# match - left untouched because the README documents this exact default.
option("pattern", "p"){
  description "specify the mp3 pattern"
  argument_required
  default %|['"](http://[^\\s]+[^/\\s]+.mp3)["']|
}

option("basedir", "b"){
  description "specify the base download dir - default(#{ Basedir })"
  argument_required
  default Basedir
}

# when given, every file lands directly in this directory
option("destination", "d"){
  description "specify the absolute download dir - default(#{ File.join Basedir, 'auto-based-on-uri' })"
  argument_required
}

option("list"){
  description "only list the mp3s that would be scraped"
}

option("threads", "t"){
  description "specify the number of threads to download with in parallel - default(#{ Threads })"
  argument_required
  default Threads
  cast :integer
}

option("noop", "n"){
  description "show the downloads that would be performed"
}
62
+
63
# Program entry point: fetch the page named by the "uri" argument, collect
# every match of the "pattern" option, then list, preview or mirror them.
#
# The page is read through the parsed URI's own #open (provided by open-uri
# for http/https/ftp) rather than Kernel#open, so a value starting with "|"
# can never be run as a command, and the handle is closed once read.
# Anything that is not an openable URI is read as a local file, as before.
def run
  uri = param["uri"].value
  pattern = %r/#{ param["pattern"].value }/

  page = URI.parse(uri.to_s)
  html =
    if page.respond_to?(:open)
      page.open{|fd| fd.read}
    else
      File.open(page.to_s){|fd| fd.read}
    end

  srcs = html.scan(pattern).flatten.compact

  if param["list"].given?
    puts srcs
    exit
  end

  dsts = destinations_for srcs, param["destination"].value
  spec = srcs.zip dsts

  if param["noop"].given?
    spec.each{|src, dst| puts "#{ src } -> #{ dst }"}
  else
    mirror spec
  end
end
79
+
80
# Download every [src, dst] pair in +spec+, using the number of threads
# given by the "threads" option.
#
# A file is fetched only when the remote Last-Modified time is newer than
# the local file's mtime; a response without Last-Modified counts as "now",
# so it is always fetched. After a download the local mtime is set to the
# remote timestamp so the next run can skip the file.
#
# Failures are reported on STDERR per file and do not stop the other
# downloads. Only StandardError is rescued so that exit and interrupt
# requests still propagate.
def mirror spec
  spec.threadify(params["threads"].value) do |src, dst|
    begin
      FileUtils.mkdir_p(File.dirname(dst))
      mtime = File.exist?(dst) ? File.mtime(dst) : Time.at(0)

      # URI#open only exists for schemes open-uri can fetch, so scraped
      # strings are never handed to Kernel#open.
      URI.parse(src.to_s).open do |remote|
        last_modified = remote.last_modified || Time.now

        if last_modified > mtime
          data = remote.read
          File.open(dst, "wb"){|local| local.write data}
          File.utime last_modified, last_modified, dst
          # single write including the newline keeps each report on its own line
          print "#{ src } -> #{ dst }\n"
        else
          print "#{ src } == #{ dst }\n"
        end
      end
    rescue StandardError => e
      STDERR.puts "#{ e.message } (#{ e.class })"
    end
  end
end
101
+
102
# Map each source url in +srcs+ to the absolute local path it is saved at.
#
# srcs        - list of url strings
# destination - optional directory; when given every file is placed directly
#               inside it under its cleaned basename
#
# Without +destination+ the path is Basedir/<host>/<cleaned url path>.
# Empty, "." and ".." path segments are dropped, so a scraped url cannot
# climb out of Basedir, and each segment is cleaned exactly once.
#
# Returns an array of paths in the same order as +srcs+.
def destinations_for srcs, destination = nil
  srcs.map do |src|
    target =
      if destination
        File.join destination, clean(File.basename(src))
      else
        uri = URI.parse src.to_s
        segments = uri.path.to_s.split("/").map{|segment| clean segment}
        segments = segments.reject{|segment| segment.empty? or segment == "." or segment == ".."}
        [ Basedir, uri.host, segments ].flatten.compact.join(File::SEPARATOR)
      end

    File.expand_path(target)
  end
end
118
+
119
# Turn one url path segment into a safe file name: url-unescape it, replace
# every character outside [0-9a-zA-Z_@)(~.-] with "_" and collapse runs of
# "_" into one.
#
# The unescaped text is handled as raw bytes where the runtime supports
# encodings, so escapes that are not valid UTF-8 (e.g. "%E9") become "_"
# instead of raising. nil is treated as an empty name.
def clean basename
  unescaped = CGI.unescape(basename.to_s)
  unescaped = unescaped.dup.force_encoding("BINARY") if unescaped.respond_to?(:force_encoding)
  unescaped.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_').gsub(%r/_+/, '_')
end
122
+ }
123
+
124
+ BEGIN {
125
+ require "yaml"
126
+ require "uri"
127
+ require "open-uri"
128
+ require "fileutils"
129
+ require "cgi"
130
+
131
+ begin
132
+ require "rubygems"
133
+ rescue LoadError
134
+ 42
135
+ end
136
+
137
+ begin
138
+ require "main"
139
+ rescue LoadError
140
+ STDERR.puts "gem install main"
141
+ exit 1
142
+ end
143
+
144
+ begin
145
+ require "threadify"
146
+ rescue LoadError
147
+ STDERR.puts "gem install threadify"
148
+ exit 1
149
+ end
150
+
151
+ STDERR.sync = STDOUT.sync = true
152
+
153
+ trap("INT"){ exit! }
154
+ }
@@ -0,0 +1,35 @@
1
# Builds the gem specification. Name and version come from the directory
# this file lives in, which is expected to be named "<lib>-<version>".
lib, version = File::basename(File::dirname(File::expand_path(__FILE__))).split %r/-/, 2

require 'rubygems'

Gem::Specification::new do |spec|
  $VERBOSE = nil

  # drop version-control and temp files from a file list
  without_cruft = lambda do |list|
    list.reject do |file|
      file =~ %r/\.svn/ or
      file =~ %r/\.tmp/
    end
  end

  spec.name = lib
  spec.version = version
  spec.platform = Gem::Platform::RUBY
  spec.summary = lib

  spec.files = without_cruft[Dir::glob("**/**")]
  spec.executables = without_cruft[Dir::glob("bin/*")].map{|exe| File::basename exe}

  spec.require_path = "lib"

  # has_rdoc=, test_suite_file= and rubyforge_project= were retired by later
  # RubyGems releases; set them only where the setter still exists.
  spec.has_rdoc = File::exist? "doc" if spec.respond_to?(:has_rdoc=)
  if File::directory?("test") and spec.respond_to?(:test_suite_file=)
    spec.test_suite_file = "test/#{ lib }.rb"
  end
  #spec.add_dependency 'lib', '>= version'

  # File::exist? (not the removed File::exists?) to match the check above
  spec.extensions << "extconf.rb" if File::exist? "extconf.rb"

  spec.rubyforge_project = 'codeforpeople' if spec.respond_to?(:rubyforge_project=)
  spec.author = "Ara T. Howard"
  spec.email = "ara.t.howard@gmail.com"
  spec.homepage = "http://codeforpeople.com/lib/ruby/#{ lib }/"
end
@@ -0,0 +1,214 @@
1
+ #!/usr/bin/env ruby
2
require 'fileutils'
require 'find'
begin
  require 'ftools' # only shipped with Ruby 1.8
rescue LoadError
  nil
end
require 'rbconfig'
require 'tempfile'
6
# Expose CONFIG at top level. RbConfig is the current name of the module
# formerly also reachable as Config.
include RbConfig

# source directories inside the package and the modes their files get
LIBDIR = "lib"
LIBDIR_MODE = 0644

BINDIR = "bin"
BINDIR_MODE = 0755


# locations derived from the running interpreter's build configuration
$srcdir = CONFIG["srcdir"]
$version = CONFIG["MAJOR"]+"."+CONFIG["MINOR"]
$libdir = File.join(CONFIG["libdir"], "ruby", $version)
$archdir = File.join($libdir, CONFIG["arch"])
$site_libdir = $:.find {|x| x =~ /site_ruby$/}
$bindir = CONFIG["bindir"] || CONFIG['BINDIR']
$ruby_install_name = CONFIG['ruby_install_name'] || CONFIG['RUBY_INSTALL_NAME'] || 'ruby'
$ruby_ext = CONFIG['EXEEXT'] || ''
$ruby = File.join($bindir, ($ruby_install_name + $ruby_ext))

# use the site_ruby entry of the load path when there is one, making sure
# it ends up version-specific; otherwise fall back below $libdir
if !$site_libdir
  $site_libdir = File.join($libdir, "site_ruby")
elsif $site_libdir !~ %r/#{Regexp.quote($version)}/
  $site_libdir = File.join($site_libdir, $version)
end
30
+
31
# Copy every regular file below +srcdir+ into +destdir+, keeping the
# relative directory layout.
#
# srcdir  - directory to install from; nothing happens when it is missing
# destdir - directory to install into (created when necessary)
# mode    - permission bits applied to each installed file, or nil
# bin     - when true each file is passed through shebangify first
#
# Skipped: files in CVS directories, any path containing .lnk, .svn or
# .swp, editor backups ending in "~" and dot files. Each copy is reported
# on $stderr.
def install_rb(srcdir=nil, destdir=nil, mode=nil, bin=nil)
  #{{{
  return unless srcdir and File.directory?(srcdir)

  path = []
  Find.find(srcdir) do |found|
    next unless FileTest.file?(found)
    f = found[srcdir.length+1..-1] # path relative to srcdir
    next if f.nil?
    next if File.dirname(f) =~ /CVS$/
    next if f =~ %r/\.(?:lnk|svn|swp)/
    path.push f
  end

  FileUtils.mkdir_p(destdir)
  path.map{|f| File.dirname(f)}.uniq.each do |d|
    next if d == "." or d == "CVS"
    FileUtils.mkdir_p(File.join(destdir, d))
  end

  path.each do |f|
    next if f =~ /\~$/
    next if File.basename(f) =~ /^\./

    from = File.join(srcdir, f)
    to = File.join(destdir, f)

    copy = lambda do |source|
      $stderr.print from, " -> ", to, "\n"
      $stderr.printf "chmod %04o %s\n", mode, to if mode
      FileUtils.install(source, to, :mode => mode)
    end

    if bin
      shebangify(from){|sf| copy.call(sf)}
    else
      copy.call(from)
    end
  end
  #}}}
end
69
# Yield the path of a file equivalent to +f+ whose shebang line points at
# the interpreter in $ruby.
#
# When the first 42 bytes of +f+ contain a ruby shebang, a temporary copy is
# written with "#!<$ruby>" as its first line and that copy's path is
# yielded; the temporary file is removed when the block returns. The
# original leading shebang line is replaced rather than kept as a second
# line, so a magic comment on line 2 stays on line 2. Any other file is
# yielded unchanged.
def shebangify f
  #{{{
  File.open(f) do |fd|
    head = fd.read 42
    if head =~ %r/^\s*#\s*!.*ruby/o
      ftmp = Tempfile::new("#{ $$ }_#{ File::basename(f) }")
      begin
        fd.rewind
        ftmp.puts "#!#{ $ruby }"
        first_line = fd.gets
        # keep the first line only when it is not itself the old shebang
        ftmp.write first_line unless first_line.nil? or first_line =~ %r/\A\s*#\s*!/
        while((buf = fd.read(8192)))
          ftmp.write buf
        end
        ftmp.close
        yield ftmp.path
      ensure
        ftmp.close!
      end
    else
      yield f
    end
  end
  #}}}
end
92
# Take the next switch off the front of ARGV.
#
# Returns "--name" for a long switch and "-x" for a short one; bundled short
# switches ("-abc") are handed out one at a time. Returns nil when ARGV is
# empty, when the next word is "--" (which is consumed) or when it is not a
# switch (which is left in place). Raises when a "-" appears inside a short
# switch bundle.
def ARGV.switch
  #{{{
  return nil if empty?

  word = shift
  return nil if word == '--'

  unless word =~ /^-(.)(.*)/
    unshift word
    return nil
  end

  letter, remainder = $1, $2
  return word if letter == '-'
  raise 'unknown switch "-"' if remainder.index('-')

  unshift "-#{remainder}" if remainder.size > 0
  "-#{letter}"
  #}}}
end
108
# Take the value belonging to the switch that was just read off ARGV.
# Raises 'missing argument' when nothing is left.
def ARGV.req_arg
  #{{{
  value = shift
  raise('missing argument') unless value
  value
  #}}}
end
113
# Give every versioned file in directory +d+ ("name-1.2.3.rb") an
# unversioned copy next to it ("name.rb").
#
# d      - directory to scan; a missing directory yields no work
# linked - array the created paths are appended to
#
# A symlink already sitting at the unversioned name is moved aside to
# "<name>.lnk" and put back by an at_exit hook. An existing regular file is
# left alone. Version components may have any number of digits.
#
# Returns +linked+.
def linkify d, linked = []
  #--{{{
  return linked unless File.directory?(d)

  versioned = Dir[ File::join(d, "*-[0-9]*.rb") ].select do |candidate|
    candidate =~ %r/-\d+\.\d+\.\d+\.rb\z/
  end

  versioned.each do |src|
    dst = src.gsub(%r/\-[\d\.]+\.rb$/, '.rb')
    lnk = nil
    begin
      if File.symlink?(dst)
        lnk = "#{ dst }.lnk"
        puts "#{ dst } -> #{ lnk }"
        File::rename dst, lnk
      end
      unless File.exist?(dst)
        puts "#{ src } -> #{ dst }"
        FileUtils.cp src, dst
        linked << dst
      end
    ensure
      # restore the displaced symlink when the process exits
      if lnk
        at_exit do
          puts "#{ lnk } -> #{ dst }"
          File::rename lnk, dst
        end
      end
    end
  end

  linked
  #--}}}
end
144
+
145
+
146
+ #
147
+ # main program
148
+ #
149
+
150
# Installer driver: parse the command line, run the optional pre-install
# hook, build any extension, install lib/ and bin/, then run the optional
# post-install hook.
libdir = $site_libdir
bindir = $bindir
no_linkify = false
linked = nil
help = false
sudo = nil # becomes 'sudo' with -s and is prefixed to privileged commands

usage = <<-usage
#{ File::basename $0 }
-d, --destdir <destdir>
-l, --libdir <libdir>
-b, --bindir <bindir>
-r, --ruby <ruby>
-n, --no_linkify
-s, --sudo
-h, --help
usage

begin
  while (switch = ARGV.switch)
    case switch
    when '-d', '--destdir'
      # NOTE(review): this sets libdir exactly like -l does - confirm whether
      # a separate destination root was intended.
      libdir = ARGV.req_arg
    when '-l', '--libdir'
      libdir = ARGV.req_arg
    when '-b', '--bindir'
      bindir = ARGV.req_arg
    when '-r', '--ruby'
      $ruby = ARGV.req_arg
    when '-n', '--no_linkify'
      no_linkify = true
    when '-s', '--sudo'
      sudo = 'sudo'
    when '-h', '--help'
      help = true
    else
      raise "unknown switch #{switch.dump}"
    end
  end
rescue
  STDERR.puts $!.to_s
  STDERR.puts usage
  exit 1
end

if help
  STDOUT.puts usage
  exit
end

system "#{ sudo } #{ $ruby } pre-install.rb" if test(?s, 'pre-install.rb')

unless no_linkify
  linked = linkify('lib') + linkify('bin')
end

system "#{ $ruby } extconf.rb && make && #{ sudo } make install" if test(?s, 'extconf.rb')

install_rb(LIBDIR, libdir, LIBDIR_MODE)
install_rb(BINDIR, bindir, BINDIR_MODE, true)

# remove the unversioned copies linkify created for the install
if linked
  linked.each{|path| FileUtils.rm_f path}
end

system "#{ sudo } #{ $ruby } post-install.rb" if test(?s, 'post-install.rb')
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mp3scrape
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Ara T. Howard
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-31 00:00:00 -06:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: ara.t.howard@gmail.com
18
+ executables:
19
+ - mp3scrape
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - bin
26
+ - bin/mp3scrape
27
+ - gemspec.rb
28
+ - install.rb
29
+ - README
30
+ has_rdoc: false
31
+ homepage: http://codeforpeople.com/lib/ruby/mp3scrape/
32
+ post_install_message:
33
+ rdoc_options: []
34
+
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ version:
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ requirements: []
50
+
51
+ rubyforge_project: codeforpeople
52
+ rubygems_version: 1.2.0
53
+ signing_key:
54
+ specification_version: 2
55
+ summary: mp3scrape
56
+ test_files: []
57
+