botanicus-get 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/get.rb +153 -0
- metadata +52 -0
data/bin/get.rb
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
#!/usr/bin/env ruby1.8
|
|
2
|
+
# coding=utf-8
|
|
3
|
+
|
|
4
|
+
=begin rdoc
|
|
5
|
+
page [params] [pages]
|
|
6
|
+
--pdf
|
|
7
|
+
-t --to [download dir]
|
|
8
|
+
=end
|
|
9
|
+
|
|
10
|
+
require "optparse"
|
|
11
|
+
require "rdoc/usage"
|
|
12
|
+
require "easyruby/core/unique_array"
|
|
13
|
+
require "get/page"
|
|
14
|
+
require "get/uri"
|
|
15
|
+
require "get/log"
|
|
16
|
+
require "get/option_parser"
|
|
17
|
+
require "get/mime_types"
|
|
18
|
+
require "shell/size"
|
|
19
|
+
require "shell/option_parser"
|
|
20
|
+
|
|
21
|
+
# Default OPTS. The option handlers below mutate this hash in place.
OPTS = {
  :extensions => [],
  :recursive  => false,
  :protocols  => ["http"],
  :flatten    => false,
  :verbose    => true
}

abort("No parameters given. Type -h for help.") if ARGV.empty?

# The first argument is the site to process. When the user asks for help
# straight away (as the message above suggests), leave ARGV untouched so the
# -h/--help handler below can run instead of treating "-h" as a site.
help_requested = %w[-h --help].include?(ARGV.first)
site  = ARGV.shift unless help_requested
@page = Page.new(site) if site

ARGV.options do |opts|
  opts.banner = "Usage: #{File.basename($0)} [site] [options]"

  # MIME-types options
  opts.separator "MIME-types options:"

  # Switches that take a value need an argument placeholder (LIST, RANGE, ...)
  # in their long form; without it optparse treats them as plain flags.
  opts.on("-e", "--extensions LIST", Array, "Download just files with given extensions.") do |exts|
    OPTS[:extensions].push(*exts)
  end

  # NOTE(review): the parsed MIME types are discarded; nothing visible in this
  # script stores or uses them. Confirm what the runner expects before wiring
  # this up.
  opts.on("-m", "--mime LIST", Array,
          "Download just files with given MIME-type (Example: #{$0} localhost:3300 --mime text/html)") do |_mimes|
  end

  # One switch per well-known extension.
  # NOTE(review): opts.extension is not part of stdlib optparse; presumably it
  # is provided by get/option_parser -- confirm.
  %w{ pdf js css }.each do |extension|
    opts.extension(extension) do
      OPTS[:extensions].push(extension)
    end
  end

  # General types
  # --movies has no short form: "-M" is bound to --match further down, and a
  # short switch can only be bound to one option.
  opts.on("--movies", "Download just movies.") do
    OPTS[:extensions].push("movies")
  end
  opts.on("-I", "--images", "Download just images.") do
    OPTS[:extensions].push("images")
  end
  opts.on("-A", "--rss", "Extract RSS and Atom links") do
    OPTS[:extensions].push("feeds")
  end

  # Protocol options
  opts.separator "Protocol options:"
  opts.on("-S", "--svn", "Extract SVN links.") do
    OPTS[:protocols].push("svn")
  end
  opts.on("-G", "--git", "Extract Git links.") do
    OPTS[:protocols].push("git")
  end
  opts.on("-F", "--ftp", "Extract FTP links.") do
    OPTS[:protocols].push("ftp")
  end
  # Adds the user-supplied protocols to the protocol list.
  opts.on("-P", "--protocol LIST", Array, "Use these protocols") do |protocols|
    OPTS[:protocols].push(*protocols)
  end

  # Output options
  opts.separator "Output options:"
  opts.on("-w", "--wget", "Output will be Bash script using wget.") do
    OPTS[:output] = "wget"
  end
  # --download has no short form: "-d" is bound to --debug further down.
  opts.on("--download", "Download the files.") do
    OPTS[:output] = "download"
  end

  opts.on("-f", "--flatten", "Create flatten structure.") do
    OPTS[:flatten] = true
  end

  # Size options
  # NOTE(review): the Range and Size argument types are presumably registered
  # with optparse by shell/option_parser and shell/size -- confirm.
  opts.separator "Size options:"
  opts.on("-s", "--size RANGE", Range, "Size from x to y. (Example: 20..40KB or 100KB..10MB)") do |range|
    OPTS[:size] = range
  end

  # Short switches are a single character, so "-xs" / "-ns" were read by
  # optparse as "-x" / "-n" followed by an argument placeholder. Spell that out.
  opts.on("-x", "--max-size SIZE", Size, "Max size") do |max|
    OPTS[:size] = 0..max
  end

  opts.on("-n", "--min-size SIZE", Size, "Min size") do |min|
    infinity = 1.0 / 0 # Float division by zero yields Infinity
    OPTS[:size] = min..infinity
  end

  # General options
  opts.separator "General options:"
  opts.on("-R", "--recursive", "Recursive proceed.") do
    OPTS[:recursive] = true
  end
  # --t --types: show all mimes at site

  # Keeps only the URLs of the page that match the given regexp.
  opts.on("-M", "--match REGEXP", Regexp, "Use just links which match given regexp.") do |pattern|
    @page.urls.reject! { |url| !url.match(pattern) }
  end

  # Long form only: "-pm" was read as "-p" plus a required argument, which
  # both demanded a value and clashed with -p/--pretend.
  opts.on("--print-mimes", "Print all MIME types on the site.") do
    OPTS[:show] = :mimes
  end

  opts.on("-q", "--quiet") do
    OPTS[:verbose] = false
  end

  opts.on("-p", "--pretend", "Just pretend.") do
    OPTS[:pretend] = true
  end

  opts.on("-d", "--debug", "Debug on.") do
    OPTS[:debug] = true
  end

  opts.separator "Common options:"
  # Asking for help is not an error: print to stdout and exit successfully.
  opts.on("-h", "--help", "Show this message.") do
    puts opts
    exit
  end

  # help if
  opts.parse! #rescue abort(opts.to_s)
end

require File.dirname(__FILE__) + "/../lib/get/runner"
|
|
144
|
+
|
|
145
|
+
__END__
|
|
146
|
+
# ====================
|
|
147
|
+
# = DEVELOPMENT ONLY =
|
|
148
|
+
# ====================
|
|
149
|
+
@page = Page.new(ARGV.first || "http://localhost:3300")
|
|
150
|
+
@page.proceed
|
|
151
|
+
puts "Linky, co nejsou na hlavni strance:"
|
|
152
|
+
puts @page.recursive - @page.urls
|
|
153
|
+
# puts @page.urls
|
metadata
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: botanicus-get
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- "Jakub \xC5\xA0\xC5\xA5astn\xC3\xBD aka Botanicus"
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain:
|
|
11
|
+
date: 2009-05-16 00:00:00 -07:00
|
|
12
|
+
default_executable: get.rb
|
|
13
|
+
dependencies: []
|
|
14
|
+
|
|
15
|
+
description: ""
|
|
16
|
+
email: knava.bestvinensis@gmail.com
|
|
17
|
+
executables:
|
|
18
|
+
- get.rb
|
|
19
|
+
extensions: []
|
|
20
|
+
|
|
21
|
+
extra_rdoc_files: []
|
|
22
|
+
|
|
23
|
+
files: []
|
|
24
|
+
|
|
25
|
+
has_rdoc: false
|
|
26
|
+
homepage: http://github.com/botanicus/get
|
|
27
|
+
post_install_message:
|
|
28
|
+
rdoc_options: []
|
|
29
|
+
|
|
30
|
+
require_paths:
|
|
31
|
+
- lib
|
|
32
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
33
|
+
requirements:
|
|
34
|
+
- - ">="
|
|
35
|
+
- !ruby/object:Gem::Version
|
|
36
|
+
version: "0"
|
|
37
|
+
version:
|
|
38
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
39
|
+
requirements:
|
|
40
|
+
- - ">="
|
|
41
|
+
- !ruby/object:Gem::Version
|
|
42
|
+
version: "0"
|
|
43
|
+
version:
|
|
44
|
+
requirements: []
|
|
45
|
+
|
|
46
|
+
rubyforge_project:
|
|
47
|
+
rubygems_version: 1.2.0
|
|
48
|
+
signing_key:
|
|
49
|
+
specification_version: 2
|
|
50
|
+
summary: Get is CLI download manager for automatic downloading the whole files of specified type from given web.
|
|
51
|
+
test_files: []
|
|
52
|
+
|