scrapers 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +8 -0
- data/Rakefile +46 -0
- data/bin/rubytapas +46 -0
- data/lib/scrapers/manning_dashboard.rb +15 -0
- data/lib/scrapers/rubytapas.rb +1 -0
- data/lib/scrapers/version.rb +1 -1
- data/scrapers.gemspec +6 -1
- metadata +54 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9714ca4691bf79afd82dd1bac8588919703faf66
|
4
|
+
data.tar.gz: 2de713c77701348724b080af2d039739c2e33b67
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1efecf60008126c24dee448e948ce49e9f187cac161732d5bb61ca171219e927116a05d799c676dbb789d0ed4e175837defe5889855076d3bad8090462249b25
|
7
|
+
data.tar.gz: e8d1d51f9dae5a2072b3f3282704982f1ab3483f2727c9ebcbc3a0c70aa3b58d1f1a7588bbd439fe762978ad69acd3093f7237c3ab5e1a9b04eef7228951b28c
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1 +1,47 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require "highline/import"
|
3
|
+
require "active_support/core_ext/string/inflections"
|
4
|
+
|
5
|
+
# Rake task that generates a skeleton scraper module under lib/scrapers.
#
# Usage:
#   rake new[ModuleName]   # name given as the task argument
#   rake new               # name is asked for interactively (default "NewScraper")
#
# The file name is the underscored module name plus ".rb". If that file
# already exists the user is asked whether to overwrite it; answering "no"
# exits without touching the file.
desc "Create the basis for a new scraper"
task :new, [:module_name] do |_t, args|
  # Prefer the task argument; fall back to an interactive HighLine prompt.
  module_name = args.module_name ||
    ask("<%= color('What is the name of your new scraper module?', YELLOW) %>") { |q| q.default = "NewScraper" }

  # Normalise to a constant-style name so input such as "my_scraper" still
  # yields a valid `module MyScraper` line in the generated file.
  module_name = module_name.to_s.camelize

  file_name = module_name.underscore + ".rb"
  dir_name = File.join(File.dirname(__FILE__), 'lib', 'scrapers')
  new_scraper_path = File.join(dir_name, file_name)

  # The heredoc body is deliberately flush-left: <<-EOT keeps leading
  # whitespace verbatim, so this is exactly how the generated file is indented.
  template = <<-EOT
module Scrapers
  module #{module_name}

    def self.scrape(url)
      results = Hash.new

      Mechanize.start(url) do |m|

      end

      results
    end

  end
end
  EOT

  if File.exist?(new_scraper_path)
    # Each colored fragment needs its own <%= %> tag: with both calls in one
    # tag separated by ';' only the last value is rendered, which hid the
    # file name from the prompt.
    if agree("<%= color('#{file_name}', BLUE) %> <%= color('already exists. Do you want to overwrite it?', YELLOW) %>", true)
      File.unlink(new_scraper_path)
    else
      exit 0
    end
  end

  File.write(new_scraper_path, template)
  say("<%= color('New scraper in', YELLOW) %> <%= color('#{new_scraper_path}', BLUE) %>")
end
|
data/bin/rubytapas
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'thor'
|
5
|
+
require 'netrc'
|
6
|
+
require 'scrapers/rubytapas'
|
7
|
+
|
8
|
+
################################################################################
|
9
|
+
#
|
10
|
+
# Scraper for RubyTapas episodes.
|
11
|
+
#
|
12
|
+
# (Note: you need to subscribe to RubyTapas to download episodes.)
|
13
|
+
#
|
14
|
+
################################################################################
|
15
|
+
|
16
|
+
# Thor command-line interface for downloading the files of a RubyTapas episode.
#
# Credentials are looked up in the user's netrc file under the RUBYTAPAS host
# and can be overridden with the --user / --password options.
class RubyTapasDownload < Thor

  # Host name used both for the netrc lookup and to build episode URLs.
  RUBYTAPAS="rubytapas.dpdcart.com"
  # URL template; the literal text '#{episode}' (escaped, so not interpolated
  # here) is a placeholder that #download replaces with the episode number.
  RUBYTAPAS_EPISODE_URL = "https://#{RUBYTAPAS}/subscriber/post?id=\#{episode}"

  desc "download", "Downloads the listed episode's files into a new directory with the episode tag in the given directory"
  method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination to store the downloads", :default => "."
  method_option :url, :desc => "url to episode downloads (overrides episode)"
  method_option :episode, :aliases => %w{-e --ep}, :desc => "Episode number"
  method_option :user, :aliases => %w{-u -U}, :desc => "dpdcart user. Default is read from $HOME/.netrc"
  method_option :password, :aliases => %w{-p -pw}, :desc => "dpdcart password. Default is read from $HOME/.netrc"

  # Resolves credentials and the episode URL, then hands off to
  # Scrapers::RubyTapas.scrape.
  #
  # Raises RuntimeError when neither --url nor --episode is given.
  def download
    netrc = Netrc.read
    # Yields nil/nil when the netrc file has no entry for the host.
    user, pw = netrc[RUBYTAPAS]
    user = options.fetch("user", user)
    pw = options.fetch("password", pw)
    url = options.fetch("url", nil)
    episode = options.fetch("episode", nil)
    destination = options.fetch("destination", nil)
    # Only the password length is printed, never the password itself. to_s
    # guards against a nil password (no netrc entry and no --password), which
    # previously raised NoMethodError before any useful message was shown.
    STDERR.puts "destination: #{destination}, episode: #{episode}, url: #{url}, user: #{user}, pw: #{pw.to_s.length}"
    unless url
      raise "Must give episode or full url" unless episode
      # Block form inserts the episode literally (no backslash processing of
      # the replacement) and to_s tolerates a non-String option value.
      url = RUBYTAPAS_EPISODE_URL.sub(%r[\#{episode}]) { episode.to_s }
    end
    Scrapers::RubyTapas.scrape url, user, pw, destination
  end

end
|
45
|
+
|
46
|
+
RubyTapasDownload.start
|
data/lib/scrapers/rubytapas.rb
CHANGED
data/lib/scrapers/version.rb
CHANGED
data/scrapers.gemspec
CHANGED
@@ -18,12 +18,17 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
+
spec.add_dependency "mechanize"
|
22
|
+
spec.add_dependency "netrc"
|
23
|
+
|
21
24
|
spec.add_development_dependency "bundler"
|
22
25
|
spec.add_development_dependency "rake"
|
23
26
|
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_dependency "mechanize"
|
25
27
|
spec.add_development_dependency "guard"
|
26
28
|
spec.add_development_dependency "guard-rspec"
|
27
29
|
spec.add_development_dependency "webmock"
|
28
30
|
spec.add_development_dependency "vcr"
|
31
|
+
|
32
|
+
spec.add_development_dependency "active_support"
|
33
|
+
spec.add_development_dependency "highline"
|
29
34
|
end
|
metadata
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name: bundler
|
14
|
+
name: mechanize
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
type: :development
|
20
|
+
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
@@ -25,7 +25,21 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: rake
|
28
|
+
name: netrc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - '>='
|
@@ -39,7 +53,7 @@ dependencies:
|
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: rspec
|
56
|
+
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - '>='
|
@@ -53,13 +67,13 @@ dependencies:
|
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: mechanize
|
70
|
+
name: rspec
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - '>='
|
60
74
|
- !ruby/object:Gem::Version
|
61
75
|
version: '0'
|
62
|
-
type: :runtime
|
76
|
+
type: :development
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
@@ -122,11 +136,40 @@ dependencies:
|
|
122
136
|
- - '>='
|
123
137
|
- !ruby/object:Gem::Version
|
124
138
|
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: active_support
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - '>='
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - '>='
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: highline
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - '>='
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
125
167
|
description: A library of web site scrapers utilizing mechanize and other goodies.
|
126
168
|
Helpful in gathering images, moving things, saving things, etc.
|
127
169
|
email:
|
128
170
|
- tamouse@gmail.com
|
129
|
-
executables: []
|
171
|
+
executables:
|
172
|
+
- rubytapas
|
130
173
|
extensions: []
|
131
174
|
extra_rdoc_files: []
|
132
175
|
files:
|
@@ -137,6 +180,7 @@ files:
|
|
137
180
|
- LICENSE.txt
|
138
181
|
- README.md
|
139
182
|
- Rakefile
|
183
|
+
- bin/rubytapas
|
140
184
|
- lib/scrapers.rb
|
141
185
|
- lib/scrapers/allrecipes.rb
|
142
186
|
- lib/scrapers/discoverynews.rb
|
@@ -144,6 +188,7 @@ files:
|
|
144
188
|
- lib/scrapers/esod.rb
|
145
189
|
- lib/scrapers/gocomics.rb
|
146
190
|
- lib/scrapers/imgur.rb
|
191
|
+
- lib/scrapers/manning_dashboard.rb
|
147
192
|
- lib/scrapers/nasa_apod.rb
|
148
193
|
- lib/scrapers/rubytapas.rb
|
149
194
|
- lib/scrapers/sinfest.rb
|