scrapers 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +8 -0
- data/Rakefile +46 -0
- data/bin/rubytapas +46 -0
- data/lib/scrapers/manning_dashboard.rb +15 -0
- data/lib/scrapers/rubytapas.rb +1 -0
- data/lib/scrapers/version.rb +1 -1
- data/scrapers.gemspec +6 -1
- metadata +54 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9714ca4691bf79afd82dd1bac8588919703faf66
|
4
|
+
data.tar.gz: 2de713c77701348724b080af2d039739c2e33b67
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1efecf60008126c24dee448e948ce49e9f187cac161732d5bb61ca171219e927116a05d799c676dbb789d0ed4e175837defe5889855076d3bad8090462249b25
|
7
|
+
data.tar.gz: e8d1d51f9dae5a2072b3f3282704982f1ab3483f2727c9ebcbc3a0c70aa3b58d1f1a7588bbd439fe762978ad69acd3093f7237c3ab5e1a9b04eef7228951b28c
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1 +1,47 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
require "highline/import"
|
3
|
+
require "active_support/core_ext/string/inflections"
|
4
|
+
|
5
|
+
desc "Create the basis for a new scraper"
|
6
|
+
task :new, [:module_name] do |t, args|
|
7
|
+
if args.module_name
|
8
|
+
module_name = args.module_name
|
9
|
+
else
|
10
|
+
module_name = ask("<%= color('What is the name of your new scraper module?', YELLOW) %>") {|q| q.default = "NewScraper" }
|
11
|
+
end
|
12
|
+
|
13
|
+
file_name = module_name.underscore + ".rb"
|
14
|
+
dir_name = File.join(File.dirname(__FILE__),'lib','scrapers')
|
15
|
+
|
16
|
+
new_scraper_path = File.join(dir_name, file_name)
|
17
|
+
|
18
|
+
template = <<-EOT
|
19
|
+
module Scrapers
|
20
|
+
module #{module_name}
|
21
|
+
|
22
|
+
def self.scrape(url)
|
23
|
+
results = Hash.new
|
24
|
+
|
25
|
+
Mechanize.start(url) do |m|
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
results
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
34
|
+
EOT
|
35
|
+
|
36
|
+
if File.exist?(new_scraper_path)
|
37
|
+
if agree("<%= color('#{file_name}', BLUE); color('already exists. Do you want to overwrite it?', YELLOW) %>", true)
|
38
|
+
File.unlink(new_scraper_path)
|
39
|
+
else
|
40
|
+
exit 0
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
File.write(new_scraper_path, template)
|
45
|
+
say("<%= color('New scraper in', YELLOW) %> <%= color('#{new_scraper_path}', BLUE) %>")
|
46
|
+
|
47
|
+
end
|
data/bin/rubytapas
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'thor'
|
5
|
+
require 'netrc'
|
6
|
+
require 'scrapers/rubytapas'
|
7
|
+
|
8
|
+
################################################################################
|
9
|
+
#
|
10
|
+
# Scraper for RubyTapas episodes.
|
11
|
+
#
|
12
|
+
# (Note: you need to subscribe to RubyTapas to download episodes.)
|
13
|
+
#
|
14
|
+
################################################################################
|
15
|
+
|
16
|
+
class RubyTapasDownload < Thor
|
17
|
+
|
18
|
+
RUBYTAPAS="rubytapas.dpdcart.com"
|
19
|
+
RUBYTAPAS_EPISODE_URL = "https://#{RUBYTAPAS}/subscriber/post?id=\#{episode}"
|
20
|
+
|
21
|
+
desc "download", "Downloads the listed episode's files into a new directory with the episode tag in the given directory"
|
22
|
+
method_option :destination, :aliases => %w{-d --dest}, :desc => "Destination to store the downloads", :default => "."
|
23
|
+
method_option :url, :desc => "url to episode downloads (overrides episode)"
|
24
|
+
method_option :episode, :aliases => %w{-e --ep}, :desc => "Episode number"
|
25
|
+
method_option :user, :aliases => %w{-u -U}, :desc => "dpdcart user. Default is read from $HOME/.netrc"
|
26
|
+
method_option :password, :aliases => %w{-p -pw}, :desc => "dpdcart password. Default is read from $HOME/.netrc"
|
27
|
+
|
28
|
+
def download
|
29
|
+
netrc = Netrc.read
|
30
|
+
user, pw = netrc[RUBYTAPAS]
|
31
|
+
user = options.fetch("user", user)
|
32
|
+
pw = options.fetch("password", pw)
|
33
|
+
url = options.fetch("url", nil)
|
34
|
+
episode = options.fetch("episode", nil)
|
35
|
+
destination = options.fetch("destination", nil)
|
36
|
+
STDERR.puts "destination: #{destination}, episode: #{episode}, url: #{url}, user: #{user}, pw: #{pw.length}"
|
37
|
+
unless url
|
38
|
+
raise "Must give episode or full url" unless episode
|
39
|
+
url = RUBYTAPAS_EPISODE_URL.sub(%r[\#{episode}], episode)
|
40
|
+
end
|
41
|
+
Scrapers::RubyTapas.scrape url, user, pw, destination
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
RubyTapasDownload.start
|
data/lib/scrapers/rubytapas.rb
CHANGED
data/lib/scrapers/version.rb
CHANGED
data/scrapers.gemspec
CHANGED
@@ -18,12 +18,17 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
+
spec.add_dependency "mechanize"
|
22
|
+
spec.add_dependency "netrc"
|
23
|
+
|
21
24
|
spec.add_development_dependency "bundler"
|
22
25
|
spec.add_development_dependency "rake"
|
23
26
|
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_dependency "mechanize"
|
25
27
|
spec.add_development_dependency "guard"
|
26
28
|
spec.add_development_dependency "guard-rspec"
|
27
29
|
spec.add_development_dependency "webmock"
|
28
30
|
spec.add_development_dependency "vcr"
|
31
|
+
|
32
|
+
spec.add_development_dependency "active_support"
|
33
|
+
spec.add_development_dependency "highline"
|
29
34
|
end
|
metadata
CHANGED
@@ -1,23 +1,23 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: scrapers
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tamara Temple
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: mechanize
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
|
-
type: :
|
20
|
+
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
@@ -25,7 +25,21 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: netrc
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - '>='
|
@@ -39,7 +53,7 @@ dependencies:
|
|
39
53
|
- !ruby/object:Gem::Version
|
40
54
|
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
56
|
+
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - '>='
|
@@ -53,13 +67,13 @@ dependencies:
|
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
70
|
+
name: rspec
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - '>='
|
60
74
|
- !ruby/object:Gem::Version
|
61
75
|
version: '0'
|
62
|
-
type: :
|
76
|
+
type: :development
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
@@ -122,11 +136,40 @@ dependencies:
|
|
122
136
|
- - '>='
|
123
137
|
- !ruby/object:Gem::Version
|
124
138
|
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: active_support
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - '>='
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - '>='
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: highline
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - '>='
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - '>='
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0'
|
125
167
|
description: A library of web site scrapers utilizing mechanize and other goodies.
|
126
168
|
Helpful in gathering images, moving things, saving things, etc.
|
127
169
|
email:
|
128
170
|
- tamouse@gmail.com
|
129
|
-
executables:
|
171
|
+
executables:
|
172
|
+
- rubytapas
|
130
173
|
extensions: []
|
131
174
|
extra_rdoc_files: []
|
132
175
|
files:
|
@@ -137,6 +180,7 @@ files:
|
|
137
180
|
- LICENSE.txt
|
138
181
|
- README.md
|
139
182
|
- Rakefile
|
183
|
+
- bin/rubytapas
|
140
184
|
- lib/scrapers.rb
|
141
185
|
- lib/scrapers/allrecipes.rb
|
142
186
|
- lib/scrapers/discoverynews.rb
|
@@ -144,6 +188,7 @@ files:
|
|
144
188
|
- lib/scrapers/esod.rb
|
145
189
|
- lib/scrapers/gocomics.rb
|
146
190
|
- lib/scrapers/imgur.rb
|
191
|
+
- lib/scrapers/manning_dashboard.rb
|
147
192
|
- lib/scrapers/nasa_apod.rb
|
148
193
|
- lib/scrapers/rubytapas.rb
|
149
194
|
- lib/scrapers/sinfest.rb
|