confed_scraper 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.rvmrc +1 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +33 -0
- data/Rakefile +1 -0
- data/confed_scraper.gemspec +25 -0
- data/lib/confed_scraper/confreaks_scraper.rb +29 -0
- data/lib/confed_scraper/scraper.rb +20 -0
- data/lib/confed_scraper/version.rb +3 -0
- data/lib/confed_scraper.rb +3 -0
- metadata +88 -0
data/.gitignore
ADDED
data/.rvmrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use ruby-1.9.2@confed_scraper --create
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
confed_scraper (0.0.1)
|
5
|
+
nokogiri
|
6
|
+
rest-client
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
coderay (0.9.8)
|
12
|
+
method_source (0.6.7)
|
13
|
+
ruby_parser (>= 2.3.1)
|
14
|
+
mime-types (1.17.2)
|
15
|
+
nokogiri (1.5.0)
|
16
|
+
pry (0.9.7.4)
|
17
|
+
coderay (~> 0.9.8)
|
18
|
+
method_source (~> 0.6.7)
|
19
|
+
ruby_parser (>= 2.3.1)
|
20
|
+
slop (~> 2.1.0)
|
21
|
+
rest-client (1.6.7)
|
22
|
+
mime-types (>= 1.16)
|
23
|
+
ruby_parser (2.3.1)
|
24
|
+
sexp_processor (~> 3.0)
|
25
|
+
sexp_processor (3.0.9)
|
26
|
+
slop (2.1.0)
|
27
|
+
|
28
|
+
PLATFORMS
|
29
|
+
ruby
|
30
|
+
|
31
|
+
DEPENDENCIES
|
32
|
+
confed_scraper!
|
33
|
+
pry
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for confed_scraper.

# Make lib/ loadable so the version constant can be required below,
# without adding the same directory to the load path twice.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "confed_scraper/version"

Gem::Specification.new do |s|
  s.name        = "confed_scraper"
  s.version     = ConfedScraper::VERSION
  s.authors     = ["Matt Polito"]
  s.email       = ["matt.polito@gmail.com"]
  s.homepage    = "http://github.com/mattpolito/confed_scraper"
  s.summary     = %q{Site scrapers to ease data import into Confed}
  s.description = %q{Site scrapers to ease data import into Confed}

  # rubyforge_project= is deprecated in newer RubyGems; only set it where
  # the writer is still available.
  s.rubyforge_project = "confed_scraper" if s.respond_to?(:rubyforge_project=)

  # File lists come from git, so the gem must be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # pry is only needed while developing; rest-client and nokogiri are
  # referenced by the scraper classes at runtime.
  s.add_development_dependency "pry"
  s.add_runtime_dependency "rest-client"
  s.add_runtime_dependency "nokogiri"
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module ConfedScraper
  # Scraper for a confreaks.net video listing page.
  #
  # Follows every video link found on the listing page and collects the
  # title, presenters and abstract from each video's own page.
  class ConfreaksScraper < Scraper
    # Prefix joined to each link's href to build the video page URL.
    SITE_URI = "http://confreaks.net".freeze

    # Scrapes the listing page at +url+ and every video page it links to.
    #
    # @return [Array<Hash>] one hash per video with the keys :title, :uri,
    #   :presenters and :description, in the order the links appear
    def process
      listing_doc = Nokogiri::HTML.parse(get_content_from(url))
      links = listing_doc.xpath('//*[@class="video"]//*[@class="title"]/a')

      links.map { |link| scrape_video(SITE_URI + link.attr('href')) }
    end

    private

    # Fetches a single video page and extracts its details.
    #
    # @param show_page_url [String] URL of the video page
    # @return [Hash] the video's :title, :uri, :presenters and :description
    def scrape_video(show_page_url)
      # Fetch through the inherited helper so the listing page and the
      # video pages are retrieved the same way.
      show_page_doc = Nokogiri::HTML.parse(get_content_from(show_page_url))

      scrape_message(show_page_url)

      {
        :title       => show_page_doc.xpath('//*[@class="video-title"]').text.strip,
        :uri         => show_page_url,
        :presenters  => show_page_doc.xpath('//*[@class="video-presenters"]/a').map(&:text),
        :description => show_page_doc.xpath('//*[@class="video-abstract"]/p').text.strip
      }
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module ConfedScraper
  # Base class for site scrapers. It stores the URL to scrape and provides
  # helpers for fetching content and reporting progress.
  class Scraper
    # The URL given to the constructor.
    attr_reader :url

    # @param url [String] address of the page to scrape
    def initialize(url)
      @url = url
    end

    # Hook for subclasses; the base implementation does nothing.
    #
    # The argument is optional so the method can be called the same way as
    # subclass overrides that take no arguments and read +url+ instead.
    #
    # @param url [String] page to process, defaults to the stored URL
    # @return [nil]
    def process(url = self.url)
    end

    # Fetches +url+ via RestClient.
    #
    # @param url [String] address to fetch
    # @return the value returned by RestClient.get
    def get_content_from(url)
      RestClient.get(url)
    end

    # Prints a progress line for +url+. Because it uses Kernel#p, the
    # message is printed in its inspected (quoted) form.
    #
    # @param url [String] address being scraped
    # @return [String] the message that was printed
    def scrape_message(url)
      p "Scraping: #{url}"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: confed_scraper
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matt Polito
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-12-17 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: pry
|
16
|
+
requirement: &70212084710360 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70212084710360
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rest-client
|
27
|
+
requirement: &70212084709940 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70212084709940
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: nokogiri
|
38
|
+
requirement: &70212084709520 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70212084709520
|
47
|
+
description: Site scrapers to ease data import into Confed
|
48
|
+
email:
|
49
|
+
- matt.polito@gmail.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- .rvmrc
|
56
|
+
- Gemfile
|
57
|
+
- Gemfile.lock
|
58
|
+
- Rakefile
|
59
|
+
- confed_scraper.gemspec
|
60
|
+
- lib/confed_scraper.rb
|
61
|
+
- lib/confed_scraper/confreaks_scraper.rb
|
62
|
+
- lib/confed_scraper/scraper.rb
|
63
|
+
- lib/confed_scraper/version.rb
|
64
|
+
homepage: http://github.com/mattpolito/confed_scraper
|
65
|
+
licenses: []
|
66
|
+
post_install_message:
|
67
|
+
rdoc_options: []
|
68
|
+
require_paths:
|
69
|
+
- lib
|
70
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
71
|
+
none: false
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ! '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project: confed_scraper
|
84
|
+
rubygems_version: 1.8.10
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Site scrapers to ease data import into Confed
|
88
|
+
test_files: []
|