confed_scraper 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use ruby-1.9.2@confed_scraper --create
data/Gemfile ADDED
@@ -0,0 +1,3 @@
# Resolve gems from the official RubyGems index over HTTPS. The `:rubygems`
# symbol shorthand is deprecated by Bundler and resolved to the plain-HTTP
# endpoint.
source "https://rubygems.org"

# Pull the dependency list from confed_scraper.gemspec.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,33 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ confed_scraper (0.0.1)
5
+ nokogiri
6
+ rest-client
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ coderay (0.9.8)
12
+ method_source (0.6.7)
13
+ ruby_parser (>= 2.3.1)
14
+ mime-types (1.17.2)
15
+ nokogiri (1.5.0)
16
+ pry (0.9.7.4)
17
+ coderay (~> 0.9.8)
18
+ method_source (~> 0.6.7)
19
+ ruby_parser (>= 2.3.1)
20
+ slop (~> 2.1.0)
21
+ rest-client (1.6.7)
22
+ mime-types (>= 1.16)
23
+ ruby_parser (2.3.1)
24
+ sexp_processor (~> 3.0)
25
+ sexp_processor (3.0.9)
26
+ slop (2.1.0)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ confed_scraper!
33
+ pry
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/confed_scraper.gemspec ADDED
@@ -0,0 +1,25 @@
# -*- encoding: utf-8 -*-
# Gem specification for confed_scraper.
#
# lib/ is appended to the load path so the version constant can be read
# straight from the source tree while the gem is being built.
$LOAD_PATH.push(File.expand_path("../lib", __FILE__))
require "confed_scraper/version"

Gem::Specification.new do |gem|
  # Summary and description intentionally carry the same text.
  blurb = "Site scrapers to ease data import into Confed"

  gem.name        = "confed_scraper"
  gem.version     = ConfedScraper::VERSION
  gem.authors     = ["Matt Polito"]
  gem.email       = ["matt.polito@gmail.com"]
  gem.homepage    = "http://github.com/mattpolito/confed_scraper"
  gem.summary     = blurb
  gem.description = blurb

  gem.rubyforge_project = "confed_scraper"

  # The packaged file lists are whatever git currently tracks.
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # pry is only needed while developing; the scrapers themselves rely on
  # rest-client and nokogiri at runtime.
  gem.add_development_dependency "pry"
  %w[rest-client nokogiri].each do |runtime_gem|
    gem.add_runtime_dependency runtime_gem
  end
end
data/lib/confed_scraper/confreaks_scraper.rb ADDED
@@ -0,0 +1,29 @@
require 'uri'

module ConfedScraper
  # Scraper for talk listings hosted on confreaks.net.
  #
  # Starting from a listing page, #process follows every talk link found on
  # it and collects the title, URI, presenters and description of each talk.
  class ConfreaksScraper < Scraper
    # Base against which the hrefs found on the listing page are resolved.
    SITE_URI = "http://confreaks.net".freeze

    # Anchors that lead from the listing page to the individual talk pages.
    VIDEO_LINKS_XPATH = '//*[@class="video"]//*[@class="title"]/a'.freeze

    # Scrapes the listing page and every talk page it links to.
    #
    # @param url [String] listing page to scrape; defaults to the URL the
    #   scraper was constructed with, so calling it without arguments keeps
    #   working while the signature stays compatible with Scraper#process
    # @return [Array<Hash>] one hash per talk with the keys :title, :uri,
    #   :presenters and :description, in listing order
    def process(url = self.url)
      listing_doc = Nokogiri::HTML.parse(get_content_from(url))
      hrefs = listing_doc.xpath(VIDEO_LINKS_XPATH).map { |link| link.attr('href') }

      # Anchors without an href cannot be followed, so they are skipped
      # instead of raising a TypeError while building the URL.
      hrefs.compact.map { |href| scrape_video(absolute_uri(href)) }
    end

    private

    # Resolves an href against SITE_URI so that rooted, relative and already
    # absolute links all produce a usable URL.
    def absolute_uri(href)
      URI.join(SITE_URI, href).to_s
    end

    # Fetches a single talk page and extracts its details.
    #
    # @param show_page_url [String] absolute URL of the talk page
    # @return [Hash] :title, :uri, :presenters and :description of the talk
    def scrape_video(show_page_url)
      # Announce the URL before requesting it so that a failing request can
      # be traced back to the page that caused it.
      scrape_message(show_page_url)
      show_page_doc = Nokogiri::HTML.parse(get_content_from(show_page_url))

      {
        :title       => show_page_doc.xpath('//*[@class="video-title"]').text.strip,
        :uri         => show_page_url,
        :presenters  => show_page_doc.xpath('//*[@class="video-presenters"]/a').map(&:text),
        :description => show_page_doc.xpath('//*[@class="video-abstract"]/p').text.strip
      }
    end
  end
end
data/lib/confed_scraper/scraper.rb ADDED
@@ -0,0 +1,20 @@
module ConfedScraper
  # Base class for site scrapers.
  #
  # Holds the URL a scraper starts from and provides the HTTP and progress
  # reporting helpers that concrete scrapers build on.
  class Scraper
    # @return [String] URL the scraper was constructed with
    attr_reader :url

    # @param url [String] page the scraper starts from
    def initialize(url)
      @url = url
    end

    # Hook for concrete scrapers to override; the base implementation does
    # nothing.
    #
    # @param url [String] page to scrape; defaults to the URL given to the
    #   constructor, so the method can be called with or without an argument
    # @return [nil]
    def process(url = self.url)
      nil
    end

    # Fetches a page over HTTP.
    #
    # @param url [String] address to request
    # @return whatever RestClient.get returns for the URL
    def get_content_from(url)
      RestClient.get(url)
    end

    # Reports on standard output which page is being scraped.
    #
    # Uses puts rather than the debugging helper p so the message is printed
    # as plain text instead of a quoted, escaped string literal.
    #
    # @param url [String] page being scraped
    def scrape_message(url)
      puts "Scraping: #{url}"
    end
  end
end
data/lib/confed_scraper/version.rb ADDED
@@ -0,0 +1,3 @@
module ConfedScraper
  # Release number of the gem. Frozen so the constant cannot be mutated in
  # place at runtime.
  VERSION = "0.0.2".freeze
end
data/lib/confed_scraper.rb ADDED
@@ -0,0 +1,3 @@
# Entry point of the confed_scraper gem.
#
# The scrapers call Nokogiri and RestClient directly, so both runtime
# dependencies are loaded here; otherwise requiring the gem on its own leaves
# those constants undefined until the first scrape fails with a NameError.
require "nokogiri"
require "rest-client"

require "confed_scraper/version"
# scraper defines the base class and must be loaded before the scrapers that
# inherit from it.
require "confed_scraper/scraper"
require "confed_scraper/confreaks_scraper"
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: confed_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matt Polito
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-17 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: pry
16
+ requirement: &70212084710360 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70212084710360
25
+ - !ruby/object:Gem::Dependency
26
+ name: rest-client
27
+ requirement: &70212084709940 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70212084709940
36
+ - !ruby/object:Gem::Dependency
37
+ name: nokogiri
38
+ requirement: &70212084709520 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70212084709520
47
+ description: Site scrapers to ease data import into Confed
48
+ email:
49
+ - matt.polito@gmail.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - .rvmrc
56
+ - Gemfile
57
+ - Gemfile.lock
58
+ - Rakefile
59
+ - confed_scraper.gemspec
60
+ - lib/confed_scraper.rb
61
+ - lib/confed_scraper/confreaks_scraper.rb
62
+ - lib/confed_scraper/scraper.rb
63
+ - lib/confed_scraper/version.rb
64
+ homepage: http://github.com/mattpolito/confed_scraper
65
+ licenses: []
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project: confed_scraper
84
+ rubygems_version: 1.8.10
85
+ signing_key:
86
+ specification_version: 3
87
+ summary: Site scrapers to ease data import into Confed
88
+ test_files: []