confed_scraper 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use ruby-1.9.2@confed_scraper --create
data/Gemfile ADDED
@@ -0,0 +1,3 @@
# Fetch gems over HTTPS. The `:rubygems` symbol shortcut is deprecated by
# Bundler because it resolves to a plain-HTTP source.
source "https://rubygems.org"

# Dependencies are declared in the gemspec.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,33 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ confed_scraper (0.0.1)
5
+ nokogiri
6
+ rest-client
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ coderay (0.9.8)
12
+ method_source (0.6.7)
13
+ ruby_parser (>= 2.3.1)
14
+ mime-types (1.17.2)
15
+ nokogiri (1.5.0)
16
+ pry (0.9.7.4)
17
+ coderay (~> 0.9.8)
18
+ method_source (~> 0.6.7)
19
+ ruby_parser (>= 2.3.1)
20
+ slop (~> 2.1.0)
21
+ rest-client (1.6.7)
22
+ mime-types (>= 1.16)
23
+ ruby_parser (2.3.1)
24
+ sexp_processor (~> 3.0)
25
+ sexp_processor (3.0.9)
26
+ slop (2.1.0)
27
+
28
+ PLATFORMS
29
+ ruby
30
+
31
+ DEPENDENCIES
32
+ confed_scraper!
33
+ pry
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/confed_scraper.gemspec ADDED
@@ -0,0 +1,25 @@
# -*- encoding: utf-8 -*-
# Gem specification for confed_scraper.
# The lib directory is put on the load path so the version constant can be
# read from confed_scraper/version.
$LOAD_PATH << File.expand_path("../lib", __FILE__)
require "confed_scraper/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "confed_scraper"
  spec.version  = ConfedScraper::VERSION
  spec.authors  = ["Matt Polito"]
  spec.email    = ["matt.polito@gmail.com"]
  spec.homepage = "http://github.com/mattpolito/confed_scraper"

  # Summary and description carry the same text.
  spec.summary     = "Site scrapers to ease data import into Confed"
  spec.description = "Site scrapers to ease data import into Confed"

  spec.rubyforge_project = "confed_scraper"

  # Packaged files are whatever git tracks at build time.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Dependencies: pry for development, rest-client and nokogiri at runtime.
  spec.add_development_dependency "pry"
  ["rest-client", "nokogiri"].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end
end
data/lib/confed_scraper/confreaks_scraper.rb ADDED
@@ -0,0 +1,29 @@
module ConfedScraper
  # Scrapes a Confreaks listing page: follows every video link found on the
  # page at +url+ and collects the details shown on each video's own page.
  class ConfreaksScraper < Scraper
    # Prefix prepended to the href of each video link on the listing page.
    SITE_URI = "http://confreaks.net".freeze

    # Fetches the listing page at +url+, then fetches and scrapes the page
    # behind each video link, in document order.
    #
    # Anchors without an href attribute are skipped, because there is no
    # page to follow for them.
    #
    # @return [Array<Hash>] one hash per video with the keys :title, :uri,
    #   :presenters and :description
    def process
      listing_doc = Nokogiri::HTML.parse(get_content_from(url))
      links = listing_doc.xpath('//*[@class="video"]//*[@class="title"]/a')
      hrefs = links.map { |link| link.attr('href') }.compact

      hrefs.map { |href| scrape_video(SITE_URI + href) }
    end

    private

    # Fetches a single video page, reports it through scrape_message, and
    # extracts its details.
    #
    # @param show_page_url [String] full address of the video page
    # @return [Hash] the :title, :uri, :presenters and :description of the video
    def scrape_video(show_page_url)
      show_page_doc = Nokogiri::HTML.parse(get_content_from(show_page_url))

      scrape_message(show_page_url)

      {
        title: show_page_doc.xpath('//*[@class="video-title"]').text.strip,
        uri: show_page_url,
        presenters: show_page_doc.xpath('//*[@class="video-presenters"]/a').map(&:text),
        description: show_page_doc.xpath('//*[@class="video-abstract"]/p').text.strip
      }
    end
  end
end
data/lib/confed_scraper/scraper.rb ADDED
@@ -0,0 +1,20 @@
module ConfedScraper
  # Base class for site scrapers. It stores the URL to scrape and offers
  # helpers for fetching content and reporting progress.
  class Scraper
    # The URL given to the constructor.
    attr_reader :url

    # @param url the location to scrape; stored unchanged
    def initialize(url)
      @url = url
    end

    # Hook for subclasses. The base implementation does nothing and returns nil.
    #
    # The argument is optional so the base class can be called the same way
    # as a subclass that defines +process+ without parameters. A URL may
    # still be passed; it is ignored here.
    def process(url = nil)
    end

    # Fetches +url+ with RestClient.get and returns whatever that call returns.
    def get_content_from(url)
      RestClient.get(url)
    end

    # Writes a "Scraping: <url>" progress line to standard output.
    # puts is used instead of p so the line is not printed wrapped in quotes.
    def scrape_message(url)
      puts "Scraping: #{url}"
    end
  end
end
data/lib/confed_scraper/version.rb ADDED
@@ -0,0 +1,3 @@
module ConfedScraper
  # Version of the gem; read by the gemspec. Frozen so it cannot be mutated
  # at runtime.
  VERSION = "0.0.2".freeze
end
data/lib/confed_scraper.rb ADDED
@@ -0,0 +1,3 @@
# Third-party libraries whose constants (RestClient, Nokogiri) the scrapers
# use. They are loaded here so that requiring this file alone is enough.
require "rest-client"
require "nokogiri"

require "confed_scraper/version"
require "confed_scraper/scraper"
require "confed_scraper/confreaks_scraper"
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: confed_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Matt Polito
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-17 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: pry
16
+ requirement: &70212084710360 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70212084710360
25
+ - !ruby/object:Gem::Dependency
26
+ name: rest-client
27
+ requirement: &70212084709940 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70212084709940
36
+ - !ruby/object:Gem::Dependency
37
+ name: nokogiri
38
+ requirement: &70212084709520 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70212084709520
47
+ description: Site scrapers to ease data import into Confed
48
+ email:
49
+ - matt.polito@gmail.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - .rvmrc
56
+ - Gemfile
57
+ - Gemfile.lock
58
+ - Rakefile
59
+ - confed_scraper.gemspec
60
+ - lib/confed_scraper.rb
61
+ - lib/confed_scraper/confreaks_scraper.rb
62
+ - lib/confed_scraper/scraper.rb
63
+ - lib/confed_scraper/version.rb
64
+ homepage: http://github.com/mattpolito/confed_scraper
65
+ licenses: []
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project: confed_scraper
84
+ rubygems_version: 1.8.10
85
+ signing_key:
86
+ specification_version: 3
87
+ summary: Site scrapers to ease data import into Confed
88
+ test_files: []