eolclub_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7b2f6bbd06e41d9f27ab7ad6f0de3c27c2e88ad2
4
+ data.tar.gz: 6c8833dfa3be6aef292f60cde1c8ab529f20749d
5
+ SHA512:
6
+ metadata.gz: 8652dfc89bdf8aa43fd56706cabb4dd434b6a4363c812190777fa7da70040037b49d2c02553d971113c412c3843a0c2254c3196ac64adefd9154df0ee71bbb21
7
+ data.tar.gz: 03f8873d8ac75073534ba65aeba8e03876748ea8ad200f3a55dacde36a0fa85eff78a72d6eb565ed16fbe093e924bbf028e566d7ab30de2caf75b31026f66b3f
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format progress
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in eolclub_scraper.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Matt Gillooly
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # EolclubScraper
2
+
3
+ Scrapes the currently scheduled event from EOLclub.org for PVDTechEvents.com.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'eolclub_scraper'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install eolclub_scraper
18
+
19
+ ## Usage
20
+
21
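+ Call `EolclubScraper.scheduled_event` to fetch the EOLclub.org homepage; it returns an `EolclubScraper::Event` with `start_time` and `end_time`.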
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Pull in Bundler's gem tasks and the RSpec rake task class.
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# Defines the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

# Running `rake` with no arguments runs the specs.
task :default => [:spec]
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Put this gem's lib/ directory on the load path so the version constant
# can be required before the gem itself is installed.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'eolclub_scraper/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = 'eolclub_scraper'
  gem.version     = EolclubScraper::VERSION
  gem.authors     = ['Matt Gillooly']
  gem.email       = ['matt@mattgillooly.com']
  gem.description = 'Scrape currently scheduled event from EOLclub.org for PVDTechEvents.com'
  gem.summary     = 'EOLclub.org event scraper'
  gem.homepage    = 'http://pvdtechevents.com/'
  gem.license     = 'MIT'

  # Package every file tracked by git; executables and test files are
  # derived from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Runtime dependencies.
  %w[nokogiri chronic].each do |runtime_gem|
    gem.add_dependency runtime_gem
  end

  # Development-only dependencies.
  gem.add_development_dependency 'bundler', '~> 1.3'
  %w[rake rspec vcr webmock].each do |dev_gem|
    gem.add_development_dependency dev_gem
  end
end
@@ -0,0 +1,13 @@
1
+ require "eolclub_scraper/version"
2
+ require "eolclub_scraper/homepage"
3
+ require "eolclub_scraper/event_parser"
4
+
5
module EolclubScraper

  class << self
    # Fetches the homepage content and hands it to the event parser.
    #
    # Returns whatever EventParser#parse returns for that content.
    def scheduled_event
      html = Homepage.new.content
      EventParser.new.parse(html)
    end
  end

end
@@ -0,0 +1,6 @@
1
module EolclubScraper

  # Simple value object describing one scheduled meetup.
  #
  # start_time - when the meetup begins.
  # end_time   - when the meetup ends.
  #
  # The Struct is assigned to the constant rather than subclassed. Subclassing
  # `Struct.new(...)` builds a fresh anonymous superclass on every evaluation,
  # so loading the file a second time raises "superclass mismatch".
  Event = Struct.new(:start_time, :end_time)

end
@@ -0,0 +1,24 @@
1
# encoding: utf-8
# The magic comment above is required: this file contains a literal en dash,
# and without it Ruby 1.9 rejects the source as invalid US-ASCII.
require 'eolclub_scraper/event'
require 'nokogiri'
require 'chronic'

module EolclubScraper

  # Builds an Event from the HTML of the EOLclub.org homepage.
  class EventParser

    # En dash separating the start and end times, e.g. "6pm–11pm".
    TIME_RANGE_SEPARATOR = '–'.freeze

    # Parses the scheduled meetup out of the supplied homepage markup.
    #
    # This is extremely fragile, but so far EOLclub.org has been sticking to
    # the same format. It relies on the schedule being on the third line of
    # text in the second <p> element, shaped like
    # "Monday, December 9th from 6pm–11pm".
    #
    # content - String of HTML.
    #
    # Returns an Event whose start_time and end_time are the results of
    # Chronic.parse on "<month> <day> <time>".
    # NOTE(review): the markup carries no year, so the year comes from
    # Chronic's own guessing -- confirm this is acceptable to callers.
    # Raises ArgumentError when no schedule text is found at the expected
    # position.
    def parse(content)
      words = schedule_words(content)
      month, day = words
      start_time, end_time = words.last.split(TIME_RANGE_SEPARATOR)

      Event.new(
        Chronic.parse([month, day, start_time].join(' ')),
        Chronic.parse([month, day, end_time].join(' '))
      )
    end

    private

    # Returns the whitespace-separated words that follow the last comma on
    # the schedule line, e.g. ["December", "9th", "from", "6pm–11pm"].
    def schedule_words(content)
      paragraph = Nokogiri::HTML.parse(content).css('p')[1]
      line      = paragraph && paragraph.text.split("\n")[2]
      words     = line.to_s.split(',').last.to_s.strip.split

      # Fail with a clear message instead of a NoMethodError on nil.
      if words.empty?
        raise ArgumentError, 'could not find the meetup schedule in the supplied content'
      end

      words
    end

  end

end
@@ -0,0 +1,15 @@
1
+ require 'open-uri'
2
+
3
module EolclubScraper

  # Fetches the raw content of the EOLclub homepage.
  class Homepage
    # url - String URL to fetch; defaults to the EOLclub.org homepage.
    def initialize(url='http://eolclub.org/')
      @url = url
    end

    # Downloads the page at the configured URL.
    #
    # The URL is parsed first and opened through open-uri's URI#open rather
    # than Kernel#open, for three reasons: Kernel#open would run a string
    # starting with "|" as a shell command, Kernel#open no longer opens URLs
    # on Ruby 3.0+, and the block form closes the handle once the body has
    # been read.
    #
    # Returns the response body as a String.
    # Raises URI::InvalidURIError when the URL cannot be parsed.
    def content
      URI.parse(@url).open { |io| io.read }
    end
  end

end
@@ -0,0 +1,3 @@
1
module EolclubScraper
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,64 @@
1
+ require 'eolclub_scraper/event_parser'
2
+
3
+ describe EolclubScraper::EventParser do
4
+
5
describe '#parse' do
  # The fixture only names a month and a day ("December 9th"), while the
  # expectation below is fixed to 2013. Pin the clock to shortly before the
  # meetup so the parsed year does not drift with the date the suite runs.
  # NOTE(review): relies on Chronic taking its reference time from Time.now.
  before do
    allow(Time).to receive(:now).and_return(Time.local(2013, 12, 1))
  end

  it 'parses an Event from the supplied content' do
    expected_event = EolclubScraper::Event.new(
      Time.local(2013, 12, 9, 18, 0, 0),
      Time.local(2013, 12, 9, 23, 0, 0)
    )

    expect( subject.parse(content) ).to eq(expected_event)
  end
end
15
+
16
+ let(:content) {
17
+ <<-EOF
18
+ <!DOCTYPE html>
19
+ <html>
20
+ <head>
21
+ <meta http-equiv='content-type' content='text/html; charset=utf-8' />
22
+ <meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0' />
23
+ <title>End of Line Club // Providence, RI hacknight</title>
24
+ <meta name='description' content='Monthly Providence, Rhode Island hacknight. Code, design, and collaborate with other local developers to a backdrop of electronic music.' />
25
+ <link href='/stylesheets/screen.css' media='screen' rel='stylesheet' type='text/css' />
26
+ <script type="text/javascript" src="http://use.typekit.com/vry7ulq.js"></script>
27
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
28
+ </head>
29
+ <body>
30
+ <div id='container'>
31
+ <header>
32
+ <h1>End of Line Club</h1>
33
+ </header>
34
+ <section id='main'>
35
+ <p>
36
+ Monthly Providence, RI hacknight. Code, design, and collaborate
37
+ with other local developers to a backdrop of electronic music.
38
+ Bring your laptop and a project to work on.
39
+ </p>
40
+ <p>
41
+ Our next meetup is
42
+ Monday, December 9th from 6pm&ndash;11pm
43
+ at <a href='http://basicsgroup.com'>Basics Group</a>.
44
+ Arrive whenever you can. Pizza and beer provided.
45
+ </p>
46
+ <p>
47
+ <a href="https://twitter.com/EOLclub">@EOLclub</a>
48
+ to get more info and RSVP.
49
+ </p>
50
+ </section>
51
+ </div>
52
+ <!--
53
+ "Up there it's their time... their time.
54
+ But down here it's our time. It's our time down here."
55
+ - The Goonies
56
+
57
+ Site design and coding by David Piehler (@dpie)
58
+ -->
59
+ </body>
60
+ </html>
61
+ EOF
62
+ }
63
+
64
+ end
@@ -0,0 +1,12 @@
1
require 'spec_helper'
require 'eolclub_scraper/homepage'

describe EolclubScraper::Homepage do

  # Tagged :vcr; spec_helper wires VCR into RSpec metadata and forbids HTTP
  # connections when no cassette is in use.
  describe '#content', :vcr do
    it 'fetches the body content from the EOL Club homepage' do
      fetched_html = subject.content

      expect(fetched_html).to include('Monthly Providence, RI hacknight.')
    end
  end

end
@@ -0,0 +1,19 @@
1
require 'eolclub_scraper'

describe EolclubScraper do

  describe '.scheduled_event' do
    # Stand-ins for the fetched HTML and the parsed result. Only their
    # identity matters here.
    let(:content) { double }
    let(:event)   { double }

    # Replace both collaborators so the example checks only the wiring:
    # whatever Homepage#content returns must be given to EventParser#parse.
    # Uses the allow/receive API instead of the deprecated `any_instance.stub`.
    before do
      allow_any_instance_of(EolclubScraper::Homepage).
        to receive(:content).and_return(content)
      allow_any_instance_of(EolclubScraper::EventParser).
        to receive(:parse).with(content).and_return(event)
    end

    it 'parses an Event from the EOL Club homepage' do
      expect(EolclubScraper.scheduled_event).to eq(event)
    end
  end

end
@@ -0,0 +1,13 @@
1
require 'vcr'

# Specs in this suite tag groups with a bare :vcr symbol.
# NOTE(review): this setting presumably makes a bare symbol behave like
# `:vcr => true` metadata -- confirm against the installed RSpec version.
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
end

VCR.configure do |vcr|
  # Intercept HTTP through WebMock.
  vcr.hook_into :webmock

  # Recorded cassettes are stored under spec/vcr.
  vcr.cassette_library_dir = 'spec/vcr'

  # Do not allow HTTP connections when no cassette is in use.
  vcr.allow_http_connections_when_no_cassette = false

  # Integrate VCR with RSpec metadata.
  vcr.configure_rspec_metadata!
end
@@ -0,0 +1,118 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://eolclub.org/
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - '*/*'
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 302
19
+ message: Found
20
+ headers:
21
+ Connection:
22
+ - close
23
+ Pragma:
24
+ - no-cache
25
+ Cache-Control:
26
+ - no-cache
27
+ Location:
28
+ - /
29
+ body:
30
+ encoding: UTF-8
31
+ string: ''
32
+ http_version:
33
+ recorded_at: Sun, 15 Dec 2013 21:29:17 GMT
34
+ - request:
35
+ method: get
36
+ uri: http://eolclub.org/
37
+ body:
38
+ encoding: US-ASCII
39
+ string: ''
40
+ headers:
41
+ Accept-Encoding:
42
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
43
+ Accept:
44
+ - '*/*'
45
+ User-Agent:
46
+ - Ruby
47
+ response:
48
+ status:
49
+ code: 200
50
+ message: OK
51
+ headers:
52
+ Server:
53
+ - GitHub.com
54
+ Date:
55
+ - Sun, 15 Dec 2013 21:29:17 GMT
56
+ Content-Type:
57
+ - text/html
58
+ Last-Modified:
59
+ - Tue, 03 Dec 2013 16:15:35 GMT
60
+ Transfer-Encoding:
61
+ - chunked
62
+ Connection:
63
+ - close
64
+ Expires:
65
+ - Sun, 15 Dec 2013 21:39:17 GMT
66
+ Cache-Control:
67
+ - max-age=600
68
+ Vary:
69
+ - Accept-Encoding
70
+ body:
71
+ encoding: UTF-8
72
+ string: |
73
+ <!DOCTYPE html>
74
+ <html>
75
+ <head>
76
+ <meta http-equiv='content-type' content='text/html; charset=utf-8' />
77
+ <meta name='viewport' content='width=device-width, initial-scale=1.0, maximum-scale=1.0' />
78
+ <title>End of Line Club // Providence, RI hacknight</title>
79
+ <meta name='description' content='Monthly Providence, Rhode Island hacknight. Code, design, and collaborate with other local developers to a backdrop of electronic music.' />
80
+ <link href='/stylesheets/screen.css' media='screen' rel='stylesheet' type='text/css' />
81
+ <script type="text/javascript" src="http://use.typekit.com/vry7ulq.js"></script>
82
+ <script type="text/javascript">try{Typekit.load();}catch(e){}</script>
83
+ </head>
84
+ <body>
85
+ <div id='container'>
86
+ <header>
87
+ <h1>End of Line Club</h1>
88
+ </header>
89
+ <section id='main'>
90
+ <p>
91
+ Monthly Providence, RI hacknight. Code, design, and collaborate
92
+ with other local developers to a backdrop of electronic music.
93
+ Bring your laptop and a project to work on.
94
+ </p>
95
+ <p>
96
+ Our next meetup is
97
+ Monday, December 9th from 6pm&ndash;11pm
98
+ at <a href='http://basicsgroup.com'>Basics Group</a>.
99
+ Arrive whenever you can. Pizza and beer provided.
100
+ </p>
101
+ <p>
102
+ <a href="https://twitter.com/EOLclub">@EOLclub</a>
103
+ to get more info and RSVP.
104
+ </p>
105
+ </section>
106
+ </div>
107
+ <!--
108
+ "Up there it's their time... their time.
109
+ But down here it's our time. It's our time down here."
110
+ - The Goonies
111
+
112
+ Site design and coding by David Piehler (@dpie)
113
+ -->
114
+ </body>
115
+ </html>
116
+ http_version:
117
+ recorded_at: Sun, 15 Dec 2013 21:29:17 GMT
118
+ recorded_with: VCR 2.5.0
metadata ADDED
@@ -0,0 +1,164 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: eolclub_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Matt Gillooly
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: chronic
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.3'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.3'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: vcr
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: webmock
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Scrape currently scheduled event from EOLclub.org for PVDTechEvents.com
112
+ email:
113
+ - matt@mattgillooly.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - .gitignore
119
+ - .rspec
120
+ - Gemfile
121
+ - LICENSE.txt
122
+ - README.md
123
+ - Rakefile
124
+ - eolclub_scraper.gemspec
125
+ - lib/eolclub_scraper.rb
126
+ - lib/eolclub_scraper/event.rb
127
+ - lib/eolclub_scraper/event_parser.rb
128
+ - lib/eolclub_scraper/homepage.rb
129
+ - lib/eolclub_scraper/version.rb
130
+ - spec/eolclub_scraper/event_parser_spec.rb
131
+ - spec/eolclub_scraper/homepage_spec.rb
132
+ - spec/eolclub_scraper_spec.rb
133
+ - spec/spec_helper.rb
134
+ - spec/vcr/EolclubScraper_Homepage/_content/fetches_the_body_content_from_the_EOL_Club_homepage.yml
135
+ homepage: http://pvdtechevents.com/
136
+ licenses:
137
+ - MIT
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - '>='
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.0.14
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: EOLclub.org event scraper
159
+ test_files:
160
+ - spec/eolclub_scraper/event_parser_spec.rb
161
+ - spec/eolclub_scraper/homepage_spec.rb
162
+ - spec/eolclub_scraper_spec.rb
163
+ - spec/spec_helper.rb
164
+ - spec/vcr/EolclubScraper_Homepage/_content/fetches_the_body_content_from_the_EOL_Club_homepage.yml