brainy_zip_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7bab99c323598d43715d712553d08480ffddb8f3
4
+ data.tar.gz: 41afd1437b09896a4b0afa83ad8365940a34f448
5
+ SHA512:
6
+ metadata.gz: d7362e82871b199af4dbbee064ab94bd6e07d106823bc1599836cfdbc579ffb606d16411e7af876ef5e294156e1acc7f0d6fe819788ab92e39b721dd91c1a953
7
+ data.tar.gz: b34bc39fe0e562a5b28b8671991a32ba0b128b22af4a1f45de5f0b28c2a8e0b02dc34371c45948da5696ab4a82400a4c0ee290a9eef4dc8da1cd939898da311e
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in brainy_zip_scraper.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Eric Berry
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,8 @@
1
+ # Brainy Zip Scraper
2
+
3
+ Creates a CSV file of ZIP code data scraped from BrainyZip.com.
4
+
5
+ ## Usage
6
+
7
+ require 'brainy_zip_scraper'
8
+ BrainyZipScraper.to_csv("path/to/zip_codes.csv")
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for brainy_zip_scraper.

# Put the gem's own lib/ directory on the load path so the version file can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'brainy_zip_scraper/version'

Gem::Specification.new do |spec|
  spec.name          = "brainy_zip_scraper"
  spec.version       = BrainyZipScraper::VERSION
  spec.authors       = ["Eric Berry"]
  spec.email         = ["cavneb@gmail.com"]
  spec.summary       = "Save zip code data to CSV"
  # Kept distinct from the summary: RubyGems warns at build time when the two
  # fields are identical.
  spec.description   = "Scrapes the per-state zip code tables published on " \
                       "BrainyZip.com and writes them to a single CSV file."
  spec.homepage      = "https://github.com/cavneb/brainy_zip_scraper"
  spec.license       = "MIT"

  # Package exactly the files tracked by git (NUL-separated to survive odd
  # file names).
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Pessimistic constraint instead of an open-ended ">= 0" dependency, which
  # RubyGems flags as not recommended.
  spec.add_dependency "nokogiri", "~> 1.6"
  spec.add_dependency "ruby-progressbar", "~> 1.6.1"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
@@ -0,0 +1,10 @@
1
+ require "brainy_zip_scraper/scraper"
2
+ require "brainy_zip_scraper/version"
3
+
4
# Top-level namespace and public entry point of the gem.
module BrainyZipScraper
  class << self
    # Scrapes the zip code data and writes it to the CSV file at +path+,
    # then prints a confirmation line to standard output.
    #
    # @param path [String] destination of the CSV file
    # @return [nil] the result of the final +puts+
    def to_csv(path)
      Scraper.new.to_csv(path)
      puts "Updated #{path}"
    end
  end
end
@@ -0,0 +1,104 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'ruby-progressbar'
4
+ require 'csv'
5
+
6
# Scrapes the per-state zip code listing pages on brainyzip.com and collects
# the table rows into a single CSV file.
class Scraper
  # URL pattern of a state listing page; %s is replaced by the state slug.
  STATE_URL_FORMAT = "http://www.brainyzip.com/state/zip_%s.html".freeze

  # State slugs as they appear in the listing page URLs, in scrape order
  # (the 50 states plus the District of Columbia).
  STATE_SLUGS = %w[
    alabama alaska arizona arkansas california colorado connecticut delaware
    districtofcolumbia florida georgia hawaii idaho illinois indiana iowa
    kansas kentucky louisiana maine maryland massachusetts michigan minnesota
    mississippi missouri montana nebraska nevada newhampshire newjersey
    newmexico newyork northcarolina northdakota ohio oklahoma oregon
    pennsylvania rhodeisland southcarolina southdakota tennessee texas utah
    vermont virginia washington westvirginia wisconsin wyoming
  ].freeze

  # Header row written as the first line of the CSV file.
  CSV_HEADER = ["zip_code", "city", "state", "county", "population"].freeze

  # Builds the list of state listing pages to scrape.
  #
  # @return [Array<String>] a fresh array with one URL per entry in STATE_SLUGS
  def urls
    STATE_SLUGS.map { |slug| format(STATE_URL_FORMAT, slug) }
  end

  # Downloads one listing page and extracts its data rows.
  #
  # @param url [String] address of a state listing page
  # @return [Array<Array<String>>] one array of cell texts per table row
  def parse_page(url)
    doc = fetch_document(url)
    table_rows = doc.xpath("//center[3]/table/tr[2]/td[1]/table/tr")

    # Skip the first row (presumably the table's own heading); the CSV gets
    # its header from CSV_HEADER instead.
    table_rows.drop(1).map do |tr|
      # NOTE(review): children also yields any whitespace text nodes between
      # cells, which would show up as empty columns - confirm against the live
      # markup; element_children may be what is intended.
      tr.children.map { |cell| cell.text.strip.gsub(' zip code', '') }
    end
  end

  # Scrapes every page returned by #urls and writes the collected rows,
  # preceded by CSV_HEADER, to +path+. All pages are fetched before the file
  # is opened, so a failed download does not leave a partial file behind.
  #
  # @param path [String] destination of the CSV file (overwritten if present)
  # @return [String] the given path
  def to_csv(path)
    pages = urls
    progressbar = ProgressBar.create(
      :title => 'Zip Codes by State',
      :starting_at => 0,
      :total => pages.length,
      :format => '%a |%b>>%i| %p%% %t'
    )

    csv_rows = [CSV_HEADER]
    pages.each do |page|
      # concat appends in place instead of copying the whole array per state.
      csv_rows.concat(parse_page(page))
      progressbar.increment
    end

    CSV.open(path, "w") do |csv|
      csv_rows.each { |row| csv << row }
    end

    path
  end

  private

  # Fetches +url+ and parses the response body as HTML.
  #
  # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used where
  # open-uri provides it; older Rubies fall back to Kernel#open. The block
  # form closes the underlying IO once parsing is done.
  def fetch_document(url)
    opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
    opener.call(url) { |io| Nokogiri::HTML(io) }
  end
end
@@ -0,0 +1,3 @@
1
module BrainyZipScraper
  # Current gem version; read by the gemspec at build time. Frozen so the
  # shared constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: brainy_zip_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Eric Berry
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: ruby-progressbar
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.6.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.6.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Save zip code data to CSV
70
+ email:
71
+ - cavneb@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - brainy_zip_scraper.gemspec
82
+ - lib/brainy_zip_scraper.rb
83
+ - lib/brainy_zip_scraper/scraper.rb
84
+ - lib/brainy_zip_scraper/version.rb
85
+ homepage: https://github.com/cavneb/brainy_zip_scraper
86
+ licenses:
87
+ - MIT
88
+ metadata: {}
89
+ post_install_message:
90
+ rdoc_options: []
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ requirements: []
104
+ rubyforge_project:
105
+ rubygems_version: 2.2.2
106
+ signing_key:
107
+ specification_version: 4
108
+ summary: Save zip code data to CSV
109
+ test_files: []
110
+ has_rdoc: