barbershop_contestants 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +1 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +91 -0
- data/LICENSE.txt +21 -0
- data/README.md +37 -0
- data/Rakefile +2 -0
- data/barbershop_contestants.gemspec +49 -0
- data/bin/barbershop_contestants +15 -0
- data/bin/console +16 -0
- data/bin/setup +8 -0
- data/lib/barbershop_contestants/cli.rb +214 -0
- data/lib/barbershop_contestants/competitor.rb +90 -0
- data/lib/barbershop_contestants/contest.rb +33 -0
- data/lib/barbershop_contestants/performance.rb +83 -0
- data/lib/barbershop_contestants/scraper.rb +177 -0
- data/lib/barbershop_contestants/version.rb +3 -0
- data/lib/barbershop_contestants.rb +22 -0
- data/pkg/barbershop-contestants-0.2.0.gem +0 -0
- data/pkg/barbershop_contestants-0.1.0.gem +0 -0
- data/pkg/barbershop_contestants-0.2.0.gem +0 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project.htm +972 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/300px-2018_After_Hours.jpg +0 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/After Hours (BHS) - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project.html +883 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project.html +973 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project.html +496 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project.htm +454 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/tpc-check.html +3 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project.htm +633 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project.htm +591 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project.html +590 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project.htm +303 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/1940Pic.jpg +0 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/Flat Foot Four - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project.htm +302 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/1987Pic.jpg +0 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/load(1).php +21 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/load(2).php +1 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/load(3).php +178 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/load.php +1 -0
- data/sites/Interstate Rivals - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
- metadata +235 -0
@@ -0,0 +1,177 @@
|
|
1
|
+
# simple (reusable) scraper class that calls Nokogiri and dumps
|
2
|
+
# the requested site
|
3
|
+
class Scraper
  # Page locations, keyed by page kind and then by source (:web or :local).
  # :base holds the prefix that every other entry is appended to.
  # The *_year entries are [prefix, suffix] pairs that get joined around the
  # contest year (see scrape_year).
  LOCATIONS = {
    base: {
      web: "https://www.barbershopwiki.com/wiki/",
      local: "./sites/"
    },
    q_champs: {
      web: "BHS_International_Quartet_Champions",
      local: "BHS International Quartet Champions - Barbershop Wiki Project.html"
    },
    c_champs: {
      web: "BHS_International_Chorus_Champions",
      local: "BHS International Chorus Champions - Barbershop Wiki Project.html"
    },
    q_year: {
      web: ["BHS_Intl_Quartet_Contest_", ""],
      local: ["BHS Intl Quartet Contest ", " - Barbershop Wiki Project.html"]
    },
    c_year: {
      web: ["BHS_Intl_Chorus_Contest_", ""],
      local: ["BHS Intl Chorus Contest ", " - Barbershop Wiki Project.html"]
    },
    # Empty placeholders; no method in this class reads them.
    q_page: {
      web: "",
      local: ""
    },
    c_page: {
      web: "",
      local: ""
    }
  }

  # The constants below are not referenced by any method in this class.
  # NOTE(review): presumably kept for callers elsewhere or as legacy -- confirm
  # before removing.
  QUARTET_CHAMPS_SITE = "https://www.barbershopwiki.com/wiki/BHS_International_Quartet_Champions"
  CHORUS_CHAMPS_SITE = "https://www.barbershopwiki.com/wiki/BHS_International_Chorus_Champions"
  LOCAL_SITES = {
    quartet_champs: "./sites/BHS International Quartet Champions - Barbershop Wiki Project.html",
    chorus_champs: "./sites/BHS International Chorus Champions - Barbershop Wiki Project.html",
    chorus_2018: "./sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project.html",
    quartet_2018: "./sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project.html"
  }
  CACHE_LOCATIONS = {
    qchamps: "./sites/qchamps.txt",
    cchamps: "./sites/cchamps.txt"
  }

  # Years already handled by scrape_and_create_year, per contest type.
  @years_scraped = { "quartet" => [], "chorus" => [] }

  # Returns the cached document if load_cache provides one, otherwise fetches
  # and parses the page.
  #
  # The scraper should know what it's scraping, but should not worry about the
  # data classes' architecture.
  #
  # page - String: an http(s) URL or a local file path.
  def self.scrape_or_load(page)
    load_cache || fetch_document(page)
  end

  # Cache loading is not implemented yet; always returns nil so that callers
  # fall through to a real fetch.
  #
  # TODO: figure this out later. Ideas to try:
  #   doc = Nokogiri(string_or_io)
  #   node.write_to(io, *options)
  #   or node.to_s / .to_html / .to_xml
  def self.load_cache
    nil
  end

  # Returns the data rows (header row removed) of the quartet champions table.
  #
  # source - key into the LOCATIONS entries (:web or :local).
  # TODO: reinstate real scraping functionality when in wifi
  def self.scrape_quartet_champs(source)
    puts "Scraping Quartet Champs"
    location = LOCATIONS[:base][source] + LOCATIONS[:q_champs][source]
    rows = scrape_or_load(location).css(".wikitable tbody tr")
    rows.shift # drop the header row; it is not distinguishable via the CSS selector
    rows
  end

  # Scrapes the quartet champions table and hands one attribute hash per row
  # to Performance.find_or_create, then clears the terminal.
  def self.scrape_and_create_quartet_champs(source)
    scrape_quartet_champs(source).each do |row|
      cells = row_cells(row)
      q_champs_hash = {
        year: cells[1].to_i,
        name: cells[2],
        score: cells[3],
        district: cells[4],
        comments: cells[5],
        members: cells[7],
        place: 1, # champions definitionally are first place
        type: "quartet"
      }
      Performance.find_or_create(q_champs_hash, "quartet")
    end
    clear_screen
  end

  # Returns the data rows (header row removed) of the second ".wikitable" on
  # the chorus champions page.
  #
  # source - key into the LOCATIONS entries (:web or :local).
  def self.scrape_chorus_champs(source)
    puts "Scraping Chorus Champs"
    location = LOCATIONS[:base][source] + LOCATIONS[:c_champs][source]
    rows = scrape_or_load(location).css(".wikitable")[1].css("tr")
    rows.shift # remove header line
    rows
  end

  # Scrapes the chorus champions table and hands one attribute hash per row
  # to Performance.find_or_create, then clears the terminal.
  def self.scrape_and_create_chorus_champs(source)
    scrape_chorus_champs(source).each do |row|
      cells = row_cells(row)
      c_champs_hash = {
        year: cells[1].to_i,
        name: cells[2],
        hometown_and_district: cells[3],
        director: cells[4],
        number_on_stage: cells[5],
        score: cells[6],
        place: 1, # champions definitionally are first place
        type: "chorus"
      }
      Performance.find_or_create(c_champs_hash, "chorus")
    end
    clear_screen
  end

  # Scrapes one contest year and passes every result row to
  # Performance.find_or_create. Each (type, year) pair is processed once;
  # later calls return true immediately.
  #
  # source - key into the LOCATIONS entries (:web or :local).
  # year   - contest year; stored as given in each row hash.
  # type   - "quartet" or "chorus".
  def self.scrape_and_create_year(source, year, type)
    return true if @years_scraped[type].include?(year)

    # Mark the year up front so a nested request for the same year
    # short-circuits, but roll the mark back if anything fails so the year can
    # be retried instead of being silently skipped forever.
    @years_scraped[type] << year
    begin
      scrape_year(source, year, type).each do |table_rows|
        table_rows.each do |tr|
          cells = row_cells(tr)
          year_hash = {
            year: year,
            place: cells[1],
            name: cells[2],
            district: cells[3],
            score: cells[4]
          }
          year_hash[:number_on_stage] = cells[5] if type == "chorus"
          Performance.find_or_create(year_hash, type)
        end
      end
    rescue StandardError
      @years_scraped[type].delete(year)
      raise
    end
    clear_screen
  end

  # Returns an Array with one entry per results table on the contest page for
  # the given year; each entry is that table's rows without its header row.
  # Tables whose first row contains "Admin" are skipped, as are tables that
  # have no rows at all.
  def self.scrape_year(source, year, type)
    puts "Scraping #{type.capitalize} Contest for #{year}"
    # "quartet" -> :q_year, "chorus" -> :c_year
    page_name = LOCATIONS[:"#{type[0]}_year"][source].join(year.to_s)
    location = LOCATIONS[:base][source] + page_name
    scrape_or_load(location).css(".wikitable").each_with_object([]) do |table, kept|
      rows = table.css("tr")
      header = rows.first
      next if header.nil? || header.text.include?("Admin")

      kept << rows.drop(1)
    end
  end

  # Opens +page+ and parses it with Nokogiri, closing the handle afterwards.
  # http(s) URLs go through open-uri; anything else is treated as a plain file
  # path. Kernel#open is deliberately avoided: it no longer opens URLs on
  # Ruby 3.0+ and it would run a subprocess for a string starting with "|".
  def self.fetch_document(page)
    if page.match?(%r{\Ahttps?://}i)
      require "open-uri" # loaded lazily; only live scraping needs it
      URI.open(page) { |io| Nokogiri::HTML(io) }
    else
      File.open(page) { |io| Nokogiri::HTML(io) }
    end
  end

  # Splits a table row's text into its newline-separated cells.
  def self.row_cells(row)
    row.text.split("\n")
  end

  # Clears the terminal: tries "clear" first, then falls back to "cls".
  def self.clear_screen
    system("clear") || system("cls")
  end

  private_class_method :fetch_document, :row_cells, :clear_screen
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require_all "./lib"
|
2
|
+
|
3
|
+
# QUARTET_CHAMPS_SITE = "https://www.barbershopwiki.com/wiki/BHS_International_Quartet_Champions"
|
4
|
+
|
5
|
+
# Top-level namespace for the gem.
module BarbershopContestants
  # Gem-specific error class; a StandardError subclass, so a bare `rescue`
  # catches it.
  class Error < StandardError; end
end
|
9
|
+
|
10
|
+
# binding.pry
|
11
|
+
# Runs at load time: requiring this file immediately calls CLI.start.
CLI.start
|
12
|
+
### Remember this pattern: ###
|
13
|
+
|
14
|
+
# properties.each do |k, v|
|
15
|
+
# # k = properties
|
16
|
+
# # v = values
|
17
|
+
# class.send("#{k}=", v)
|
18
|
+
# end
|
19
|
+
|
20
|
+
# A line of text
|
21
|
+
|
22
|
+
# rake install local to
|
Binary file
|
Binary file
|
Binary file
|