barbershop_contestants 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +1 -0
  3. data/CHANGELOG.md +1 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +91 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +37 -0
  8. data/Rakefile +2 -0
  9. data/barbershop_contestants.gemspec +49 -0
  10. data/bin/barbershop_contestants +15 -0
  11. data/bin/console +16 -0
  12. data/bin/setup +8 -0
  13. data/lib/barbershop_contestants/cli.rb +214 -0
  14. data/lib/barbershop_contestants/competitor.rb +90 -0
  15. data/lib/barbershop_contestants/contest.rb +33 -0
  16. data/lib/barbershop_contestants/performance.rb +83 -0
  17. data/lib/barbershop_contestants/scraper.rb +177 -0
  18. data/lib/barbershop_contestants/version.rb +3 -0
  19. data/lib/barbershop_contestants.rb +22 -0
  20. data/pkg/barbershop-contestants-0.2.0.gem +0 -0
  21. data/pkg/barbershop_contestants-0.1.0.gem +0 -0
  22. data/pkg/barbershop_contestants-0.2.0.gem +0 -0
  23. data/sites/After Hours (BHS) - Barbershop Wiki Project.htm +972 -0
  24. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/300px-2018_After_Hours.jpg +0 -0
  25. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  26. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load(1).php +21 -0
  27. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load(2).php +1 -0
  28. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load(3).php +178 -0
  29. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/load.php +1 -0
  30. data/sites/After Hours (BHS) - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  31. data/sites/BHS International Chorus Champions - Barbershop Wiki Project.html +883 -0
  32. data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  33. data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load(1).php +21 -0
  34. data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load(2).php +1 -0
  35. data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load(3).php +178 -0
  36. data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/load.php +1 -0
  37. data/sites/BHS International Chorus Champions - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  38. data/sites/BHS International Quartet Champions - Barbershop Wiki Project.html +973 -0
  39. data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  40. data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load(1).php +21 -0
  41. data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load(2).php +1 -0
  42. data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load(3).php +178 -0
  43. data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/load.php +1 -0
  44. data/sites/BHS International Quartet Champions - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  45. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project.html +496 -0
  46. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  47. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load(1).php +21 -0
  48. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load(2).php +1 -0
  49. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load(3).php +178 -0
  50. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/load.php +1 -0
  51. data/sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  52. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project.htm +454 -0
  53. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  54. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load(1).php +21 -0
  55. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load(2).php +1 -0
  56. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load(3).php +178 -0
  57. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/load.php +1 -0
  58. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  59. data/sites/BHS Intl Quartet Contest 1940 - Barbershop Wiki Project_files/tpc-check.html +3 -0
  60. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project.htm +633 -0
  61. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  62. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load(1).php +21 -0
  63. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load(2).php +1 -0
  64. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load(3).php +178 -0
  65. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/load.php +1 -0
  66. data/sites/BHS Intl Quartet Contest 1987 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  67. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project.htm +591 -0
  68. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project.html +590 -0
  69. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  70. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load(1).php +21 -0
  71. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load(2).php +1 -0
  72. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load(3).php +178 -0
  73. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/load.php +1 -0
  74. data/sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  75. data/sites/Flat Foot Four - Barbershop Wiki Project.htm +303 -0
  76. data/sites/Flat Foot Four - Barbershop Wiki Project_files/1940Pic.jpg +0 -0
  77. data/sites/Flat Foot Four - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  78. data/sites/Flat Foot Four - Barbershop Wiki Project_files/load(1).php +21 -0
  79. data/sites/Flat Foot Four - Barbershop Wiki Project_files/load(2).php +1 -0
  80. data/sites/Flat Foot Four - Barbershop Wiki Project_files/load(3).php +178 -0
  81. data/sites/Flat Foot Four - Barbershop Wiki Project_files/load.php +1 -0
  82. data/sites/Flat Foot Four - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  83. data/sites/Interstate Rivals - Barbershop Wiki Project.htm +302 -0
  84. data/sites/Interstate Rivals - Barbershop Wiki Project_files/1987Pic.jpg +0 -0
  85. data/sites/Interstate Rivals - Barbershop Wiki Project_files/cc-by-nc-sa.png +0 -0
  86. data/sites/Interstate Rivals - Barbershop Wiki Project_files/load(1).php +21 -0
  87. data/sites/Interstate Rivals - Barbershop Wiki Project_files/load(2).php +1 -0
  88. data/sites/Interstate Rivals - Barbershop Wiki Project_files/load(3).php +178 -0
  89. data/sites/Interstate Rivals - Barbershop Wiki Project_files/load.php +1 -0
  90. data/sites/Interstate Rivals - Barbershop Wiki Project_files/poweredby_mediawiki_88x31.png +0 -0
  91. metadata +235 -0
@@ -0,0 +1,177 @@
1
# Simple (reusable) scraper: loads Barbershop Wiki pages (from the web or from
# saved local copies) through Nokogiri and turns the rows of their result
# tables into Performance records.
#
# The scraper knows which pages and which table columns it reads, but it does
# not care how the data classes are organised; every row is handed to
# Performance.find_or_create as a plain hash.
class Scraper
  # Page locations. Each entry has a :web and a :local variant and the
  # `source` argument of the scrape methods (:web or :local) picks one.
  #   :base      prefix prepended to every page name
  #   :*_champs  complete page names of the champions lists
  #   :*_year    [prefix, suffix] pairs joined around the contest year
  #   :*_page    empty placeholders, not used by any method in this class
  # NOTE: only the outer hash is frozen; the nested hashes/arrays are not.
  LOCATIONS = {
    base: {
      web: "https://www.barbershopwiki.com/wiki/",
      local: "./sites/"
    },
    q_champs: {
      web: "BHS_International_Quartet_Champions",
      local: "BHS International Quartet Champions - Barbershop Wiki Project.html",
    },
    c_champs: {
      web: "BHS_International_Chorus_Champions",
      local: "BHS International Chorus Champions - Barbershop Wiki Project.html"
    },
    q_year: {
      web: ["BHS_Intl_Quartet_Contest_", ""],
      local: ["BHS Intl Quartet Contest ", " - Barbershop Wiki Project.html"]
    },
    c_year: {
      web: ["BHS_Intl_Chorus_Contest_", ""],
      local: ["BHS Intl Chorus Contest ", " - Barbershop Wiki Project.html"]
    },
    q_page: {
      web: "",
      local: ""
    },
    c_page: {
      web: "",
      local: ""
    },
  }.freeze

  # The constants below are not referenced by any method in this class; they
  # are kept so existing external references keep resolving.
  QUARTET_CHAMPS_SITE = "https://www.barbershopwiki.com/wiki/BHS_International_Quartet_Champions"
  CHORUS_CHAMPS_SITE = "https://www.barbershopwiki.com/wiki/BHS_International_Chorus_Champions"
  LOCAL_SITES = {
    quartet_champs: "./sites/BHS International Quartet Champions - Barbershop Wiki Project.html",
    chorus_champs: "./sites/BHS International Chorus Champions - Barbershop Wiki Project.html",
    chorus_2018: "./sites/BHS Intl Chorus Contest 2018 - Barbershop Wiki Project.html",
    quartet_2018: "./sites/BHS Intl Quartet Contest 2018 - Barbershop Wiki Project.html"
  }.freeze
  CACHE_LOCATIONS = {
    qchamps: "./sites/qchamps.txt",
    cchamps: "./sites/cchamps.txt"
  }.freeze

  # Contest years already handled by scrape_and_create_year, per contest type.
  @years_scraped = { "quartet" => [], "chorus" => [] }

  # Returns the cached document if there is one, otherwise parses `page`
  # (a URL or a local file path) with Nokogiri.
  #
  # The page is opened in block form so the file/HTTP handle is closed as soon
  # as Nokogiri has consumed it.
  def self.scrape_or_load(page)
    load_cache || read_page(page) { |io| Nokogiri::HTML(io) }
  end

  # Placeholder for a future on-disk cache: always returns nil, so callers
  # fall through to real scraping.
  #
  # Ideas noted for a later implementation:
  #   doc = Nokogiri(string_or_io)
  #   node.write_to(io, *options)   # or node.to_s / .to_html / .to_xml
  def self.load_cache
    nil
  end

  # Fetches the quartet champions page for `source` (:web or :local) and
  # returns the rows matched by ".wikitable tbody tr" minus the first row,
  # which holds the column headers.
  def self.scrape_quartet_champs(source)
    puts "Scraping Quartet Champs"
    location = LOCATIONS[:base][source] + LOCATIONS[:q_champs][source]
    doc = scrape_or_load(location)
    champ_table = doc.css(".wikitable tbody tr")
    champ_table.shift # drop the header row (not distinguishable via CSS)
    champ_table
  end

  # Creates (or finds) one first-place quartet Performance per champions row,
  # then clears the terminal.
  def self.scrape_and_create_quartet_champs(source)
    scrape_quartet_champs(source).each do |row|
      cells = row_cells(row)
      # Index 0 is skipped; presumably it is the empty text before the
      # row's first newline. Index 6 is skipped as well.
      q_champs_hash = {
        year: cells[1].to_i,
        name: cells[2],
        score: cells[3],
        district: cells[4],
        comments: cells[5],
        members: cells[7],
        place: 1, # champions definitionally are first place
        type: "quartet"
      }
      Performance.find_or_create(q_champs_hash, "quartet")
    end
    clear_screen
  end

  # Fetches the chorus champions page for `source` and returns the rows of the
  # second ".wikitable" on that page, minus the header row.
  def self.scrape_chorus_champs(source)
    puts "Scraping Chorus Champs"
    location = LOCATIONS[:base][source] + LOCATIONS[:c_champs][source]
    doc = scrape_or_load(location)
    champ_table = doc.css(".wikitable")[1].css("tr")
    champ_table.shift # remove header line
    champ_table
  end

  # Creates (or finds) one first-place chorus Performance per champions row,
  # then clears the terminal.
  def self.scrape_and_create_chorus_champs(source)
    scrape_chorus_champs(source).each do |row|
      cells = row_cells(row)
      c_champs_hash = {
        year: cells[1].to_i,
        name: cells[2],
        hometown_and_district: cells[3],
        director: cells[4],
        number_on_stage: cells[5],
        score: cells[6],
        place: 1, # champions definitionally are first place
        type: "chorus"
      }
      Performance.find_or_create(c_champs_hash, "chorus")
    end
    clear_screen
  end

  # Scrapes the contest of `type` ("quartet" or "chorus") held in `year` and
  # creates a Performance for every result row. Each year/type pair is only
  # scraped once; later calls return true immediately.
  #
  # The year is marked as scraped before any work starts (so a nested call
  # for the same year is a no-op), but the mark is rolled back if scraping or
  # record creation raises, so a failed attempt can be retried.
  def self.scrape_and_create_year(source, year, type)
    scraped = @years_scraped[type]
    return true if scraped.include?(year)

    scraped << year
    begin
      scrape_year(source, year, type).each do |rows|
        rows.each do |tr|
          cells = row_cells(tr)
          year_hash = {
            year: year,
            place: cells[1],
            name: cells[2],
            district: cells[3],
            score: cells[4]
          }
          year_hash[:number_on_stage] = cells[5] if type == "chorus"
          Performance.find_or_create(year_hash, type)
        end
      end
    rescue StandardError
      scraped.delete(year)
      raise
    end
    clear_screen
  end

  # Loads the contest page for `year` and returns an array with one entry per
  # results table; each entry is that table's rows without its header row.
  # Tables whose header row contains "Admin" are skipped, as are tables that
  # have no rows at all.
  def self.scrape_year(source, year, type)
    puts "Scraping #{type.capitalize} Contest for #{year}"
    # "quartet" -> :q_year, "chorus" -> :c_year
    page_parts = LOCATIONS[(type[0] + "_year").to_sym][source]
    location = LOCATIONS[:base][source] + page_parts.join(year.to_s)
    doc = scrape_or_load(location)
    doc.css(".wikitable").each_with_object([]) do |table, tables|
      rows = table.css("tr")
      header = rows.first
      next if header.nil? || header.text.include?("Admin")

      tables << rows.drop(1)
    end
  end

  # Splits a table row's text into one string per line.
  def self.row_cells(row)
    row.text.split("\n")
  end

  # Clears the terminal: tries "clear" first and falls back to "cls".
  def self.clear_screen
    system("clear") || system("cls")
  end

  # Opens `page` and yields the IO to the block, closing it afterwards.
  # Local paths go through File.open. URLs use URI.open when open-uri provides
  # it (Kernel#open stopped opening URLs in Ruby 3.0) and fall back to
  # Kernel#open otherwise.
  def self.read_page(page, &block)
    if !page.start_with?("http://", "https://")
      File.open(page, &block)
    elsif defined?(URI) && URI.respond_to?(:open)
      URI.open(page, &block)
    else
      open(page, &block)
    end
  end

  private_class_method :row_cells, :clear_screen, :read_page
end
@@ -0,0 +1,3 @@
1
# Gem namespace; this file only carries the release version.
module BarbershopContestants
  # Version string of this release. Frozen so the constant cannot be mutated
  # in place by accident.
  VERSION = "0.3.0".freeze
end
@@ -0,0 +1,22 @@
1
# Load every file under lib/barbershop_contestants. The path is anchored to
# this file's own directory instead of "./lib", so loading no longer depends
# on the process being started from the gem's root directory.
# (`require_all` itself is expected to be loaded before this file runs.)
require_all File.join(__dir__, "barbershop_contestants")

# Gem namespace.
module BarbershopContestants
  # Base class for errors raised by this gem.
  class Error < StandardError; end
end

# Requiring this file starts the command-line interface.
CLI.start
12
+ ### Remember this pattern: ###
13
+
14
+ # properties.each do |k, v|
15
+ # # k = properties
16
+ # # v = values
17
+ # class.send("#{k}=", v)
18
+ # end
19
+
20
+ # A line of text
21
+
22
+ # rake install local to