korfzone-scraper 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +5 -0
  3. data/.simplecov +3 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +64 -0
  7. data/Rakefile +4 -0
  8. data/korfzone-scraper.gemspec +28 -0
  9. data/lib/korfzone/scraper/block_page.rb +39 -0
  10. data/lib/korfzone/scraper/categories.rb +26 -0
  11. data/lib/korfzone/scraper/games_page.rb +112 -0
  12. data/lib/korfzone/scraper/page.rb +47 -0
  13. data/lib/korfzone/scraper/version.rb +5 -0
  14. data/lib/korfzone/scraper.rb +38 -0
  15. data/spec/korfzone/scraper/page_spec.rb +224 -0
  16. data/spec/spec_helper.rb +4 -0
  17. data/spec/support/files/beta-Wedstrijden-senioren-veld-V01.html +1031 -0
  18. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011001.html +790 -0
  19. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011002.html +794 -0
  20. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011003.html +788 -0
  21. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011004.html +794 -0
  22. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011005.html +793 -0
  23. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011006.html +797 -0
  24. data/spec/support/files/beta-Wedstrijden-senioren-veld-V011007.html +793 -0
  25. data/spec/support/files/beta-Wedstrijden-senioren-veld.html +721 -0
  26. data/spec/support/files/games.yml +125 -0
  27. data/spec/support/games/V011001.yml +15 -0
  28. data/spec/support/games/V011002.yml +16 -0
  29. data/spec/support/games/V011003.yml +15 -0
  30. data/spec/support/games/V011004.yml +2 -0
  31. data/spec/support/games/V011005.yml +18 -0
  32. data/spec/support/games/V011006.yml +19 -0
  33. data/spec/support/games/V011007.yml +19 -0
  34. data/spec/support/games/V011008.yml +18 -0
  35. data/spec/support/open_mock.rb +16 -0
  36. metadata +172 -0
@@ -0,0 +1,125 @@
1
+ ---
2
+ - code: V011001
3
+ location:
4
+ - Riviera
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/12
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - Riviera
12
+ - Temse
13
+ division: "1ste klasse A"
14
+ category: :senioren
15
+ competition: veld
16
+ - code: V011002
17
+ location:
18
+ - Ganda
19
+ - !ruby/object:URI::HTTP
20
+ scheme: http
21
+ host: www.korfbal.be
22
+ path: /beta/Locatie/14
23
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
24
+ teams:
25
+ - Ganda
26
+ - Borgerhout/GW
27
+ division: "1ste klasse A"
28
+ category: :senioren
29
+ competition: veld
30
+ - code: V011003
31
+ location:
32
+ - ASKC
33
+ - !ruby/object:URI::HTTP
34
+ scheme: http
35
+ host: www.korfbal.be
36
+ path: /beta/Locatie/43
37
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
38
+ teams:
39
+ - ASKC
40
+ - Boeckenberg
41
+ division: "1ste klasse A"
42
+ category: :senioren
43
+ competition: veld
44
+ - code: V011004
45
+ location:
46
+ - Vobako
47
+ - !ruby/object:URI::HTTP
48
+ scheme: http
49
+ host: www.korfbal.be
50
+ path: /beta/Locatie/32
51
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
52
+ teams:
53
+ - Vobako
54
+ - Scaldis
55
+ division: "1ste klasse A"
56
+ category: :senioren
57
+ competition: veld
58
+ - code: V011005
59
+ location:
60
+ - Kwik
61
+ - !ruby/object:URI::HTTP
62
+ scheme: http
63
+ host: www.korfbal.be
64
+ path: /beta/Locatie/25
65
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
66
+ teams:
67
+ - Kwik
68
+ - Minerva
69
+ scores:
70
+ - 0
71
+ - 0
72
+ division: "1ste klasse B"
73
+ category: :senioren
74
+ competition: veld
75
+ - code: V011006
76
+ location:
77
+ - Voorwaarts
78
+ - !ruby/object:URI::HTTP
79
+ scheme: http
80
+ host: www.korfbal.be
81
+ path: /beta/Locatie/9
82
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
83
+ teams:
84
+ - Voorwaarts
85
+ - Leuven
86
+ scores:
87
+ - 2
88
+ - 10
89
+ division: "1ste klasse B"
90
+ category: :senioren
91
+ competition: veld
92
+ - code: V011007
93
+ location:
94
+ - Catba
95
+ - !ruby/object:URI::HTTP
96
+ scheme: http
97
+ host: www.korfbal.be
98
+ path: /beta/Locatie/24
99
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
100
+ teams:
101
+ - Catba
102
+ - AKC
103
+ scores:
104
+ - 4
105
+ - 9
106
+ division: "1ste klasse B"
107
+ category: :senioren
108
+ competition: veld
109
+ - code: V011008
110
+ location:
111
+ - Sikopi
112
+ - !ruby/object:URI::HTTP
113
+ scheme: http
114
+ host: www.korfbal.be
115
+ path: /beta/Locatie/30
116
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
117
+ teams:
118
+ - Sikopi
119
+ - Meeuwen
120
+ scores:
121
+ - 0
122
+ - 5
123
+ division: "1ste klasse B"
124
+ category: :senioren
125
+ competition: veld
@@ -0,0 +1,15 @@
1
+ ---
2
+ code: V011001
3
+ location:
4
+ - Riviera
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/12
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - Riviera
12
+ - Temse
13
+ division: 1ste klasse A
14
+ category: senioren
15
+ competition: veld
@@ -0,0 +1,16 @@
1
+ ---
2
+ code: V011002
3
+ location:
4
+ - Ganda
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/14
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ referee: Fluit Opzak
11
+ teams:
12
+ - Ganda
13
+ - Borgerhout/GW
14
+ division: 1ste klasse A
15
+ category: senioren
16
+ competition: veld
@@ -0,0 +1,15 @@
1
+ ---
2
+ code: V011003
3
+ location:
4
+ - ASKC
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/43
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - ASKC
12
+ - Boeckenberg
13
+ division: 1ste klasse A
14
+ category: senioren
15
+ competition: veld
@@ -0,0 +1,2 @@
1
+ ---
2
+ code: V011004
@@ -0,0 +1,18 @@
1
+ ---
2
+ code: V011005
3
+ location:
4
+ - Kwik
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/25
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - Kwik
12
+ - Minerva
13
+ scores:
14
+ - 0
15
+ - 0
16
+ division: 1ste klasse B
17
+ category: senioren
18
+ competition: veld
@@ -0,0 +1,19 @@
1
+ ---
2
+ code: V011006
3
+ location:
4
+ - Voorwaarts
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/9
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - Voorwaarts
12
+ - Leuven
13
+ referee: Fluit Opzak
14
+ scores:
15
+ - 2
16
+ - 10
17
+ division: 1ste klasse B
18
+ category: senioren
19
+ competition: veld
@@ -0,0 +1,19 @@
1
+ ---
2
+ code: V011007
3
+ location:
4
+ - Catba
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/24
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - Catba
12
+ - AKC
13
+ scores:
14
+ - 0
15
+ - 5
16
+ forfait: true
17
+ division: 1ste klasse B
18
+ category: senioren
19
+ competition: veld
@@ -0,0 +1,18 @@
1
+ ---
2
+ code: V011008
3
+ location:
4
+ - Sikopi
5
+ - !ruby/object:URI::HTTP
6
+ scheme: http
7
+ host: www.korfbal.be
8
+ path: /beta/Locatie/30
9
+ starts_at: 2013-09-01 15:30:00.000000000 +02:00
10
+ teams:
11
+ - Sikopi
12
+ - Meeuwen
13
+ scores:
14
+ - 0
15
+ - 5
16
+ division: 1ste klasse B
17
+ category: :senioren
18
+ competition: veld
@@ -0,0 +1,16 @@
1
+ require 'yaml'
2
+
3
+ def stub_korfzone_fetch( uri )
4
+ path = URI( uri.to_s ).path.gsub( /^\//, '' ).gsub( '/', '-' ) + '.html'
5
+ file_path = File.join( File.dirname( __FILE__ ), 'files', path )
6
+
7
+ Korfzone::Scraper.should_receive( :fetch ).with( URI( uri.to_s ) ).and_return( open( file_path ) )
8
+ end
9
+
10
+ def load_game( code )
11
+ game = YAML::load_file( File.join( File.dirname( __FILE__ ), 'games', "#{code}.yml" ) )
12
+ uri = "http://www.korfbal.be/beta/Wedstrijden/senioren/veld/#{code}"
13
+ [ uri, game ]
14
+ end
15
+
16
+ BLOCK_URIS = (1..22).map { |i| URI( "http://www.korfbal.be/beta/Wedstrijden/senioren/veld/V#{i.to_s.rjust(2,'0')}") }
metadata ADDED
@@ -0,0 +1,172 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: korfzone-scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Rutger Claes
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-07-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: debugger
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rspec
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: simplecov
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Scrape the KBKB korfbal.be website
84
+ email:
85
+ - rutger@korfzone.be
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - .gitignore
91
+ - .simplecov
92
+ - Gemfile
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - korfzone-scraper.gemspec
97
+ - lib/korfzone/scraper.rb
98
+ - lib/korfzone/scraper/block_page.rb
99
+ - lib/korfzone/scraper/categories.rb
100
+ - lib/korfzone/scraper/games_page.rb
101
+ - lib/korfzone/scraper/page.rb
102
+ - lib/korfzone/scraper/version.rb
103
+ - spec/korfzone/scraper/page_spec.rb
104
+ - spec/spec_helper.rb
105
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V01.html
106
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011001.html
107
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011002.html
108
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011003.html
109
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011004.html
110
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011005.html
111
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011006.html
112
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011007.html
113
+ - spec/support/files/beta-Wedstrijden-senioren-veld.html
114
+ - spec/support/files/games.yml
115
+ - spec/support/games/V011001.yml
116
+ - spec/support/games/V011002.yml
117
+ - spec/support/games/V011003.yml
118
+ - spec/support/games/V011004.yml
119
+ - spec/support/games/V011005.yml
120
+ - spec/support/games/V011006.yml
121
+ - spec/support/games/V011007.yml
122
+ - spec/support/games/V011008.yml
123
+ - spec/support/open_mock.rb
124
+ homepage: http://github.org/korfzone/korfzone-scraper
125
+ licenses:
126
+ - MIT
127
+ metadata: {}
128
+ post_install_message:
129
+ rdoc_options: []
130
+ require_paths:
131
+ - lib
132
+ required_ruby_version: !ruby/object:Gem::Requirement
133
+ requirements:
134
+ - - '>='
135
+ - !ruby/object:Gem::Version
136
+ version: '0'
137
+ required_rubygems_version: !ruby/object:Gem::Requirement
138
+ requirements:
139
+ - - '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ requirements: []
143
+ rubyforge_project:
144
+ rubygems_version: 2.0.0
145
+ signing_key:
146
+ specification_version: 4
147
+ summary: The code needed to scrape all games of the KBKB website (korfbal.be) extracted
148
+ from the Korfzone project. This code is expected to break whenever the KBKB updates
149
+ its website.
150
+ test_files:
151
+ - spec/korfzone/scraper/page_spec.rb
152
+ - spec/spec_helper.rb
153
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V01.html
154
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011001.html
155
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011002.html
156
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011003.html
157
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011004.html
158
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011005.html
159
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011006.html
160
+ - spec/support/files/beta-Wedstrijden-senioren-veld-V011007.html
161
+ - spec/support/files/beta-Wedstrijden-senioren-veld.html
162
+ - spec/support/files/games.yml
163
+ - spec/support/games/V011001.yml
164
+ - spec/support/games/V011002.yml
165
+ - spec/support/games/V011003.yml
166
+ - spec/support/games/V011004.yml
167
+ - spec/support/games/V011005.yml
168
+ - spec/support/games/V011006.yml
169
+ - spec/support/games/V011007.yml
170
+ - spec/support/games/V011008.yml
171
+ - spec/support/open_mock.rb
172
+ has_rdoc: