atp_scraper 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/Guardfile +4 -0
- data/README.md +4 -2
- data/atp_scraper.gemspec +2 -0
- data/lib/atp_scraper/activities/record.rb +24 -28
- data/lib/atp_scraper/activities/tournament.rb +59 -63
- data/lib/atp_scraper/activity.rb +14 -9
- data/lib/atp_scraper/ranking.rb +5 -4
- data/lib/atp_scraper/version.rb +1 -1
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: baa37248033947425c520cd786e492aa009ed6e9
|
4
|
+
data.tar.gz: a92c8e3d56b0e5557ee2bf92ffca9b604c3d0932
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 226b5cd0ef6a8b8f40f40442cb36e098cb72298c727b674944a36e565755084e78704290ed74d3da9179f6417625844e56f96a291205a3d202b73f1f1627368c
|
7
|
+
data.tar.gz: a139dccfacf13aa459b91f1fbf7279faef51cbd0e8da0143d05d0ae767ffda0b7ff590772dc38b5900e2175687215b8dda90ff0712999a2cfed83d948d239bda
|
data/.travis.yml
CHANGED
data/Guardfile
ADDED
data/README.md
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
# AtpScraper
|
4
4
|
AtpScraper is a tool scraping tennis data from atpworldtour.com
|
5
5
|
|
6
|
+
[https://rubygems.org/gems/atp_scraper]
|
7
|
+
|
6
8
|
## Installation
|
7
9
|
|
8
10
|
Add this line to your application's Gemfile:
|
@@ -25,7 +27,7 @@ require "atp_scraper"
|
|
25
27
|
|
26
28
|
# Get Singles Ranking TOP100
|
27
29
|
AtpScraper::Get.singles_ranking
|
28
|
-
# Response
|
30
|
+
# Response
|
29
31
|
# {
|
30
32
|
# ranking: "5"
|
31
33
|
# player_name: "Rafael Nadal",
|
@@ -39,7 +41,7 @@ AtpScraper::Get.singles_ranking("101-200")
|
|
39
41
|
|
40
42
|
# Get Player Activity. For Example Rafael Nadal's activity in 2016
|
41
43
|
AtpScraper::Get.player_activity("n409", 2016)
|
42
|
-
# Response
|
44
|
+
# Response
|
43
45
|
# {
|
44
46
|
# year: "2016",
|
45
47
|
# player_name: "Rafael Nadal",
|
data/atp_scraper.gemspec
CHANGED
@@ -27,4 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_development_dependency "pry"
|
28
28
|
spec.add_development_dependency "rb-readline"
|
29
29
|
spec.add_development_dependency "rubocop"
|
30
|
+
spec.add_development_dependency "guard"
|
31
|
+
spec.add_development_dependency "guard-minitest"
|
30
32
|
end
|
@@ -1,34 +1,30 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def info
|
9
|
-
pickup_record(@record)
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
1
|
+
module AtpScraper
|
2
|
+
module Activities
|
3
|
+
# Activity Record Class
|
4
|
+
class Record
|
5
|
+
def initialize(doc)
|
6
|
+
@record = doc
|
7
|
+
end
|
13
8
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
9
|
+
def get
|
10
|
+
result = {}
|
11
|
+
@record.css("td").each_with_index do |td, n|
|
12
|
+
record_content = td.content.strip
|
13
|
+
case n
|
14
|
+
when 0 then
|
15
|
+
result[:round] = record_content
|
16
|
+
when 1 then
|
17
|
+
result[:opponent_rank] = record_content
|
18
|
+
when 2 then
|
19
|
+
result[:opponent_name] = record_content
|
20
|
+
when 3 then
|
21
|
+
result[:win_loss] = record_content
|
22
|
+
when 4 then
|
23
|
+
result[:score] = record_content
|
24
|
+
end
|
29
25
|
end
|
26
|
+
result
|
30
27
|
end
|
31
|
-
result
|
32
28
|
end
|
33
29
|
end
|
34
30
|
end
|
@@ -1,75 +1,71 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module AtpScraper
|
2
|
+
module Activities
|
3
|
+
# Activity Tournament Class
|
4
|
+
class Tournament
|
5
|
+
def initialize(doc)
|
6
|
+
@tournament = doc
|
7
|
+
end
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
# Return tournament data
|
10
|
+
def get
|
11
|
+
tournament_date = pickup_text(".tourney-dates")
|
12
|
+
surface = pickup_surface
|
13
|
+
caption = pickup_text(".activity-tournament-caption")
|
14
|
+
{
|
15
|
+
name: pickup_text(".tourney-title"),
|
16
|
+
category: pickup_category,
|
17
|
+
location: pickup_text(".tourney-location"),
|
18
|
+
date: divide_tournament_date(tournament_date),
|
19
|
+
year: tournament_date[0, 4],
|
20
|
+
surface: surface[:surface],
|
21
|
+
surface_inout: surface[:inout],
|
22
|
+
ranking: pickup_player_rank(caption)
|
23
|
+
}
|
24
|
+
end
|
11
25
|
|
12
|
-
|
13
|
-
|
14
|
-
|
26
|
+
# Return records in this tournament
|
27
|
+
def records
|
28
|
+
@tournament.css(".mega-table tbody tr")
|
29
|
+
end
|
15
30
|
|
16
|
-
|
31
|
+
private
|
17
32
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
category: pickup_category,
|
25
|
-
location: pickup_text(".tourney-location"),
|
26
|
-
date: divide_tournament_date(tournament_date),
|
27
|
-
year: tournament_date[0, 4],
|
28
|
-
surface: surface[:surface],
|
29
|
-
surface_inout: surface[:inout],
|
30
|
-
ranking: pickup_player_rank(caption)
|
31
|
-
}
|
32
|
-
end
|
33
|
+
# Before: String "2011.01.03 - 2011.01.08"
|
34
|
+
# After: Hash { start: 2011.01.03, end: 2011.01.08 }
|
35
|
+
def divide_tournament_date(date)
|
36
|
+
date = date.split('-').map(&:strip)
|
37
|
+
{ start: date[0], end: date[1] }
|
38
|
+
end
|
33
39
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
date = date.split('-').map(&:strip)
|
38
|
-
{ start: date[0], end: date[1] }
|
39
|
-
end
|
40
|
+
def pickup_text(selector)
|
41
|
+
@tournament.css(selector).first.content.strip
|
42
|
+
end
|
40
43
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
def pickup_category
|
45
|
+
# ex) /~/media/images/tourtypes/categorystamps_itf_118x64.png?xxxxx
|
46
|
+
badge_url = @tournament.css(".tourney-badge-wrapper img").attr("src").value
|
47
|
+
badge_url.match(/categorystamps_(.*)_[0-9]*x[0-9]*.png/)[1]
|
48
|
+
end
|
44
49
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
+
def pickup_surface
|
51
|
+
surface = @tournament
|
52
|
+
.css(".tourney-details")[1]
|
53
|
+
.css(".item-details")
|
54
|
+
.first.content.gsub(/\t|\s/, "")
|
55
|
+
divide_surface(surface)
|
56
|
+
end
|
50
57
|
|
51
|
-
|
52
|
-
surface
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
# "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
|
60
|
-
def divide_surface(surface)
|
61
|
-
inout = surface.match(/^(Outdoor|Indoor)/)
|
62
|
-
return { surface: surface, inout: nil } if inout.nil?
|
63
|
-
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
64
|
-
end
|
65
|
-
|
66
|
-
def pickup_player_rank(tournament_caption)
|
67
|
-
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
68
|
-
rank[1].strip
|
69
|
-
end
|
58
|
+
# "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
|
59
|
+
def divide_surface(surface)
|
60
|
+
inout = surface.match(/^(Outdoor|Indoor)/)
|
61
|
+
return { surface: surface, inout: nil } if inout.nil?
|
62
|
+
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
63
|
+
end
|
70
64
|
|
71
|
-
|
72
|
-
|
65
|
+
def pickup_player_rank(tournament_caption)
|
66
|
+
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
67
|
+
rank[1].strip
|
68
|
+
end
|
73
69
|
end
|
74
70
|
end
|
75
71
|
end
|
data/lib/atp_scraper/activity.rb
CHANGED
@@ -19,17 +19,17 @@ module AtpScraper
|
|
19
19
|
include Activities
|
20
20
|
def initialize(html, html_charset = 'utf-8')
|
21
21
|
@activity_doc = AtpScraper::Html.parse(html, html_charset)
|
22
|
-
@player_name = pickup_player_name
|
22
|
+
@player_name = pickup_player_name
|
23
23
|
end
|
24
24
|
|
25
25
|
def pickup_activity_data
|
26
26
|
result = []
|
27
27
|
|
28
|
-
search_tournaments_doc
|
28
|
+
search_tournaments_doc.each do |tournament_doc|
|
29
29
|
tournament = Tournament.new(tournament_doc)
|
30
30
|
tournament.records.each do |record_doc|
|
31
31
|
record = Record.new(record_doc)
|
32
|
-
record_hash = create_record(record.
|
32
|
+
record_hash = create_record(record.get, tournament.get)
|
33
33
|
result.push(record_hash)
|
34
34
|
end
|
35
35
|
end
|
@@ -38,17 +38,17 @@ module AtpScraper
|
|
38
38
|
|
39
39
|
private
|
40
40
|
|
41
|
-
def search_tournaments_doc
|
42
|
-
activity_doc.css(".activity-tournament-table")
|
41
|
+
def search_tournaments_doc
|
42
|
+
@activity_doc.css(".activity-tournament-table")
|
43
43
|
end
|
44
44
|
|
45
45
|
def create_record(record, tournament)
|
46
46
|
{
|
47
47
|
year: tournament[:year],
|
48
48
|
player_name: @player_name,
|
49
|
-
player_rank: tournament[:ranking],
|
49
|
+
player_rank: convert_ranking(tournament[:ranking]),
|
50
50
|
opponent_name: record[:opponent_name],
|
51
|
-
opponent_rank: record[:opponent_rank],
|
51
|
+
opponent_rank: convert_ranking(record[:opponent_rank]),
|
52
52
|
round: record[:round],
|
53
53
|
score: record[:score],
|
54
54
|
win_loss: record[:win_loss],
|
@@ -62,10 +62,15 @@ module AtpScraper
|
|
62
62
|
}
|
63
63
|
end
|
64
64
|
|
65
|
-
def pickup_player_name
|
66
|
-
activity_doc
|
65
|
+
def pickup_player_name
|
66
|
+
@activity_doc
|
67
67
|
.css("meta[property=\"pageTransitionTitle\"]")
|
68
68
|
.attr("content").value
|
69
69
|
end
|
70
|
+
|
71
|
+
def convert_ranking(ranking)
|
72
|
+
return nil if ranking == '-'
|
73
|
+
ranking
|
74
|
+
end
|
70
75
|
end
|
71
76
|
end
|
data/lib/atp_scraper/ranking.rb
CHANGED
@@ -15,7 +15,7 @@ module AtpScraper
|
|
15
15
|
|
16
16
|
def pickup_ranking_data
|
17
17
|
result = []
|
18
|
-
search_player_doc
|
18
|
+
search_player_doc.each do |player_doc|
|
19
19
|
result.push(pickup_player_data(player_doc))
|
20
20
|
end
|
21
21
|
result
|
@@ -23,8 +23,8 @@ module AtpScraper
|
|
23
23
|
|
24
24
|
private
|
25
25
|
|
26
|
-
def search_player_doc
|
27
|
-
ranking_doc.css(".mega-table tbody tr")
|
26
|
+
def search_player_doc
|
27
|
+
@ranking_doc.css(".mega-table tbody tr")
|
28
28
|
end
|
29
29
|
|
30
30
|
def pickup_player_data(player_doc)
|
@@ -55,11 +55,12 @@ module AtpScraper
|
|
55
55
|
player_doc.css(".points-cell").first.content.strip.delete(',')
|
56
56
|
end
|
57
57
|
|
58
|
-
#
|
58
|
+
# "/en/players/rafael-nadal/n409/overview" => "rafael-nadal"
|
59
59
|
def get_url_name(url)
|
60
60
|
url.split("/")[3]
|
61
61
|
end
|
62
62
|
|
63
|
+
# "/en/players/rafael-nadal/n409/overview" => "n409"
|
63
64
|
def get_url_id(url)
|
64
65
|
url.split("/")[4]
|
65
66
|
end
|
data/lib/atp_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosuke5
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -122,6 +122,34 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: guard
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: guard-minitest
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
125
153
|
description: atp_scraper is a tool scraping tennis data from atpworldtour.com
|
126
154
|
email:
|
127
155
|
- w.vamos603@gmail.com
|
@@ -134,6 +162,7 @@ files:
|
|
134
162
|
- ".travis.yml"
|
135
163
|
- CODE_OF_CONDUCT.md
|
136
164
|
- Gemfile
|
165
|
+
- Guardfile
|
137
166
|
- LICENSE.txt
|
138
167
|
- README.md
|
139
168
|
- Rakefile
|