atp_scraper 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -10
- data/lib/atp_scraper/activity.rb +89 -99
- data/lib/atp_scraper/get.rb +18 -0
- data/lib/atp_scraper/ranking.rb +34 -44
- data/lib/atp_scraper/version.rb +1 -1
- data/lib/atp_scraper.rb +2 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ee413b367b7003e9748268c3e6ebac13a5758db0
|
4
|
+
data.tar.gz: dc366cb110b4063407919d9eb782f9f59af4ba7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ee1d39cc20915134812ce6d6f1781c3cf0a6eb11d878e3a4218df8b47ab502389c9c3acd23dfd144dc145836e4a434ad996757be4a067e7ed78aefa318573bd3
|
7
|
+
data.tar.gz: 89e81c1b0251913fcd810671425b6773fdb2173f7c14a81293f1715b883d05529e2794a4c5c51b49383a8ac6ba393a517394475b01583adb5738913a778b0d19
|
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
[](https://travis-ci.org/mosuke5/atp_scraper)
|
2
2
|
# AtpScraper
|
3
|
+
AtpScraper is a tool for scraping tennis data from atpworldtour.com.
|
3
4
|
|
4
5
|
## Installation
|
5
6
|
|
@@ -22,7 +23,7 @@ Or install it yourself as:
|
|
22
23
|
require "atp_scraper"
|
23
24
|
|
24
25
|
# Get Singles Ranking TOP100
|
25
|
-
AtpScraper::
|
26
|
+
AtpScraper::Get.singles_ranking
|
26
27
|
# Response Json
|
27
28
|
# {
|
28
29
|
# ranking: "5"
|
@@ -32,10 +33,10 @@ AtpScraper::Ranking.get
|
|
32
33
|
# }
|
33
34
|
|
34
35
|
# Get Singles Ranking 101-200
|
35
|
-
AtpScraper::
|
36
|
+
AtpScraper::Get.singles_ranking("101-200")
|
36
37
|
|
37
38
|
# Get Player Activity. For Example Rafael Nadal's activity in 2016
|
38
|
-
AtpScraper::
|
39
|
+
AtpScraper::Get.player_activity("n409", 2016)
|
39
40
|
# Response Json
|
40
41
|
# {
|
41
42
|
# year: 2016,
|
@@ -54,12 +55,6 @@ AtpScraper::Activity.get("n409", 2016)
|
|
54
55
|
# }
|
55
56
|
```
|
56
57
|
|
57
|
-
## Development
|
58
|
-
|
59
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
60
|
-
|
61
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
62
|
-
|
63
58
|
## Contributing
|
64
59
|
- Fork the project.
|
65
60
|
- Make your feature addition or bug fix, write tests.
|
@@ -68,6 +63,5 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
68
63
|
|
69
64
|
|
70
65
|
## License
|
71
|
-
|
72
66
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
73
67
|
|
data/lib/atp_scraper/activity.rb
CHANGED
@@ -14,120 +14,110 @@
|
|
14
14
|
module AtpScraper
|
15
15
|
# Scrape activity data
|
16
16
|
class Activity
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
pickup_activity_data(activity_doc)
|
22
|
-
end
|
23
|
-
|
24
|
-
def build_uri(player_id, year)
|
25
|
-
"/players/anything/#{player_id}/player-activity?year=#{year}"
|
26
|
-
end
|
17
|
+
def pickup_activity_data(activity_doc)
|
18
|
+
result = []
|
19
|
+
player = {}
|
20
|
+
player[:name] = pickup_player_name(activity_doc)
|
27
21
|
|
28
|
-
|
29
|
-
|
30
|
-
player =
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
player[:rank] = pickup_player_rank(tournament[:caption])
|
36
|
-
search_records_doc(tournament_doc).each do |record_doc|
|
37
|
-
record = pickup_record(record_doc)
|
38
|
-
record_hash = create_record(record, player, tournament)
|
39
|
-
result.push(record_hash)
|
40
|
-
end
|
22
|
+
search_tournaments_doc(activity_doc).each do |tournament_doc|
|
23
|
+
tournament = pickup_tournament_info(tournament_doc)
|
24
|
+
player[:rank] = pickup_player_rank(tournament[:caption])
|
25
|
+
search_records_doc(tournament_doc).each do |record_doc|
|
26
|
+
record = pickup_record(record_doc)
|
27
|
+
record_hash = create_record(record, player, tournament)
|
28
|
+
result.push(record_hash)
|
41
29
|
end
|
42
|
-
result
|
43
30
|
end
|
31
|
+
result
|
32
|
+
end
|
44
33
|
|
45
|
-
|
46
|
-
activity_doc.css(".activity-tournament-table")
|
47
|
-
end
|
34
|
+
private
|
48
35
|
|
49
|
-
|
50
|
-
|
51
|
-
|
36
|
+
def search_tournaments_doc(activity_doc)
|
37
|
+
activity_doc.css(".activity-tournament-table")
|
38
|
+
end
|
52
39
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
player_name: player[:name],
|
57
|
-
player_rank: player[:rank],
|
58
|
-
opponent_name: record[:opponent_name],
|
59
|
-
opponent_rank: record[:opponent_rank],
|
60
|
-
round: record[:round],
|
61
|
-
score: record[:score],
|
62
|
-
win_loss: record[:win_loss],
|
63
|
-
tournament_name: tournament[:name],
|
64
|
-
tournament_location: tournament[:location],
|
65
|
-
tournament_start_date: tournament[:date][:start],
|
66
|
-
tournament_end_date: tournament[:date][:end],
|
67
|
-
tournament_surface: tournament[:surface]
|
68
|
-
}
|
69
|
-
end
|
40
|
+
def search_records_doc(tournament_doc)
|
41
|
+
tournament_doc.css(".mega-table tbody tr")
|
42
|
+
end
|
70
43
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
44
|
+
def create_record(record, player, tournament)
|
45
|
+
{
|
46
|
+
year: tournament[:year],
|
47
|
+
player_name: player[:name],
|
48
|
+
player_rank: player[:rank],
|
49
|
+
opponent_name: record[:opponent_name],
|
50
|
+
opponent_rank: record[:opponent_rank],
|
51
|
+
round: record[:round],
|
52
|
+
score: record[:score],
|
53
|
+
win_loss: record[:win_loss],
|
54
|
+
tournament_name: tournament[:name],
|
55
|
+
tournament_location: tournament[:location],
|
56
|
+
tournament_start_date: tournament[:date][:start],
|
57
|
+
tournament_end_date: tournament[:date][:end],
|
58
|
+
tournament_surface: tournament[:surface]
|
59
|
+
}
|
60
|
+
end
|
76
61
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
62
|
+
def pickup_player_name(activity_doc)
|
63
|
+
activity_doc
|
64
|
+
.css("meta[property=\"pageTransitionTitle\"]")
|
65
|
+
.attr("content").value
|
66
|
+
end
|
67
|
+
|
68
|
+
def pickup_record(record_doc)
|
69
|
+
result = {}
|
70
|
+
record_doc.css("td").each_with_index do |td, n|
|
71
|
+
record_content = td.content.strip
|
72
|
+
case n
|
73
|
+
when 0 then
|
74
|
+
result[:round] = record_content
|
75
|
+
when 1 then
|
76
|
+
result[:opponent_rank] = record_content
|
77
|
+
when 2 then
|
78
|
+
result[:opponent_name] = record_content
|
79
|
+
when 3 then
|
80
|
+
result[:win_loss] = record_content
|
81
|
+
when 4 then
|
82
|
+
result[:score] = record_content
|
93
83
|
end
|
94
|
-
result
|
95
84
|
end
|
85
|
+
result
|
86
|
+
end
|
96
87
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
88
|
+
def pickup_tournament_info(tournament_doc)
|
89
|
+
tournament_date = pickup_text(tournament_doc, ".tourney-dates")
|
90
|
+
{
|
91
|
+
name: pickup_text(tournament_doc, ".tourney-title"),
|
92
|
+
location: pickup_text(tournament_doc, ".tourney-location"),
|
93
|
+
date: divide_tournament_date(tournament_date),
|
94
|
+
year: tournament_date[0, 4],
|
95
|
+
caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
|
96
|
+
surface: pickup_surface(tournament_doc)
|
97
|
+
}
|
98
|
+
end
|
108
99
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
def pickup_player_rank(tournament_caption)
|
101
|
+
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
102
|
+
rank[1].strip
|
103
|
+
end
|
113
104
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
105
|
+
# Before: String "2011.01.03 - 2011.01.08"
|
106
|
+
# After: Hash { start: "2011.01.03", end: "2011.01.08" }
|
107
|
+
def divide_tournament_date(date)
|
108
|
+
date = date.split('-').map(&:strip)
|
109
|
+
{ start: date[0], end: date[1] }
|
110
|
+
end
|
120
111
|
|
121
|
-
|
122
|
-
|
123
|
-
|
112
|
+
def pickup_text(doc, selector)
|
113
|
+
doc.css(selector).first.content.strip
|
114
|
+
end
|
124
115
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
end
|
116
|
+
def pickup_surface(tournament_doc)
|
117
|
+
tournament_doc
|
118
|
+
.css(".tourney-details")[1]
|
119
|
+
.css(".item-details")
|
120
|
+
.first.content.gsub(/\t|\s/, "")
|
131
121
|
end
|
132
122
|
end
|
133
123
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module AtpScraper
|
2
|
+
# Main class
|
3
|
+
class Get
|
4
|
+
def self.singles_ranking(rank_range = nil)
|
5
|
+
request_uri = "/en/rankings/singles?rankRange=#{rank_range}"
|
6
|
+
ranking_doc = AtpScraper::Html.get_and_parse(request_uri)
|
7
|
+
ranking = AtpScraper::Ranking.new
|
8
|
+
ranking.pickup_ranking_data(ranking_doc)
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.player_activity(player_id, year)
|
12
|
+
request_uri = "/players/anything/#{player_id}/player-activity?year=#{year}"
|
13
|
+
activity_doc = AtpScraper::Html.get_and_parse(request_uri)
|
14
|
+
activity = AtpScraper::Activity.new
|
15
|
+
activity.pickup_activity_data(activity_doc)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/atp_scraper/ranking.rb
CHANGED
@@ -9,59 +9,49 @@
|
|
9
9
|
module AtpScraper
|
10
10
|
# Scrape ranking data
|
11
11
|
class Ranking
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
pickup_ranking_data(ranking_doc)
|
12
|
+
def pickup_ranking_data(ranking_doc)
|
13
|
+
result = []
|
14
|
+
search_player_doc(ranking_doc).each do |player_doc|
|
15
|
+
result.push(pickup_player_data(player_doc))
|
17
16
|
end
|
17
|
+
result
|
18
|
+
end
|
18
19
|
|
19
|
-
|
20
|
-
"/en/rankings/singles?rankRange=#{rank_range}"
|
21
|
-
end
|
20
|
+
private
|
22
21
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
result.push(pickup_player_data(player_doc))
|
27
|
-
end
|
28
|
-
result
|
29
|
-
end
|
30
|
-
|
31
|
-
def search_player_doc(ranking_doc)
|
32
|
-
ranking_doc.css(".mega-table tbody tr")
|
33
|
-
end
|
22
|
+
def search_player_doc(ranking_doc)
|
23
|
+
ranking_doc.css(".mega-table tbody tr")
|
24
|
+
end
|
34
25
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
26
|
+
def pickup_player_data(player_doc)
|
27
|
+
url = pickup_player_url(player_doc)
|
28
|
+
{
|
29
|
+
ranking: pickup_player_rank(player_doc),
|
30
|
+
player_name: pickup_player_name(player_doc),
|
31
|
+
player_url_name: get_url_name(url),
|
32
|
+
player_id: get_url_id(url)
|
33
|
+
}
|
34
|
+
end
|
44
35
|
|
45
|
-
|
46
|
-
|
47
|
-
|
36
|
+
def pickup_player_rank(player_doc)
|
37
|
+
player_doc.css(".rank-cell").first.content.strip
|
38
|
+
end
|
48
39
|
|
49
|
-
|
50
|
-
|
51
|
-
|
40
|
+
def pickup_player_name(player_doc)
|
41
|
+
player_doc.css(".player-cell").first.content.strip
|
42
|
+
end
|
52
43
|
|
53
|
-
|
54
|
-
|
55
|
-
|
44
|
+
def pickup_player_url(player_doc)
|
45
|
+
player_doc.css(".player-cell a").attr("href").value
|
46
|
+
end
|
56
47
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
48
|
+
# url => "/en/players/rafael-nadal/n409/overview"
|
49
|
+
def get_url_name(url)
|
50
|
+
url.split("/")[3]
|
51
|
+
end
|
61
52
|
|
62
|
-
|
63
|
-
|
64
|
-
end
|
53
|
+
def get_url_id(url)
|
54
|
+
url.split("/")[4]
|
65
55
|
end
|
66
56
|
end
|
67
57
|
end
|
data/lib/atp_scraper/version.rb
CHANGED
data/lib/atp_scraper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosuke5
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -142,6 +142,7 @@ files:
|
|
142
142
|
- bin/setup
|
143
143
|
- lib/atp_scraper.rb
|
144
144
|
- lib/atp_scraper/activity.rb
|
145
|
+
- lib/atp_scraper/get.rb
|
145
146
|
- lib/atp_scraper/html.rb
|
146
147
|
- lib/atp_scraper/ranking.rb
|
147
148
|
- lib/atp_scraper/version.rb
|