atp_scraper 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -2
- data/Guardfile +4 -0
- data/README.md +4 -2
- data/atp_scraper.gemspec +2 -0
- data/lib/atp_scraper/activities/record.rb +24 -28
- data/lib/atp_scraper/activities/tournament.rb +59 -63
- data/lib/atp_scraper/activity.rb +14 -9
- data/lib/atp_scraper/ranking.rb +5 -4
- data/lib/atp_scraper/version.rb +1 -1
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: baa37248033947425c520cd786e492aa009ed6e9
|
4
|
+
data.tar.gz: a92c8e3d56b0e5557ee2bf92ffca9b604c3d0932
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 226b5cd0ef6a8b8f40f40442cb36e098cb72298c727b674944a36e565755084e78704290ed74d3da9179f6417625844e56f96a291205a3d202b73f1f1627368c
|
7
|
+
data.tar.gz: a139dccfacf13aa459b91f1fbf7279faef51cbd0e8da0143d05d0ae767ffda0b7ff590772dc38b5900e2175687215b8dda90ff0712999a2cfed83d948d239bda
|
data/.travis.yml
CHANGED
data/Guardfile
ADDED
data/README.md
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
# AtpScraper
|
4
4
|
AtpScraper is a tool scraping tennis data from atpworldtour.com
|
5
5
|
|
6
|
+
[https://rubygems.org/gems/atp_scraper]
|
7
|
+
|
6
8
|
## Installation
|
7
9
|
|
8
10
|
Add this line to your application's Gemfile:
|
@@ -25,7 +27,7 @@ require "atp_scraper"
|
|
25
27
|
|
26
28
|
# Get Singles Ranking TOP100
|
27
29
|
AtpScraper::Get.singles_ranking
|
28
|
-
# Response
|
30
|
+
# Response
|
29
31
|
# {
|
30
32
|
# ranking: "5"
|
31
33
|
# player_name: "Rafael Nadal",
|
@@ -39,7 +41,7 @@ AtpScraper::Get.singles_ranking("101-200")
|
|
39
41
|
|
40
42
|
# Get Player Activity. For Example Rafael Nadal's activity in 2016
|
41
43
|
AtpScraper::Get.player_activity("n409", 2016)
|
42
|
-
# Response
|
44
|
+
# Response
|
43
45
|
# {
|
44
46
|
# year: "2016",
|
45
47
|
# player_name: "Rafael Nadal",
|
data/atp_scraper.gemspec
CHANGED
@@ -27,4 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.add_development_dependency "pry"
|
28
28
|
spec.add_development_dependency "rb-readline"
|
29
29
|
spec.add_development_dependency "rubocop"
|
30
|
+
spec.add_development_dependency "guard"
|
31
|
+
spec.add_development_dependency "guard-minitest"
|
30
32
|
end
|
@@ -1,34 +1,30 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
def info
|
9
|
-
pickup_record(@record)
|
10
|
-
end
|
11
|
-
|
12
|
-
private
|
1
|
+
module AtpScraper
|
2
|
+
module Activities
|
3
|
+
# Activity Record Class
|
4
|
+
class Record
|
5
|
+
def initialize(doc)
|
6
|
+
@record = doc
|
7
|
+
end
|
13
8
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
9
|
+
def get
|
10
|
+
result = {}
|
11
|
+
@record.css("td").each_with_index do |td, n|
|
12
|
+
record_content = td.content.strip
|
13
|
+
case n
|
14
|
+
when 0 then
|
15
|
+
result[:round] = record_content
|
16
|
+
when 1 then
|
17
|
+
result[:opponent_rank] = record_content
|
18
|
+
when 2 then
|
19
|
+
result[:opponent_name] = record_content
|
20
|
+
when 3 then
|
21
|
+
result[:win_loss] = record_content
|
22
|
+
when 4 then
|
23
|
+
result[:score] = record_content
|
24
|
+
end
|
29
25
|
end
|
26
|
+
result
|
30
27
|
end
|
31
|
-
result
|
32
28
|
end
|
33
29
|
end
|
34
30
|
end
|
@@ -1,75 +1,71 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
module AtpScraper
|
2
|
+
module Activities
|
3
|
+
# Activity Tournament Class
|
4
|
+
class Tournament
|
5
|
+
def initialize(doc)
|
6
|
+
@tournament = doc
|
7
|
+
end
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
# Return tournament data
|
10
|
+
def get
|
11
|
+
tournament_date = pickup_text(".tourney-dates")
|
12
|
+
surface = pickup_surface
|
13
|
+
caption = pickup_text(".activity-tournament-caption")
|
14
|
+
{
|
15
|
+
name: pickup_text(".tourney-title"),
|
16
|
+
category: pickup_category,
|
17
|
+
location: pickup_text(".tourney-location"),
|
18
|
+
date: divide_tournament_date(tournament_date),
|
19
|
+
year: tournament_date[0, 4],
|
20
|
+
surface: surface[:surface],
|
21
|
+
surface_inout: surface[:inout],
|
22
|
+
ranking: pickup_player_rank(caption)
|
23
|
+
}
|
24
|
+
end
|
11
25
|
|
12
|
-
|
13
|
-
|
14
|
-
|
26
|
+
# Return records in this tournament
|
27
|
+
def records
|
28
|
+
@tournament.css(".mega-table tbody tr")
|
29
|
+
end
|
15
30
|
|
16
|
-
|
31
|
+
private
|
17
32
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
category: pickup_category,
|
25
|
-
location: pickup_text(".tourney-location"),
|
26
|
-
date: divide_tournament_date(tournament_date),
|
27
|
-
year: tournament_date[0, 4],
|
28
|
-
surface: surface[:surface],
|
29
|
-
surface_inout: surface[:inout],
|
30
|
-
ranking: pickup_player_rank(caption)
|
31
|
-
}
|
32
|
-
end
|
33
|
+
# Before: String "2011.01.03 - 2011.01.08"
|
34
|
+
# After: Hash { start: 2011.01.03, end: 2011.01.08 }
|
35
|
+
def divide_tournament_date(date)
|
36
|
+
date = date.split('-').map(&:strip)
|
37
|
+
{ start: date[0], end: date[1] }
|
38
|
+
end
|
33
39
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
date = date.split('-').map(&:strip)
|
38
|
-
{ start: date[0], end: date[1] }
|
39
|
-
end
|
40
|
+
def pickup_text(selector)
|
41
|
+
@tournament.css(selector).first.content.strip
|
42
|
+
end
|
40
43
|
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
def pickup_category
|
45
|
+
# ex) /~/media/images/tourtypes/categorystamps_itf_118x64.png?xxxxx
|
46
|
+
badge_url = @tournament.css(".tourney-badge-wrapper img").attr("src").value
|
47
|
+
badge_url.match(/categorystamps_(.*)_[0-9]*x[0-9]*.png/)[1]
|
48
|
+
end
|
44
49
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
+
def pickup_surface
|
51
|
+
surface = @tournament
|
52
|
+
.css(".tourney-details")[1]
|
53
|
+
.css(".item-details")
|
54
|
+
.first.content.gsub(/\t|\s/, "")
|
55
|
+
divide_surface(surface)
|
56
|
+
end
|
50
57
|
|
51
|
-
|
52
|
-
surface
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
end
|
58
|
-
|
59
|
-
# "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
|
60
|
-
def divide_surface(surface)
|
61
|
-
inout = surface.match(/^(Outdoor|Indoor)/)
|
62
|
-
return { surface: surface, inout: nil } if inout.nil?
|
63
|
-
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
64
|
-
end
|
65
|
-
|
66
|
-
def pickup_player_rank(tournament_caption)
|
67
|
-
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
68
|
-
rank[1].strip
|
69
|
-
end
|
58
|
+
# "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
|
59
|
+
def divide_surface(surface)
|
60
|
+
inout = surface.match(/^(Outdoor|Indoor)/)
|
61
|
+
return { surface: surface, inout: nil } if inout.nil?
|
62
|
+
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
63
|
+
end
|
70
64
|
|
71
|
-
|
72
|
-
|
65
|
+
def pickup_player_rank(tournament_caption)
|
66
|
+
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
67
|
+
rank[1].strip
|
68
|
+
end
|
73
69
|
end
|
74
70
|
end
|
75
71
|
end
|
data/lib/atp_scraper/activity.rb
CHANGED
@@ -19,17 +19,17 @@ module AtpScraper
|
|
19
19
|
include Activities
|
20
20
|
def initialize(html, html_charset = 'utf-8')
|
21
21
|
@activity_doc = AtpScraper::Html.parse(html, html_charset)
|
22
|
-
@player_name = pickup_player_name
|
22
|
+
@player_name = pickup_player_name
|
23
23
|
end
|
24
24
|
|
25
25
|
def pickup_activity_data
|
26
26
|
result = []
|
27
27
|
|
28
|
-
search_tournaments_doc
|
28
|
+
search_tournaments_doc.each do |tournament_doc|
|
29
29
|
tournament = Tournament.new(tournament_doc)
|
30
30
|
tournament.records.each do |record_doc|
|
31
31
|
record = Record.new(record_doc)
|
32
|
-
record_hash = create_record(record.
|
32
|
+
record_hash = create_record(record.get, tournament.get)
|
33
33
|
result.push(record_hash)
|
34
34
|
end
|
35
35
|
end
|
@@ -38,17 +38,17 @@ module AtpScraper
|
|
38
38
|
|
39
39
|
private
|
40
40
|
|
41
|
-
def search_tournaments_doc
|
42
|
-
activity_doc.css(".activity-tournament-table")
|
41
|
+
def search_tournaments_doc
|
42
|
+
@activity_doc.css(".activity-tournament-table")
|
43
43
|
end
|
44
44
|
|
45
45
|
def create_record(record, tournament)
|
46
46
|
{
|
47
47
|
year: tournament[:year],
|
48
48
|
player_name: @player_name,
|
49
|
-
player_rank: tournament[:ranking],
|
49
|
+
player_rank: convert_ranking(tournament[:ranking]),
|
50
50
|
opponent_name: record[:opponent_name],
|
51
|
-
opponent_rank: record[:opponent_rank],
|
51
|
+
opponent_rank: convert_ranking(record[:opponent_rank]),
|
52
52
|
round: record[:round],
|
53
53
|
score: record[:score],
|
54
54
|
win_loss: record[:win_loss],
|
@@ -62,10 +62,15 @@ module AtpScraper
|
|
62
62
|
}
|
63
63
|
end
|
64
64
|
|
65
|
-
def pickup_player_name
|
66
|
-
activity_doc
|
65
|
+
def pickup_player_name
|
66
|
+
@activity_doc
|
67
67
|
.css("meta[property=\"pageTransitionTitle\"]")
|
68
68
|
.attr("content").value
|
69
69
|
end
|
70
|
+
|
71
|
+
def convert_ranking(ranking)
|
72
|
+
return nil if ranking == '-'
|
73
|
+
ranking
|
74
|
+
end
|
70
75
|
end
|
71
76
|
end
|
data/lib/atp_scraper/ranking.rb
CHANGED
@@ -15,7 +15,7 @@ module AtpScraper
|
|
15
15
|
|
16
16
|
def pickup_ranking_data
|
17
17
|
result = []
|
18
|
-
search_player_doc
|
18
|
+
search_player_doc.each do |player_doc|
|
19
19
|
result.push(pickup_player_data(player_doc))
|
20
20
|
end
|
21
21
|
result
|
@@ -23,8 +23,8 @@ module AtpScraper
|
|
23
23
|
|
24
24
|
private
|
25
25
|
|
26
|
-
def search_player_doc
|
27
|
-
ranking_doc.css(".mega-table tbody tr")
|
26
|
+
def search_player_doc
|
27
|
+
@ranking_doc.css(".mega-table tbody tr")
|
28
28
|
end
|
29
29
|
|
30
30
|
def pickup_player_data(player_doc)
|
@@ -55,11 +55,12 @@ module AtpScraper
|
|
55
55
|
player_doc.css(".points-cell").first.content.strip.delete(',')
|
56
56
|
end
|
57
57
|
|
58
|
-
#
|
58
|
+
# "/en/players/rafael-nadal/n409/overview" => "rafael-nadal"
|
59
59
|
def get_url_name(url)
|
60
60
|
url.split("/")[3]
|
61
61
|
end
|
62
62
|
|
63
|
+
# "/en/players/rafael-nadal/n409/overview" => "n409"
|
63
64
|
def get_url_id(url)
|
64
65
|
url.split("/")[4]
|
65
66
|
end
|
data/lib/atp_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosuke5
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-05-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -122,6 +122,34 @@ dependencies:
|
|
122
122
|
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: guard
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: guard-minitest
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - ">="
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '0'
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
125
153
|
description: atp_scraper is a tool scraping tennis data from atpworldtour.com
|
126
154
|
email:
|
127
155
|
- w.vamos603@gmail.com
|
@@ -134,6 +162,7 @@ files:
|
|
134
162
|
- ".travis.yml"
|
135
163
|
- CODE_OF_CONDUCT.md
|
136
164
|
- Gemfile
|
165
|
+
- Guardfile
|
137
166
|
- LICENSE.txt
|
138
167
|
- README.md
|
139
168
|
- Rakefile
|