atp_scraper 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/atp_scraper/activities/record.rb +11 -1
- data/lib/atp_scraper/activities/tournament.rb +70 -0
- data/lib/atp_scraper/activity.rb +8 -56
- data/lib/atp_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7d4557a6bb017dd9f6607a7a55a7ffd8d53b219
|
4
|
+
data.tar.gz: 1bbae10d70dd2deac57b51c0f86c7b80fda6fc55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b71241b3908338ba1d07c73d708bfd2ea9ffe5c7f3226a17243a296a78c9d5a49a01f3d54499b23f26303f5adbeabf4a3b4985578a231e3d486825e8dd2fa920
|
7
|
+
data.tar.gz: 8e4854126992fab5cd987e38365c150a4976edfd157da2e3dd516fbbfda32d9f013de39678aa55952661782cf4959e404e7afc7caa07f54b1cb03b2ea5b6cf5b
|
data/README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/atp_scraper.svg)](https://badge.fury.io/rb/atp_scraper)
|
1
2
|
[![Build Status](https://travis-ci.org/mosuke5/atp_scraper.svg?branch=master)](https://travis-ci.org/mosuke5/atp_scraper)
|
2
3
|
# AtpScraper
|
3
4
|
AtpScraper is a tool scraping tennis data from atpworldtour.com
|
@@ -49,6 +50,7 @@ AtpScraper::Get.player_activity("n409", 2016)
|
|
49
50
|
# score: "673 64 63 674 26",
|
50
51
|
# win_loss: "L",
|
51
52
|
# tournament_name: "Australian Open",
|
53
|
+
# tournament_category: "grandslam",
|
52
54
|
# tournament_location: "Melbourne, Australia",
|
53
55
|
# tournament_start_date: "2016.01.18",
|
54
56
|
# tournament_end_date: "2016.01.31",
|
data/lib/atp_scraper/activities/record.rb
CHANGED
@@ -1,7 +1,17 @@
|
|
1
1
|
module Activities
|
2
2
|
# Activity Record Class
|
3
3
|
class Record
|
4
|
-
def
|
4
|
+
def initialize(doc)
|
5
|
+
@record = doc
|
6
|
+
end
|
7
|
+
|
8
|
+
def info
|
9
|
+
pickup_record(@record)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def pickup_record(record_doc)
|
5
15
|
result = {}
|
6
16
|
record_doc.css("td").each_with_index do |td, n|
|
7
17
|
record_content = td.content.strip
|
data/lib/atp_scraper/activities/tournament.rb
CHANGED
@@ -1,5 +1,75 @@
|
|
1
1
|
module Activities
|
2
2
|
# Activity Tournament Class
|
3
3
|
class Tournament
|
4
|
+
def initialize(doc)
|
5
|
+
@tournament = doc
|
6
|
+
end
|
7
|
+
|
8
|
+
def info
|
9
|
+
pickup_info(@tournament)
|
10
|
+
end
|
11
|
+
|
12
|
+
def records
|
13
|
+
search_records_doc
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def pickup_info(tournament_doc)
|
19
|
+
tournament_date = pickup_text(".tourney-dates")
|
20
|
+
surface = pickup_surface
|
21
|
+
caption = pickup_text(".activity-tournament-caption")
|
22
|
+
{
|
23
|
+
name: pickup_text(".tourney-title"),
|
24
|
+
category: pickup_category,
|
25
|
+
location: pickup_text(".tourney-location"),
|
26
|
+
date: divide_tournament_date(tournament_date),
|
27
|
+
year: tournament_date[0, 4],
|
28
|
+
surface: surface[:surface],
|
29
|
+
surface_inout: surface[:inout],
|
30
|
+
ranking: pickup_player_rank(caption)
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
# Before: String "2011.01.03 - 2011.01.08"
|
35
|
+
# After: Hash { start: 2011.01.03, end: 2011.01.08 }
|
36
|
+
def divide_tournament_date(date)
|
37
|
+
date = date.split('-').map(&:strip)
|
38
|
+
{ start: date[0], end: date[1] }
|
39
|
+
end
|
40
|
+
|
41
|
+
def pickup_text(selector)
|
42
|
+
@tournament.css(selector).first.content.strip
|
43
|
+
end
|
44
|
+
|
45
|
+
def pickup_category
|
46
|
+
# ex) /~/media/images/tourtypes/categorystamps_itf_118x64.png?xxxxx
|
47
|
+
badge_url = @tournament.css(".tourney-badge-wrapper img").attr("src").value
|
48
|
+
badge_url.match(/categorystamps_(.*)_[0-9]*x[0-9]*.png/)[1]
|
49
|
+
end
|
50
|
+
|
51
|
+
def pickup_surface
|
52
|
+
surface = @tournament
|
53
|
+
.css(".tourney-details")[1]
|
54
|
+
.css(".item-details")
|
55
|
+
.first.content.gsub(/\t|\s/, "")
|
56
|
+
divide_surface(surface)
|
57
|
+
end
|
58
|
+
|
59
|
+
# "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
|
60
|
+
def divide_surface(surface)
|
61
|
+
inout = surface.match(/^(Outdoor|Indoor)/)
|
62
|
+
return { surface: surface, inout: nil } if inout.nil?
|
63
|
+
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
64
|
+
end
|
65
|
+
|
66
|
+
def pickup_player_rank(tournament_caption)
|
67
|
+
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
68
|
+
rank[1].strip
|
69
|
+
end
|
70
|
+
|
71
|
+
def search_records_doc
|
72
|
+
@tournament.css(".mega-table tbody tr")
|
73
|
+
end
|
4
74
|
end
|
5
75
|
end
|
data/lib/atp_scraper/activity.rb
CHANGED
@@ -12,6 +12,7 @@
|
|
12
12
|
###################
|
13
13
|
|
14
14
|
require 'atp_scraper/activities/record'
|
15
|
+
require 'atp_scraper/activities/tournament'
|
15
16
|
module AtpScraper
|
16
17
|
# Scrape activity data
|
17
18
|
class Activity
|
@@ -23,14 +24,12 @@ module AtpScraper
|
|
23
24
|
|
24
25
|
def pickup_activity_data
|
25
26
|
result = []
|
26
|
-
player = {}
|
27
27
|
|
28
28
|
search_tournaments_doc(@activity_doc).each do |tournament_doc|
|
29
|
-
tournament =
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
record_hash = create_record(record, player, tournament)
|
29
|
+
tournament = Tournament.new(tournament_doc)
|
30
|
+
tournament.records.each do |record_doc|
|
31
|
+
record = Record.new(record_doc)
|
32
|
+
record_hash = create_record(record.info, tournament.info)
|
34
33
|
result.push(record_hash)
|
35
34
|
end
|
36
35
|
end
|
@@ -43,21 +42,18 @@ module AtpScraper
|
|
43
42
|
activity_doc.css(".activity-tournament-table")
|
44
43
|
end
|
45
44
|
|
46
|
-
def
|
47
|
-
tournament_doc.css(".mega-table tbody tr")
|
48
|
-
end
|
49
|
-
|
50
|
-
def create_record(record, player, tournament)
|
45
|
+
def create_record(record, tournament)
|
51
46
|
{
|
52
47
|
year: tournament[:year],
|
53
48
|
player_name: @player_name,
|
54
|
-
player_rank:
|
49
|
+
player_rank: tournament[:ranking],
|
55
50
|
opponent_name: record[:opponent_name],
|
56
51
|
opponent_rank: record[:opponent_rank],
|
57
52
|
round: record[:round],
|
58
53
|
score: record[:score],
|
59
54
|
win_loss: record[:win_loss],
|
60
55
|
tournament_name: tournament[:name],
|
56
|
+
tournament_category: tournament[:category],
|
61
57
|
tournament_location: tournament[:location],
|
62
58
|
tournament_start_date: tournament[:date][:start],
|
63
59
|
tournament_end_date: tournament[:date][:end],
|
@@ -71,49 +67,5 @@ module AtpScraper
|
|
71
67
|
.css("meta[property=\"pageTransitionTitle\"]")
|
72
68
|
.attr("content").value
|
73
69
|
end
|
74
|
-
|
75
|
-
def pickup_tournament_info(tournament_doc)
|
76
|
-
tournament_date = pickup_text(tournament_doc, ".tourney-dates")
|
77
|
-
surface = pickup_surface(tournament_doc)
|
78
|
-
{
|
79
|
-
name: pickup_text(tournament_doc, ".tourney-title"),
|
80
|
-
location: pickup_text(tournament_doc, ".tourney-location"),
|
81
|
-
date: divide_tournament_date(tournament_date),
|
82
|
-
year: tournament_date[0, 4],
|
83
|
-
caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
|
84
|
-
surface: surface[:surface],
|
85
|
-
surface_inout: surface[:inout]
|
86
|
-
}
|
87
|
-
end
|
88
|
-
|
89
|
-
def pickup_player_rank(tournament_caption)
|
90
|
-
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
91
|
-
rank[1].strip
|
92
|
-
end
|
93
|
-
|
94
|
-
# Before: String "2011.01.03 - 2011.01.08"
|
95
|
-
# After: Hash { start: 2011.01.03, end: 2011.01.08 }
|
96
|
-
def divide_tournament_date(date)
|
97
|
-
date = date.split('-').map(&:strip)
|
98
|
-
{ start: date[0], end: date[1] }
|
99
|
-
end
|
100
|
-
|
101
|
-
def pickup_text(doc, selector)
|
102
|
-
doc.css(selector).first.content.strip
|
103
|
-
end
|
104
|
-
|
105
|
-
def pickup_surface(tournament_doc)
|
106
|
-
surface = tournament_doc
|
107
|
-
.css(".tourney-details")[1]
|
108
|
-
.css(".item-details")
|
109
|
-
.first.content.gsub(/\t|\s/, "")
|
110
|
-
divide_surface(surface)
|
111
|
-
end
|
112
|
-
|
113
|
-
def divide_surface(surface)
|
114
|
-
inout = surface.match(/^(Outdoor|Indoor)/)
|
115
|
-
return { surface: surface, inout: nil } if inout.nil?
|
116
|
-
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
117
|
-
end
|
118
70
|
end
|
119
71
|
end
|
data/lib/atp_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosuke5
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|