atp_scraper 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/atp_scraper/activities/record.rb +11 -1
- data/lib/atp_scraper/activities/tournament.rb +70 -0
- data/lib/atp_scraper/activity.rb +8 -56
- data/lib/atp_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f7d4557a6bb017dd9f6607a7a55a7ffd8d53b219
|
4
|
+
data.tar.gz: 1bbae10d70dd2deac57b51c0f86c7b80fda6fc55
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b71241b3908338ba1d07c73d708bfd2ea9ffe5c7f3226a17243a296a78c9d5a49a01f3d54499b23f26303f5adbeabf4a3b4985578a231e3d486825e8dd2fa920
|
7
|
+
data.tar.gz: 8e4854126992fab5cd987e38365c150a4976edfd157da2e3dd516fbbfda32d9f013de39678aa55952661782cf4959e404e7afc7caa07f54b1cb03b2ea5b6cf5b
|
data/README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
[Gem Version](https://badge.fury.io/rb/atp_scraper)
|
1
2
|
[Build Status](https://travis-ci.org/mosuke5/atp_scraper)
|
2
3
|
# AtpScraper
|
3
4
|
AtpScraper is a tool scraping tennis data from atpworldtour.com
|
@@ -49,6 +50,7 @@ AtpScraper::Get.player_activity("n409", 2016)
|
|
49
50
|
# score: "673 64 63 674 26",
|
50
51
|
# win_loss: "L",
|
51
52
|
# tournament_name: "Australian Open",
|
53
|
+
# tournament_category: "grandslam",
|
52
54
|
# tournament_location: "Melbourne, Australia",
|
53
55
|
# tournament_start_date: "2016.01.18",
|
54
56
|
# tournament_end_date: "2016.01.31",
|
data/lib/atp_scraper/activities/record.rb
CHANGED
@@ -1,7 +1,17 @@
|
|
1
1
|
module Activities
|
2
2
|
# Activity Record Class
|
3
3
|
class Record
|
4
|
-
def
|
4
|
+
def initialize(doc)
|
5
|
+
@record = doc
|
6
|
+
end
|
7
|
+
|
8
|
+
def info
|
9
|
+
pickup_record(@record)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
|
14
|
+
def pickup_record(record_doc)
|
5
15
|
result = {}
|
6
16
|
record_doc.css("td").each_with_index do |td, n|
|
7
17
|
record_content = td.content.strip
|
data/lib/atp_scraper/activities/tournament.rb
CHANGED
@@ -1,5 +1,75 @@
|
|
1
1
|
module Activities
|
2
2
|
# Activity Tournament Class
|
3
3
|
class Tournament
|
4
|
+
def initialize(doc)
|
5
|
+
@tournament = doc
|
6
|
+
end
|
7
|
+
|
8
|
+
def info
|
9
|
+
pickup_info(@tournament)
|
10
|
+
end
|
11
|
+
|
12
|
+
def records
|
13
|
+
search_records_doc
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def pickup_info(tournament_doc)
|
19
|
+
tournament_date = pickup_text(".tourney-dates")
|
20
|
+
surface = pickup_surface
|
21
|
+
caption = pickup_text(".activity-tournament-caption")
|
22
|
+
{
|
23
|
+
name: pickup_text(".tourney-title"),
|
24
|
+
category: pickup_category,
|
25
|
+
location: pickup_text(".tourney-location"),
|
26
|
+
date: divide_tournament_date(tournament_date),
|
27
|
+
year: tournament_date[0, 4],
|
28
|
+
surface: surface[:surface],
|
29
|
+
surface_inout: surface[:inout],
|
30
|
+
ranking: pickup_player_rank(caption)
|
31
|
+
}
|
32
|
+
end
|
33
|
+
|
34
|
+
# Before: String "2011.01.03 - 2011.01.08"
|
35
|
+
# After: Hash { start: 2011.01.03, end: 2011.01.08 }
|
36
|
+
def divide_tournament_date(date)
|
37
|
+
date = date.split('-').map(&:strip)
|
38
|
+
{ start: date[0], end: date[1] }
|
39
|
+
end
|
40
|
+
|
41
|
+
def pickup_text(selector)
|
42
|
+
@tournament.css(selector).first.content.strip
|
43
|
+
end
|
44
|
+
|
45
|
+
def pickup_category
|
46
|
+
# ex) /~/media/images/tourtypes/categorystamps_itf_118x64.png?xxxxx
|
47
|
+
badge_url = @tournament.css(".tourney-badge-wrapper img").attr("src").value
|
48
|
+
badge_url.match(/categorystamps_(.*)_[0-9]*x[0-9]*.png/)[1]
|
49
|
+
end
|
50
|
+
|
51
|
+
def pickup_surface
|
52
|
+
surface = @tournament
|
53
|
+
.css(".tourney-details")[1]
|
54
|
+
.css(".item-details")
|
55
|
+
.first.content.gsub(/\t|\s/, "")
|
56
|
+
divide_surface(surface)
|
57
|
+
end
|
58
|
+
|
59
|
+
# "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
|
60
|
+
def divide_surface(surface)
|
61
|
+
inout = surface.match(/^(Outdoor|Indoor)/)
|
62
|
+
return { surface: surface, inout: nil } if inout.nil?
|
63
|
+
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
64
|
+
end
|
65
|
+
|
66
|
+
def pickup_player_rank(tournament_caption)
|
67
|
+
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
68
|
+
rank[1].strip
|
69
|
+
end
|
70
|
+
|
71
|
+
def search_records_doc
|
72
|
+
@tournament.css(".mega-table tbody tr")
|
73
|
+
end
|
4
74
|
end
|
5
75
|
end
|
data/lib/atp_scraper/activity.rb
CHANGED
@@ -12,6 +12,7 @@
|
|
12
12
|
###################
|
13
13
|
|
14
14
|
require 'atp_scraper/activities/record'
|
15
|
+
require 'atp_scraper/activities/tournament'
|
15
16
|
module AtpScraper
|
16
17
|
# Scrape activity data
|
17
18
|
class Activity
|
@@ -23,14 +24,12 @@ module AtpScraper
|
|
23
24
|
|
24
25
|
def pickup_activity_data
|
25
26
|
result = []
|
26
|
-
player = {}
|
27
27
|
|
28
28
|
search_tournaments_doc(@activity_doc).each do |tournament_doc|
|
29
|
-
tournament =
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
record_hash = create_record(record, player, tournament)
|
29
|
+
tournament = Tournament.new(tournament_doc)
|
30
|
+
tournament.records.each do |record_doc|
|
31
|
+
record = Record.new(record_doc)
|
32
|
+
record_hash = create_record(record.info, tournament.info)
|
34
33
|
result.push(record_hash)
|
35
34
|
end
|
36
35
|
end
|
@@ -43,21 +42,18 @@ module AtpScraper
|
|
43
42
|
activity_doc.css(".activity-tournament-table")
|
44
43
|
end
|
45
44
|
|
46
|
-
def
|
47
|
-
tournament_doc.css(".mega-table tbody tr")
|
48
|
-
end
|
49
|
-
|
50
|
-
def create_record(record, player, tournament)
|
45
|
+
def create_record(record, tournament)
|
51
46
|
{
|
52
47
|
year: tournament[:year],
|
53
48
|
player_name: @player_name,
|
54
|
-
player_rank:
|
49
|
+
player_rank: tournament[:ranking],
|
55
50
|
opponent_name: record[:opponent_name],
|
56
51
|
opponent_rank: record[:opponent_rank],
|
57
52
|
round: record[:round],
|
58
53
|
score: record[:score],
|
59
54
|
win_loss: record[:win_loss],
|
60
55
|
tournament_name: tournament[:name],
|
56
|
+
tournament_category: tournament[:category],
|
61
57
|
tournament_location: tournament[:location],
|
62
58
|
tournament_start_date: tournament[:date][:start],
|
63
59
|
tournament_end_date: tournament[:date][:end],
|
@@ -71,49 +67,5 @@ module AtpScraper
|
|
71
67
|
.css("meta[property=\"pageTransitionTitle\"]")
|
72
68
|
.attr("content").value
|
73
69
|
end
|
74
|
-
|
75
|
-
def pickup_tournament_info(tournament_doc)
|
76
|
-
tournament_date = pickup_text(tournament_doc, ".tourney-dates")
|
77
|
-
surface = pickup_surface(tournament_doc)
|
78
|
-
{
|
79
|
-
name: pickup_text(tournament_doc, ".tourney-title"),
|
80
|
-
location: pickup_text(tournament_doc, ".tourney-location"),
|
81
|
-
date: divide_tournament_date(tournament_date),
|
82
|
-
year: tournament_date[0, 4],
|
83
|
-
caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
|
84
|
-
surface: surface[:surface],
|
85
|
-
surface_inout: surface[:inout]
|
86
|
-
}
|
87
|
-
end
|
88
|
-
|
89
|
-
def pickup_player_rank(tournament_caption)
|
90
|
-
rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
|
91
|
-
rank[1].strip
|
92
|
-
end
|
93
|
-
|
94
|
-
# Before: String "2011.01.03 - 2011.01.08"
|
95
|
-
# After: Hash { start: 2011.01.03, end: 2011.01.08 }
|
96
|
-
def divide_tournament_date(date)
|
97
|
-
date = date.split('-').map(&:strip)
|
98
|
-
{ start: date[0], end: date[1] }
|
99
|
-
end
|
100
|
-
|
101
|
-
def pickup_text(doc, selector)
|
102
|
-
doc.css(selector).first.content.strip
|
103
|
-
end
|
104
|
-
|
105
|
-
def pickup_surface(tournament_doc)
|
106
|
-
surface = tournament_doc
|
107
|
-
.css(".tourney-details")[1]
|
108
|
-
.css(".item-details")
|
109
|
-
.first.content.gsub(/\t|\s/, "")
|
110
|
-
divide_surface(surface)
|
111
|
-
end
|
112
|
-
|
113
|
-
def divide_surface(surface)
|
114
|
-
inout = surface.match(/^(Outdoor|Indoor)/)
|
115
|
-
return { surface: surface, inout: nil } if inout.nil?
|
116
|
-
{ surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
|
117
|
-
end
|
118
70
|
end
|
119
71
|
end
|
data/lib/atp_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosuke5
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|