atp_scraper 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/atp_scraper/activity.rb +23 -6
- data/lib/atp_scraper/get.rb +6 -6
- data/lib/atp_scraper/html.rb +0 -5
- data/lib/atp_scraper/ranking.rb +6 -2
- data/lib/atp_scraper/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3cef6acd9e40a30750c787843d0726e19d4f9ef9
|
4
|
+
data.tar.gz: 41574d18eceb43ea314c589ba581e0aaef5fc30e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5aee0fc52a7da12114bd6d33b52d16623a48efdd25486f3de6a79e507674012d18b8d5af7afe949fe249d473383a3f6d3e5009b69d48ce151e09f7a68643462c
|
7
|
+
data.tar.gz: f42d99878a62911900e7c769fc5329cf25160aa3fe2d30aa8e473615290e2f1746d3f5970d5f85469f3e94bf19c377b814d3f25cd6367e88b81204d4046655e5
|
data/lib/atp_scraper/activity.rb
CHANGED
@@ -14,12 +14,17 @@
|
|
14
14
|
module AtpScraper
|
15
15
|
# Scrape activity data
|
16
16
|
class Activity
|
17
|
-
def pickup_activity_data(activity_doc)
|
17
|
+
def initialize(html, html_charset = 'utf-8')
|
18
|
+
@activity_doc = AtpScraper::Html.parse(html, html_charset)
|
19
|
+
@player_name = pickup_player_name(@activity_doc)
|
20
|
+
end
|
21
|
+
|
22
|
+
def pickup_activity_data
|
18
23
|
result = []
|
19
24
|
player = {}
|
20
|
-
player[:name] = pickup_player_name(activity_doc)
|
25
|
+
player[:name] = @player_name
|
21
26
|
|
22
|
-
search_tournaments_doc(activity_doc).each do |tournament_doc|
|
27
|
+
search_tournaments_doc(@activity_doc).each do |tournament_doc|
|
23
28
|
tournament = pickup_tournament_info(tournament_doc)
|
24
29
|
player[:rank] = pickup_player_rank(tournament[:caption])
|
25
30
|
search_records_doc(tournament_doc).each do |record_doc|
|
@@ -55,7 +60,8 @@ module AtpScraper
|
|
55
60
|
tournament_location: tournament[:location],
|
56
61
|
tournament_start_date: tournament[:date][:start],
|
57
62
|
tournament_end_date: tournament[:date][:end],
|
58
|
-
tournament_surface: tournament[:surface]
|
63
|
+
tournament_surface: tournament[:surface],
|
64
|
+
tournament_surface_inout: tournament[:surface_inout]
|
59
65
|
}
|
60
66
|
end
|
61
67
|
|
@@ -87,13 +93,15 @@ module AtpScraper
|
|
87
93
|
|
88
94
|
def pickup_tournament_info(tournament_doc)
|
89
95
|
tournament_date = pickup_text(tournament_doc, ".tourney-dates")
|
96
|
+
surface = pickup_surface(tournament_doc)
|
90
97
|
{
|
91
98
|
name: pickup_text(tournament_doc, ".tourney-title"),
|
92
99
|
location: pickup_text(tournament_doc, ".tourney-location"),
|
93
100
|
date: divide_tournament_date(tournament_date),
|
94
101
|
year: tournament_date[0, 4],
|
95
102
|
caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
|
96
|
-
surface: pickup_surface(tournament_doc)
|
103
|
+
surface: surface[:surface],
|
104
|
+
surface_inout: surface[:inout]
|
97
105
|
}
|
98
106
|
end
|
99
107
|
|
@@ -114,10 +122,19 @@ module AtpScraper
|
|
114
122
|
end
|
115
123
|
|
116
124
|
def pickup_surface(tournament_doc)
|
117
|
-
tournament_doc
|
125
|
+
surface = tournament_doc
|
118
126
|
.css(".tourney-details")[1]
|
119
127
|
.css(".item-details")
|
120
128
|
.first.content.gsub(/\t|\s/, "")
|
129
|
+
divide_surface(surface)
|
130
|
+
end
|
131
|
+
|
132
|
+
def divide_surface(surface)
|
133
|
+
if (surface.match(/^Outdoor/))
|
134
|
+
return { surface: surface.gsub(/Outdoor/, ''), inout: "Outdoor" }
|
135
|
+
else
|
136
|
+
return { surface: surface.gsub(/Indoor/, ''), inout: "Indoor" }
|
137
|
+
end
|
121
138
|
end
|
122
139
|
end
|
123
140
|
end
|
data/lib/atp_scraper/get.rb
CHANGED
@@ -3,16 +3,16 @@ module AtpScraper
|
|
3
3
|
class Get
|
4
4
|
def self.singles_ranking(rank_range = nil)
|
5
5
|
request_uri = "/en/rankings/singles?rankRange=#{rank_range}"
|
6
|
-
|
7
|
-
ranking = AtpScraper::Ranking.new
|
8
|
-
ranking.pickup_ranking_data
|
6
|
+
ranking_html = AtpScraper::Html.get(request_uri)
|
7
|
+
ranking = AtpScraper::Ranking.new(ranking_html[:html], ranking_html[:charset])
|
8
|
+
ranking.pickup_ranking_data
|
9
9
|
end
|
10
10
|
|
11
11
|
def self.player_activity(player_id, year)
|
12
12
|
request_uri = "/players/anything/#{player_id}/player-activity?year=#{year}"
|
13
|
-
|
14
|
-
activity = AtpScraper::Activity.new
|
15
|
-
activity.pickup_activity_data
|
13
|
+
activity_html = AtpScraper::Html.get(request_uri)
|
14
|
+
activity = AtpScraper::Activity.new(activity_html[:html], activity_html[:charset])
|
15
|
+
activity.pickup_activity_data
|
16
16
|
end
|
17
17
|
end
|
18
18
|
end
|
data/lib/atp_scraper/html.rb
CHANGED
@@ -4,11 +4,6 @@ module AtpScraper
|
|
4
4
|
# Get and parse html from atpworldtour.com
|
5
5
|
class Html
|
6
6
|
BASE = "http://www.atpworldtour.com"
|
7
|
-
def self.get_and_parse(uri)
|
8
|
-
html = get(uri)
|
9
|
-
parse(html[:html], html[:charset])
|
10
|
-
end
|
11
|
-
|
12
7
|
def self.get(uri)
|
13
8
|
charset = nil
|
14
9
|
html = open(BASE + uri) do |f|
|
data/lib/atp_scraper/ranking.rb
CHANGED
@@ -9,9 +9,13 @@
|
|
9
9
|
module AtpScraper
|
10
10
|
# Scrape ranking data
|
11
11
|
class Ranking
|
12
|
-
def pickup_ranking_data(ranking_doc)
|
12
|
+
def initialize(html, html_charset = 'utf-8')
|
13
|
+
@ranking_doc = AtpScraper::Html.parse(html, html_charset)
|
14
|
+
end
|
15
|
+
|
16
|
+
def pickup_ranking_data
|
13
17
|
result = []
|
14
|
-
search_player_doc(ranking_doc).each do |player_doc|
|
18
|
+
search_player_doc(@ranking_doc).each do |player_doc|
|
15
19
|
result.push(pickup_player_data(player_doc))
|
16
20
|
end
|
17
21
|
result
|
data/lib/atp_scraper/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: atp_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosuke5
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|