atp_scraper 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f8a982c8a114ded0d06e9dc11d027d5dadc2f2c
4
- data.tar.gz: ee462bc2065b283ee4699a2b82d52ee582777828
3
+ metadata.gz: ee413b367b7003e9748268c3e6ebac13a5758db0
4
+ data.tar.gz: dc366cb110b4063407919d9eb782f9f59af4ba7c
5
5
  SHA512:
6
- metadata.gz: c5c00331c5873c6838b18735fb131070269ec896b155e53e9f211877d64070263fe9d5051b64da33e6484aaa8ba585e2b9a2322d21b717fb9921e6166a025cca
7
- data.tar.gz: 87d432f14e2060b5995840fc59f527ee938d844e9a88827fe3d46728d95b839342e6b98dfc085f4d8fc8ba0d32f6e35755bc199614fdd5e4324004e7394e0780
6
+ metadata.gz: ee1d39cc20915134812ce6d6f1781c3cf0a6eb11d878e3a4218df8b47ab502389c9c3acd23dfd144dc145836e4a434ad996757be4a067e7ed78aefa318573bd3
7
+ data.tar.gz: 89e81c1b0251913fcd810671425b6773fdb2173f7c14a81293f1715b883d05529e2794a4c5c51b49383a8ac6ba393a517394475b01583adb5738913a778b0d19
data/README.md CHANGED
@@ -1,5 +1,6 @@
1
1
  [![Build Status](https://travis-ci.org/mosuke5/atp_scraper.svg?branch=master)](https://travis-ci.org/mosuke5/atp_scraper)
2
2
  # AtpScraper
3
+ AtpScraper is a tool for scraping tennis data from atpworldtour.com.
3
4
 
4
5
  ## Installation
5
6
 
@@ -22,7 +23,7 @@ Or install it yourself as:
22
23
  require "atp_scraper"
23
24
 
24
25
  # Get Singles Ranking TOP100
25
- AtpScraper::Ranking.get
26
+ AtpScraper::Get.singles_ranking
26
27
  # Response Json
27
28
  # {
28
29
  # ranking: "5"
@@ -32,10 +33,10 @@ AtpScraper::Ranking.get
32
33
  # }
33
34
 
34
35
  # Get Singles Ranking 101-200
35
- AtpScraper::Ranking.get("101-200")
36
+ AtpScraper::Get.singles_ranking("101-200")
36
37
 
37
38
  # Get Player Activity. For Example Rafael Nadal's activity in 2016
38
- AtpScraper::Activity.get("n409", 2016)
39
+ AtpScraper::Get.player_activity("n409", 2016)
39
40
  # Response Json
40
41
  # {
41
42
  # year: 2016,
@@ -54,12 +55,6 @@ AtpScraper::Activity.get("n409", 2016)
54
55
  # }
55
56
  ```
56
57
 
57
- ## Development
58
-
59
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
60
-
61
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
62
-
63
58
  ## Contributing
64
59
  - Fork the project.
65
60
  - Make your feature addition or bug fix, write tests.
@@ -68,6 +63,5 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
68
63
 
69
64
 
70
65
  ## License
71
-
72
66
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
73
67
 
@@ -14,120 +14,110 @@
14
14
  module AtpScraper
15
15
  # Scrape activity data
16
16
  class Activity
17
- class << self
18
- def get(player_id, year)
19
- request_uri = build_uri(player_id, year)
20
- activity_doc = AtpScraper::Html.get_and_parse(request_uri)
21
- pickup_activity_data(activity_doc)
22
- end
23
-
24
- def build_uri(player_id, year)
25
- "/players/anything/#{player_id}/player-activity?year=#{year}"
26
- end
17
+ def pickup_activity_data(activity_doc)
18
+ result = []
19
+ player = {}
20
+ player[:name] = pickup_player_name(activity_doc)
27
21
 
28
- def pickup_activity_data(activity_doc)
29
- result = []
30
- player = {}
31
- player[:name] = pickup_player_name(activity_doc)
32
-
33
- search_tournaments_doc(activity_doc).each do |tournament_doc|
34
- tournament = pickup_tournament_info(tournament_doc)
35
- player[:rank] = pickup_player_rank(tournament[:caption])
36
- search_records_doc(tournament_doc).each do |record_doc|
37
- record = pickup_record(record_doc)
38
- record_hash = create_record(record, player, tournament)
39
- result.push(record_hash)
40
- end
22
+ search_tournaments_doc(activity_doc).each do |tournament_doc|
23
+ tournament = pickup_tournament_info(tournament_doc)
24
+ player[:rank] = pickup_player_rank(tournament[:caption])
25
+ search_records_doc(tournament_doc).each do |record_doc|
26
+ record = pickup_record(record_doc)
27
+ record_hash = create_record(record, player, tournament)
28
+ result.push(record_hash)
41
29
  end
42
- result
43
30
  end
31
+ result
32
+ end
44
33
 
45
- def search_tournaments_doc(activity_doc)
46
- activity_doc.css(".activity-tournament-table")
47
- end
34
+ private
48
35
 
49
- def search_records_doc(tournament_doc)
50
- tournament_doc.css(".mega-table tbody tr")
51
- end
36
+ def search_tournaments_doc(activity_doc)
37
+ activity_doc.css(".activity-tournament-table")
38
+ end
52
39
 
53
- def create_record(record, player, tournament)
54
- {
55
- year: tournament[:year],
56
- player_name: player[:name],
57
- player_rank: player[:rank],
58
- opponent_name: record[:opponent_name],
59
- opponent_rank: record[:opponent_rank],
60
- round: record[:round],
61
- score: record[:score],
62
- win_loss: record[:win_loss],
63
- tournament_name: tournament[:name],
64
- tournament_location: tournament[:location],
65
- tournament_start_date: tournament[:date][:start],
66
- tournament_end_date: tournament[:date][:end],
67
- tournament_surface: tournament[:surface]
68
- }
69
- end
40
+ def search_records_doc(tournament_doc)
41
+ tournament_doc.css(".mega-table tbody tr")
42
+ end
70
43
 
71
- def pickup_player_name(activity_doc)
72
- activity_doc
73
- .css("meta[property=\"pageTransitionTitle\"]")
74
- .attr("content").value
75
- end
44
+ def create_record(record, player, tournament)
45
+ {
46
+ year: tournament[:year],
47
+ player_name: player[:name],
48
+ player_rank: player[:rank],
49
+ opponent_name: record[:opponent_name],
50
+ opponent_rank: record[:opponent_rank],
51
+ round: record[:round],
52
+ score: record[:score],
53
+ win_loss: record[:win_loss],
54
+ tournament_name: tournament[:name],
55
+ tournament_location: tournament[:location],
56
+ tournament_start_date: tournament[:date][:start],
57
+ tournament_end_date: tournament[:date][:end],
58
+ tournament_surface: tournament[:surface]
59
+ }
60
+ end
76
61
 
77
- def pickup_record(record_doc)
78
- result = {}
79
- record_doc.css("td").each_with_index do |td, n|
80
- record_content = td.content.strip
81
- case n
82
- when 0 then
83
- result[:round] = record_content
84
- when 1 then
85
- result[:opponent_rank] = record_content
86
- when 2 then
87
- result[:opponent_name] = record_content
88
- when 3 then
89
- result[:win_loss] = record_content
90
- when 4 then
91
- result[:score] = record_content
92
- end
62
+ def pickup_player_name(activity_doc)
63
+ activity_doc
64
+ .css("meta[property=\"pageTransitionTitle\"]")
65
+ .attr("content").value
66
+ end
67
+
68
+ def pickup_record(record_doc)
69
+ result = {}
70
+ record_doc.css("td").each_with_index do |td, n|
71
+ record_content = td.content.strip
72
+ case n
73
+ when 0 then
74
+ result[:round] = record_content
75
+ when 1 then
76
+ result[:opponent_rank] = record_content
77
+ when 2 then
78
+ result[:opponent_name] = record_content
79
+ when 3 then
80
+ result[:win_loss] = record_content
81
+ when 4 then
82
+ result[:score] = record_content
93
83
  end
94
- result
95
84
  end
85
+ result
86
+ end
96
87
 
97
- def pickup_tournament_info(tournament_doc)
98
- tournament_date = pickup_text(tournament_doc, ".tourney-dates")
99
- {
100
- name: pickup_text(tournament_doc, ".tourney-title"),
101
- location: pickup_text(tournament_doc, ".tourney-location"),
102
- date: divide_tournament_date(tournament_date),
103
- year: tournament_date[0, 4],
104
- caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
105
- surface: pickup_surface(tournament_doc)
106
- }
107
- end
88
+ def pickup_tournament_info(tournament_doc)
89
+ tournament_date = pickup_text(tournament_doc, ".tourney-dates")
90
+ {
91
+ name: pickup_text(tournament_doc, ".tourney-title"),
92
+ location: pickup_text(tournament_doc, ".tourney-location"),
93
+ date: divide_tournament_date(tournament_date),
94
+ year: tournament_date[0, 4],
95
+ caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
96
+ surface: pickup_surface(tournament_doc)
97
+ }
98
+ end
108
99
 
109
- def pickup_player_rank(tournament_caption)
110
- rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
111
- rank[1].strip
112
- end
100
+ def pickup_player_rank(tournament_caption)
101
+ rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
102
+ rank[1].strip
103
+ end
113
104
 
114
- # Before: String "2011.01.03 - 2011.01.08"
115
- # After: Hash { start: 2011.01.03, end: 2011.01.08 }
116
- def divide_tournament_date(date)
117
- date = date.split('-').map(&:strip)
118
- { start: date[0], end: date[1] }
119
- end
105
+ # Before: String "2011.01.03 - 2011.01.08"
106
+ # After: Hash { start: 2011.01.03, end: 2011.01.08 }
107
+ def divide_tournament_date(date)
108
+ date = date.split('-').map(&:strip)
109
+ { start: date[0], end: date[1] }
110
+ end
120
111
 
121
- def pickup_text(doc, selector)
122
- doc.css(selector).first.content.strip
123
- end
112
+ def pickup_text(doc, selector)
113
+ doc.css(selector).first.content.strip
114
+ end
124
115
 
125
- def pickup_surface(tournament_doc)
126
- tournament_doc
127
- .css(".tourney-details")[1]
128
- .css(".item-details")
129
- .first.content.gsub(/\t|\s/, "")
130
- end
116
+ def pickup_surface(tournament_doc)
117
+ tournament_doc
118
+ .css(".tourney-details")[1]
119
+ .css(".item-details")
120
+ .first.content.gsub(/\t|\s/, "")
131
121
  end
132
122
  end
133
123
  end
@@ -0,0 +1,18 @@
1
+ module AtpScraper
2
+ # Main class
3
+ class Get
4
+ def self.singles_ranking(rank_range = nil)
5
+ request_uri = "/en/rankings/singles?rankRange=#{rank_range}"
6
+ ranking_doc = AtpScraper::Html.get_and_parse(request_uri)
7
+ ranking = AtpScraper::Ranking.new
8
+ ranking.pickup_ranking_data(ranking_doc)
9
+ end
10
+
11
+ def self.player_activity(player_id, year)
12
+ request_uri = "/players/anything/#{player_id}/player-activity?year=#{year}"
13
+ activity_doc = AtpScraper::Html.get_and_parse(request_uri)
14
+ activity = AtpScraper::Activity.new
15
+ activity.pickup_activity_data(activity_doc)
16
+ end
17
+ end
18
+ end
@@ -9,59 +9,49 @@
9
9
  module AtpScraper
10
10
  # Scrape ranking data
11
11
  class Ranking
12
- class << self
13
- def get(rank_range = nil)
14
- request_uri = build_uri(rank_range)
15
- ranking_doc = AtpScraper::Html.get_and_parse(request_uri)
16
- pickup_ranking_data(ranking_doc)
12
+ def pickup_ranking_data(ranking_doc)
13
+ result = []
14
+ search_player_doc(ranking_doc).each do |player_doc|
15
+ result.push(pickup_player_data(player_doc))
17
16
  end
17
+ result
18
+ end
18
19
 
19
- def build_uri(rank_range = "0-100")
20
- "/en/rankings/singles?rankRange=#{rank_range}"
21
- end
20
+ private
22
21
 
23
- def pickup_ranking_data(ranking_doc)
24
- result = []
25
- search_player_doc(ranking_doc).each do |player_doc|
26
- result.push(pickup_player_data(player_doc))
27
- end
28
- result
29
- end
30
-
31
- def search_player_doc(ranking_doc)
32
- ranking_doc.css(".mega-table tbody tr")
33
- end
22
+ def search_player_doc(ranking_doc)
23
+ ranking_doc.css(".mega-table tbody tr")
24
+ end
34
25
 
35
- def pickup_player_data(player_doc)
36
- url = pickup_player_url(player_doc)
37
- {
38
- ranking: pickup_player_rank(player_doc),
39
- player_name: pickup_player_name(player_doc),
40
- player_url_name: get_url_name(url),
41
- player_id: get_url_id(url)
42
- }
43
- end
26
+ def pickup_player_data(player_doc)
27
+ url = pickup_player_url(player_doc)
28
+ {
29
+ ranking: pickup_player_rank(player_doc),
30
+ player_name: pickup_player_name(player_doc),
31
+ player_url_name: get_url_name(url),
32
+ player_id: get_url_id(url)
33
+ }
34
+ end
44
35
 
45
- def pickup_player_rank(player_doc)
46
- player_doc.css(".rank-cell").first.content.strip
47
- end
36
+ def pickup_player_rank(player_doc)
37
+ player_doc.css(".rank-cell").first.content.strip
38
+ end
48
39
 
49
- def pickup_player_name(player_doc)
50
- player_doc.css(".player-cell").first.content.strip
51
- end
40
+ def pickup_player_name(player_doc)
41
+ player_doc.css(".player-cell").first.content.strip
42
+ end
52
43
 
53
- def pickup_player_url(player_doc)
54
- player_doc.css(".player-cell a").attr("href").value
55
- end
44
+ def pickup_player_url(player_doc)
45
+ player_doc.css(".player-cell a").attr("href").value
46
+ end
56
47
 
57
- # url => "/en/players/rafael-nadal/n409/overview"
58
- def get_url_name(url)
59
- url.split("/")[3]
60
- end
48
+ # url => "/en/players/rafael-nadal/n409/overview"
49
+ def get_url_name(url)
50
+ url.split("/")[3]
51
+ end
61
52
 
62
- def get_url_id(url)
63
- url.split("/")[4]
64
- end
53
+ def get_url_id(url)
54
+ url.split("/")[4]
65
55
  end
66
56
  end
67
57
  end
@@ -1,3 +1,3 @@
1
1
  module AtpScraper
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/atp_scraper.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "atp_scraper/version"
2
+ require "atp_scraper/get"
3
+ require "atp_scraper/html"
2
4
  require "atp_scraper/activity"
3
5
  require "atp_scraper/ranking"
4
- require "atp_scraper/html"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: atp_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mosuke5
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-16 00:00:00.000000000 Z
11
+ date: 2016-02-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -142,6 +142,7 @@ files:
142
142
  - bin/setup
143
143
  - lib/atp_scraper.rb
144
144
  - lib/atp_scraper/activity.rb
145
+ - lib/atp_scraper/get.rb
145
146
  - lib/atp_scraper/html.rb
146
147
  - lib/atp_scraper/ranking.rb
147
148
  - lib/atp_scraper/version.rb