atp_scraper 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f7fe84730985d940e8b413c65f1b33cc375c30ec
4
- data.tar.gz: ef3f2b93d9fa239d98254e0c272ef4cd3cd6d7e4
3
+ metadata.gz: f7d4557a6bb017dd9f6607a7a55a7ffd8d53b219
4
+ data.tar.gz: 1bbae10d70dd2deac57b51c0f86c7b80fda6fc55
5
5
  SHA512:
6
- metadata.gz: b614765df55a42cfdababaf9c798970d868676fff8be8e7d4b8538d87081d9a0800721dd03231aeec982ff3dd2e187dee53951bca0350cc6ca32c4013cb7148e
7
- data.tar.gz: bd0e5db0f6f37e89321c0436b734a59878db052c9ab7b39ef114a4de9ecabc18bf38d2b59737724ff0af651ff684cdef57f7ef9bf7f07fb6eee875a19d4f41a4
6
+ metadata.gz: b71241b3908338ba1d07c73d708bfd2ea9ffe5c7f3226a17243a296a78c9d5a49a01f3d54499b23f26303f5adbeabf4a3b4985578a231e3d486825e8dd2fa920
7
+ data.tar.gz: 8e4854126992fab5cd987e38365c150a4976edfd157da2e3dd516fbbfda32d9f013de39678aa55952661782cf4959e404e7afc7caa07f54b1cb03b2ea5b6cf5b
data/README.md CHANGED
@@ -1,3 +1,4 @@
1
+ [![Gem Version](https://badge.fury.io/rb/atp_scraper.svg)](https://badge.fury.io/rb/atp_scraper)
1
2
  [![Build Status](https://travis-ci.org/mosuke5/atp_scraper.svg?branch=master)](https://travis-ci.org/mosuke5/atp_scraper)
2
3
  # AtpScraper
3
4
  AtpScraper is a tool for scraping tennis data from atpworldtour.com
@@ -49,6 +50,7 @@ AtpScraper::Get.player_activity("n409", 2016)
49
50
  # score: "673 64 63 674 26",
50
51
  # win_loss: "L",
51
52
  # tournament_name: "Australian Open",
53
+ # tournament_category: "grandslam",
52
54
  # tournament_location: "Melbourne, Australia",
53
55
  # tournament_start_date: "2016.01.18",
54
56
  # tournament_end_date: "2016.01.31",
@@ -1,7 +1,17 @@
1
1
  module Activities
2
2
  # Activity Record Class
3
3
  class Record
4
- def self.pickup_record(record_doc)
4
+ def initialize(doc)
5
+ @record = doc
6
+ end
7
+
8
+ def info
9
+ pickup_record(@record)
10
+ end
11
+
12
+ private
13
+
14
+ def pickup_record(record_doc)
5
15
  result = {}
6
16
  record_doc.css("td").each_with_index do |td, n|
7
17
  record_content = td.content.strip
@@ -1,5 +1,75 @@
1
1
  module Activities
2
2
  # Activity Tournament Class
3
3
  class Tournament
4
+ def initialize(doc)
5
+ @tournament = doc
6
+ end
7
+
8
+ def info
9
+ pickup_info(@tournament)
10
+ end
11
+
12
+ def records
13
+ search_records_doc
14
+ end
15
+
16
+ private
17
+
18
+ def pickup_info(tournament_doc)
19
+ tournament_date = pickup_text(".tourney-dates")
20
+ surface = pickup_surface
21
+ caption = pickup_text(".activity-tournament-caption")
22
+ {
23
+ name: pickup_text(".tourney-title"),
24
+ category: pickup_category,
25
+ location: pickup_text(".tourney-location"),
26
+ date: divide_tournament_date(tournament_date),
27
+ year: tournament_date[0, 4],
28
+ surface: surface[:surface],
29
+ surface_inout: surface[:inout],
30
+ ranking: pickup_player_rank(caption)
31
+ }
32
+ end
33
+
34
+ # Before: String "2011.01.03 - 2011.01.08"
35
+ # After: Hash { start: 2011.01.03, end: 2011.01.08 }
36
+ def divide_tournament_date(date)
37
+ date = date.split('-').map(&:strip)
38
+ { start: date[0], end: date[1] }
39
+ end
40
+
41
+ def pickup_text(selector)
42
+ @tournament.css(selector).first.content.strip
43
+ end
44
+
45
+ def pickup_category
46
+ # ex) /~/media/images/tourtypes/categorystamps_itf_118x64.png?xxxxx
47
+ badge_url = @tournament.css(".tourney-badge-wrapper img").attr("src").value
48
+ badge_url.match(/categorystamps_(.*)_[0-9]*x[0-9]*.png/)[1]
49
+ end
50
+
51
+ def pickup_surface
52
+ surface = @tournament
53
+ .css(".tourney-details")[1]
54
+ .css(".item-details")
55
+ .first.content.gsub(/\t|\s/, "")
56
+ divide_surface(surface)
57
+ end
58
+
59
+ # "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
60
+ def divide_surface(surface)
61
+ inout = surface.match(/^(Outdoor|Indoor)/)
62
+ return { surface: surface, inout: nil } if inout.nil?
63
+ { surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
64
+ end
65
+
66
+ def pickup_player_rank(tournament_caption)
67
+ rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
68
+ rank[1].strip
69
+ end
70
+
71
+ def search_records_doc
72
+ @tournament.css(".mega-table tbody tr")
73
+ end
4
74
  end
5
75
  end
@@ -12,6 +12,7 @@
12
12
  ###################
13
13
 
14
14
  require 'atp_scraper/activities/record'
15
+ require 'atp_scraper/activities/tournament'
15
16
  module AtpScraper
16
17
  # Scrape activity data
17
18
  class Activity
@@ -23,14 +24,12 @@ module AtpScraper
23
24
 
24
25
  def pickup_activity_data
25
26
  result = []
26
- player = {}
27
27
 
28
28
  search_tournaments_doc(@activity_doc).each do |tournament_doc|
29
- tournament = pickup_tournament_info(tournament_doc)
30
- player[:rank] = pickup_player_rank(tournament[:caption])
31
- search_records_doc(tournament_doc).each do |record_doc|
32
- record = Record.pickup_record(record_doc)
33
- record_hash = create_record(record, player, tournament)
29
+ tournament = Tournament.new(tournament_doc)
30
+ tournament.records.each do |record_doc|
31
+ record = Record.new(record_doc)
32
+ record_hash = create_record(record.info, tournament.info)
34
33
  result.push(record_hash)
35
34
  end
36
35
  end
@@ -43,21 +42,18 @@ module AtpScraper
43
42
  activity_doc.css(".activity-tournament-table")
44
43
  end
45
44
 
46
- def search_records_doc(tournament_doc)
47
- tournament_doc.css(".mega-table tbody tr")
48
- end
49
-
50
- def create_record(record, player, tournament)
45
+ def create_record(record, tournament)
51
46
  {
52
47
  year: tournament[:year],
53
48
  player_name: @player_name,
54
- player_rank: player[:rank],
49
+ player_rank: tournament[:ranking],
55
50
  opponent_name: record[:opponent_name],
56
51
  opponent_rank: record[:opponent_rank],
57
52
  round: record[:round],
58
53
  score: record[:score],
59
54
  win_loss: record[:win_loss],
60
55
  tournament_name: tournament[:name],
56
+ tournament_category: tournament[:category],
61
57
  tournament_location: tournament[:location],
62
58
  tournament_start_date: tournament[:date][:start],
63
59
  tournament_end_date: tournament[:date][:end],
@@ -71,49 +67,5 @@ module AtpScraper
71
67
  .css("meta[property=\"pageTransitionTitle\"]")
72
68
  .attr("content").value
73
69
  end
74
-
75
- def pickup_tournament_info(tournament_doc)
76
- tournament_date = pickup_text(tournament_doc, ".tourney-dates")
77
- surface = pickup_surface(tournament_doc)
78
- {
79
- name: pickup_text(tournament_doc, ".tourney-title"),
80
- location: pickup_text(tournament_doc, ".tourney-location"),
81
- date: divide_tournament_date(tournament_date),
82
- year: tournament_date[0, 4],
83
- caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
84
- surface: surface[:surface],
85
- surface_inout: surface[:inout]
86
- }
87
- end
88
-
89
- def pickup_player_rank(tournament_caption)
90
- rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
91
- rank[1].strip
92
- end
93
-
94
- # Before: String "2011.01.03 - 2011.01.08"
95
- # After: Hash { start: 2011.01.03, end: 2011.01.08 }
96
- def divide_tournament_date(date)
97
- date = date.split('-').map(&:strip)
98
- { start: date[0], end: date[1] }
99
- end
100
-
101
- def pickup_text(doc, selector)
102
- doc.css(selector).first.content.strip
103
- end
104
-
105
- def pickup_surface(tournament_doc)
106
- surface = tournament_doc
107
- .css(".tourney-details")[1]
108
- .css(".item-details")
109
- .first.content.gsub(/\t|\s/, "")
110
- divide_surface(surface)
111
- end
112
-
113
- def divide_surface(surface)
114
- inout = surface.match(/^(Outdoor|Indoor)/)
115
- return { surface: surface, inout: nil } if inout.nil?
116
- { surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
117
- end
118
70
  end
119
71
  end
@@ -1,3 +1,3 @@
1
1
  module AtpScraper
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: atp_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mosuke5
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-28 00:00:00.000000000 Z
11
+ date: 2016-02-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri