atp_scraper 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f7fe84730985d940e8b413c65f1b33cc375c30ec
4
- data.tar.gz: ef3f2b93d9fa239d98254e0c272ef4cd3cd6d7e4
3
+ metadata.gz: f7d4557a6bb017dd9f6607a7a55a7ffd8d53b219
4
+ data.tar.gz: 1bbae10d70dd2deac57b51c0f86c7b80fda6fc55
5
5
  SHA512:
6
- metadata.gz: b614765df55a42cfdababaf9c798970d868676fff8be8e7d4b8538d87081d9a0800721dd03231aeec982ff3dd2e187dee53951bca0350cc6ca32c4013cb7148e
7
- data.tar.gz: bd0e5db0f6f37e89321c0436b734a59878db052c9ab7b39ef114a4de9ecabc18bf38d2b59737724ff0af651ff684cdef57f7ef9bf7f07fb6eee875a19d4f41a4
6
+ metadata.gz: b71241b3908338ba1d07c73d708bfd2ea9ffe5c7f3226a17243a296a78c9d5a49a01f3d54499b23f26303f5adbeabf4a3b4985578a231e3d486825e8dd2fa920
7
+ data.tar.gz: 8e4854126992fab5cd987e38365c150a4976edfd157da2e3dd516fbbfda32d9f013de39678aa55952661782cf4959e404e7afc7caa07f54b1cb03b2ea5b6cf5b
data/README.md CHANGED
@@ -1,3 +1,4 @@
1
+ [![Gem Version](https://badge.fury.io/rb/atp_scraper.svg)](https://badge.fury.io/rb/atp_scraper)
1
2
  [![Build Status](https://travis-ci.org/mosuke5/atp_scraper.svg?branch=master)](https://travis-ci.org/mosuke5/atp_scraper)
2
3
  # AtpScraper
3
4
  AtpScraper is a tool for scraping tennis data from atpworldtour.com
@@ -49,6 +50,7 @@ AtpScraper::Get.player_activity("n409", 2016)
49
50
  # score: "673 64 63 674 26",
50
51
  # win_loss: "L",
51
52
  # tournament_name: "Australian Open",
53
+ # tournament_category: "grandslam",
52
54
  # tournament_location: "Melbourne, Australia",
53
55
  # tournament_start_date: "2016.01.18",
54
56
  # tournament_end_date: "2016.01.31",
@@ -1,7 +1,17 @@
1
1
  module Activities
2
2
  # Activity Record Class
3
3
  class Record
4
- def self.pickup_record(record_doc)
4
+ def initialize(doc)
5
+ @record = doc
6
+ end
7
+
8
+ def info
9
+ pickup_record(@record)
10
+ end
11
+
12
+ private
13
+
14
+ def pickup_record(record_doc)
5
15
  result = {}
6
16
  record_doc.css("td").each_with_index do |td, n|
7
17
  record_content = td.content.strip
@@ -1,5 +1,75 @@
1
1
  module Activities
2
2
  # Activity Tournament Class
3
3
  class Tournament
4
+ def initialize(doc)
5
+ @tournament = doc
6
+ end
7
+
8
+ def info
9
+ pickup_info(@tournament)
10
+ end
11
+
12
+ def records
13
+ search_records_doc
14
+ end
15
+
16
+ private
17
+
18
+ def pickup_info(tournament_doc)
19
+ tournament_date = pickup_text(".tourney-dates")
20
+ surface = pickup_surface
21
+ caption = pickup_text(".activity-tournament-caption")
22
+ {
23
+ name: pickup_text(".tourney-title"),
24
+ category: pickup_category,
25
+ location: pickup_text(".tourney-location"),
26
+ date: divide_tournament_date(tournament_date),
27
+ year: tournament_date[0, 4],
28
+ surface: surface[:surface],
29
+ surface_inout: surface[:inout],
30
+ ranking: pickup_player_rank(caption)
31
+ }
32
+ end
33
+
34
+ # Before: String "2011.01.03 - 2011.01.08"
35
+ # After: Hash { start: "2011.01.03", end: "2011.01.08" }
36
+ def divide_tournament_date(date)
37
+ date = date.split('-').map(&:strip)
38
+ { start: date[0], end: date[1] }
39
+ end
40
+
41
+ def pickup_text(selector)
42
+ @tournament.css(selector).first.content.strip
43
+ end
44
+
45
+ def pickup_category
46
+ # ex) /~/media/images/tourtypes/categorystamps_itf_118x64.png?xxxxx
47
+ badge_url = @tournament.css(".tourney-badge-wrapper img").attr("src").value
48
+ badge_url.match(/categorystamps_(.*)_[0-9]*x[0-9]*.png/)[1]
49
+ end
50
+
51
+ def pickup_surface
52
+ surface = @tournament
53
+ .css(".tourney-details")[1]
54
+ .css(".item-details")
55
+ .first.content.gsub(/\t|\s/, "")
56
+ divide_surface(surface)
57
+ end
58
+
59
+ # "OutdoorHard" => { surface: "Hard", inout: "Outdoor" }
60
+ def divide_surface(surface)
61
+ inout = surface.match(/^(Outdoor|Indoor)/)
62
+ return { surface: surface, inout: nil } if inout.nil?
63
+ { surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
64
+ end
65
+
66
+ def pickup_player_rank(tournament_caption)
67
+ rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
68
+ rank[1].strip
69
+ end
70
+
71
+ def search_records_doc
72
+ @tournament.css(".mega-table tbody tr")
73
+ end
4
74
  end
5
75
  end
@@ -12,6 +12,7 @@
12
12
  ###################
13
13
 
14
14
  require 'atp_scraper/activities/record'
15
+ require 'atp_scraper/activities/tournament'
15
16
  module AtpScraper
16
17
  # Scrape activity data
17
18
  class Activity
@@ -23,14 +24,12 @@ module AtpScraper
23
24
 
24
25
  def pickup_activity_data
25
26
  result = []
26
- player = {}
27
27
 
28
28
  search_tournaments_doc(@activity_doc).each do |tournament_doc|
29
- tournament = pickup_tournament_info(tournament_doc)
30
- player[:rank] = pickup_player_rank(tournament[:caption])
31
- search_records_doc(tournament_doc).each do |record_doc|
32
- record = Record.pickup_record(record_doc)
33
- record_hash = create_record(record, player, tournament)
29
+ tournament = Tournament.new(tournament_doc)
30
+ tournament.records.each do |record_doc|
31
+ record = Record.new(record_doc)
32
+ record_hash = create_record(record.info, tournament.info)
34
33
  result.push(record_hash)
35
34
  end
36
35
  end
@@ -43,21 +42,18 @@ module AtpScraper
43
42
  activity_doc.css(".activity-tournament-table")
44
43
  end
45
44
 
46
- def search_records_doc(tournament_doc)
47
- tournament_doc.css(".mega-table tbody tr")
48
- end
49
-
50
- def create_record(record, player, tournament)
45
+ def create_record(record, tournament)
51
46
  {
52
47
  year: tournament[:year],
53
48
  player_name: @player_name,
54
- player_rank: player[:rank],
49
+ player_rank: tournament[:ranking],
55
50
  opponent_name: record[:opponent_name],
56
51
  opponent_rank: record[:opponent_rank],
57
52
  round: record[:round],
58
53
  score: record[:score],
59
54
  win_loss: record[:win_loss],
60
55
  tournament_name: tournament[:name],
56
+ tournament_category: tournament[:category],
61
57
  tournament_location: tournament[:location],
62
58
  tournament_start_date: tournament[:date][:start],
63
59
  tournament_end_date: tournament[:date][:end],
@@ -71,49 +67,5 @@ module AtpScraper
71
67
  .css("meta[property=\"pageTransitionTitle\"]")
72
68
  .attr("content").value
73
69
  end
74
-
75
- def pickup_tournament_info(tournament_doc)
76
- tournament_date = pickup_text(tournament_doc, ".tourney-dates")
77
- surface = pickup_surface(tournament_doc)
78
- {
79
- name: pickup_text(tournament_doc, ".tourney-title"),
80
- location: pickup_text(tournament_doc, ".tourney-location"),
81
- date: divide_tournament_date(tournament_date),
82
- year: tournament_date[0, 4],
83
- caption: pickup_text(tournament_doc, ".activity-tournament-caption"),
84
- surface: surface[:surface],
85
- surface_inout: surface[:inout]
86
- }
87
- end
88
-
89
- def pickup_player_rank(tournament_caption)
90
- rank = tournament_caption.match(/ATP Ranking:(.+), Prize/)
91
- rank[1].strip
92
- end
93
-
94
- # Before: String "2011.01.03 - 2011.01.08"
95
- # After: Hash { start: 2011.01.03, end: 2011.01.08 }
96
- def divide_tournament_date(date)
97
- date = date.split('-').map(&:strip)
98
- { start: date[0], end: date[1] }
99
- end
100
-
101
- def pickup_text(doc, selector)
102
- doc.css(selector).first.content.strip
103
- end
104
-
105
- def pickup_surface(tournament_doc)
106
- surface = tournament_doc
107
- .css(".tourney-details")[1]
108
- .css(".item-details")
109
- .first.content.gsub(/\t|\s/, "")
110
- divide_surface(surface)
111
- end
112
-
113
- def divide_surface(surface)
114
- inout = surface.match(/^(Outdoor|Indoor)/)
115
- return { surface: surface, inout: nil } if inout.nil?
116
- { surface: surface.gsub(/#{inout[0]}/, ''), inout: inout[0] }
117
- end
118
70
  end
119
71
  end
@@ -1,3 +1,3 @@
1
1
  module AtpScraper
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: atp_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - mosuke5
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-28 00:00:00.000000000 Z
11
+ date: 2016-02-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri