transfermarkt 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.byebug_history +46 -0
- data/.rspec +2 -0
- data/lib/transfermarkt/club.rb +7 -9
- data/lib/transfermarkt/league.rb +70 -21
- data/lib/transfermarkt/live_game.rb +26 -0
- data/lib/transfermarkt/player.rb +141 -62
- data/lib/transfermarkt/version.rb +1 -1
- data/lib/transfermarkt.rb +5 -2
- data/spec/spec_helper.rb +8 -0
- data/spec/static_htmls/cup.html +1572 -0
- data/spec/static_htmls/jose_antonio_reyes_player_page.html +96 -0
- data/spec/static_htmls/maccabi_haifa_page.html +124 -0
- data/spec/static_htmls/messi_player_page.html +2404 -0
- data/spec/static_htmls/premier_league_html.html +3694 -0
- data/spec/units/club_spec.rb +20 -0
- data/spec/units/league_spec.rb +54 -0
- data/spec/units/player_spec.rb +39 -0
- data/spec/units/transfermarkt_spec.rb +11 -0
- data/transfermarkt.gemspec +5 -0
- metadata +108 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652ee707a1c68398d1a1c5e7fa225efcb5121e62
|
4
|
+
data.tar.gz: 2ab4ee752870bff4cd27e6302a2664fe4303e6c6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a17533a019da46374560802bd3498aa5df94273f560c7c8f705c25f24db8c72d66ae882f2539e41b694cf04babf8f52fc9b514e27e69484b43dda961199a28c
|
7
|
+
data.tar.gz: e3d9d192737972180d2d9a203566fa18d105636985a96b42401439652d27a0ff1592ad10c8cbf418c49d93cd4d70f3464bbeb0b6335aea6c5f31b63d5dfc7dac
|
data/.byebug_history
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
c
|
2
|
+
profile_html.css("table.auflistung").search("tr")[1].text
|
3
|
+
profile_html.css("table.auflistung").search("tr").first.text
|
4
|
+
profile_html.css("table.auflistung").search("tr").first
|
5
|
+
profile_html.css("table.auflistung").search("tr")
|
6
|
+
profile_html.css("table.auflistung").search
|
7
|
+
profile_html.css("table.auflistung")
|
8
|
+
profile_html.css("table.auflistung > tbody")
|
9
|
+
profile_html.css("table.auflistung > tbody:nth-child(2)")
|
10
|
+
profile_html.css("table.auflistung > tbody:nth-child(0)")
|
11
|
+
profile_html.css("table.auflistung > tbody:nth-child(1)")
|
12
|
+
profile_html.css("table.auflistung > tbody")
|
13
|
+
profile_html.css("table.auflistung tbody")
|
14
|
+
profile_html.css("table.auflistung")
|
15
|
+
player_info.find("tr")
|
16
|
+
player_info.find("tr").first
|
17
|
+
player_info.find("tr")
|
18
|
+
player_info
|
19
|
+
exit
|
20
|
+
options
|
21
|
+
c
|
22
|
+
profile_html.css("table.auflistung")
|
23
|
+
profile_html.css("div.dataDaten:nth-child(2) > p:nth-child(2) > span:nth-child(2)").text.strip
|
24
|
+
profile_html.css("div.dataDaten:nth-child(2) > p:nth-child(2) > span:nth-child(2)").text
|
25
|
+
profile_html.css("div.dataDaten:nth-child(2) > p:nth-child(2) > span:nth-child(2)")
|
26
|
+
profile_html.css("table.auflistung > tbody:nth-child(1) > tr:nth-child(4) > td:nth-child(2)")
|
27
|
+
profile_html.css("table.auflistung > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2)")
|
28
|
+
profile_html.css("table.auflistung > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2)").text
|
29
|
+
profile.html.css("table.auflistung > tbody:nth-child(1) > tr:nth-child(1) > td:nth-child(2)").text
|
30
|
+
profile_html.css("div.dataDaten:nth-child(2) > p:nth-child(4) > span:nth-child(2) > a:nth-child(1)").text
|
31
|
+
profile_html.css(".dataMarktwert > a:nth-child(1)").text.split(" ").first
|
32
|
+
profile_html.css(".dataMarktwert > a:nth-child(1)").text
|
33
|
+
profile_html.css(".dataMarktwert > a:nth-child(1)")
|
34
|
+
profile_html.css(".dataBild > img:nth-child(1)").first["src"]
|
35
|
+
profile_html.css(".dataBild > img:nth-child(1)").first
|
36
|
+
profile_html.css(".dataBild > img:nth-child(1)")
|
37
|
+
profile_html.css(".dataName > h1:nth-child(2)").text
|
38
|
+
profile_html.css(".dataName > h1:nth-child(2)")
|
39
|
+
profile_html.css(".hauptpunkt > a:nth-child(1)").text
|
40
|
+
profile_html.css(".hauptpunkt > a:nth-child(1)")
|
41
|
+
profile_html = Nokogiri::HTML(req.parsed_response)
|
42
|
+
Hash.from_xml(req.parsed_response)
|
43
|
+
exit
|
44
|
+
profile_html
|
45
|
+
profile_html.xpath
|
46
|
+
club
|
data/.rspec
ADDED
data/lib/transfermarkt/club.rb
CHANGED
@@ -7,26 +7,24 @@ module Transfermarkt
|
|
7
7
|
:player_uris
|
8
8
|
|
9
9
|
def self.fetch_by_club_uri(club_uri, fetch_players = false)
|
10
|
-
|
11
|
-
|
12
|
-
req = self.get("/#{club_uri}", headers: {"User-Agent" => Transfermarkt::USER_AGENT})
|
10
|
+
req = self.get("/#{club_uri}", headers: {"User-Agent" => UserAgents.rand()})
|
13
11
|
if req.code != 200
|
14
12
|
nil
|
15
13
|
else
|
16
14
|
club_html = Nokogiri::HTML(req.parsed_response)
|
17
15
|
options = {}
|
16
|
+
puts "**** parsing club #{club_uri}"
|
18
17
|
|
19
18
|
options[:club_uri] = club_uri
|
20
|
-
options[:name] = club_html.xpath('//*[@
|
21
|
-
options[:country] = club_html.xpath('//*[@id="
|
22
|
-
options[:player_uris] = club_html.xpath('
|
19
|
+
options[:name] = club_html.xpath('//*[@class="spielername-profil"]').text.strip
|
20
|
+
options[:country] = club_html.xpath('//*[@id="land_select_breadcrumb"]//option[@selected="selected"]').text.strip
|
21
|
+
options[:player_uris] = club_html.xpath('//*[@id="yw1"]//table//tr//td[2]//a[contains(@href,"profil")]').collect{|player_html| player_html["href"]}
|
23
22
|
|
24
|
-
puts "found #{options[:player_uris].count} players"
|
25
23
|
options[:players] = []
|
26
24
|
|
27
25
|
if fetch_players
|
28
26
|
options[:player_uris].each do |player_uri|
|
29
|
-
options[:players] << Transfermarkt::Player.fetch_by_profile_uri(player_uri)
|
27
|
+
options[:players] << Transfermarkt::Player.fetch_by_profile_uri(URI.encode(player_uri))
|
30
28
|
end
|
31
29
|
end
|
32
30
|
|
@@ -36,4 +34,4 @@ module Transfermarkt
|
|
36
34
|
end
|
37
35
|
end
|
38
36
|
end
|
39
|
-
end
|
37
|
+
end
|
data/lib/transfermarkt/league.rb
CHANGED
@@ -4,27 +4,44 @@ module Transfermarkt
|
|
4
4
|
:country,
|
5
5
|
:league_uri,
|
6
6
|
:clubs,
|
7
|
-
:clubs_index
|
7
|
+
:clubs_index,
|
8
8
|
:club_uris
|
9
9
|
|
10
|
+
def valid_league?
|
11
|
+
if name.nil? or name.empty?
|
12
|
+
false
|
13
|
+
else
|
14
|
+
true
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
10
18
|
def self.fetch_clubs_and_uris_by_league_uri(league_uri)
|
11
|
-
req = self.get("/#{league_uri}", headers: {"User-Agent" =>
|
19
|
+
req = self.get("/#{league_uri}", headers: {"User-Agent" => ::UserAgents.rand()})
|
12
20
|
if req.code != 200
|
13
|
-
|
21
|
+
raise req.code.to_s
|
14
22
|
else
|
15
23
|
league_html = Nokogiri::HTML(req.parsed_response)
|
16
24
|
options = {}
|
17
|
-
|
25
|
+
puts "**** Parsing league #{league_uri}"
|
18
26
|
options[:league_uri] = league_uri
|
19
|
-
|
20
|
-
|
27
|
+
unless league_html.xpath('//table[@class="profilheader"]//tr[1]//th[1]')[0].text == "Type of cup:"
|
28
|
+
league_name = league_html.xpath('//select[@id="wettbewerb_select_breadcrumb"]//option[@selected="selected"]')
|
29
|
+
if league_name.empty?
|
30
|
+
options[:name] = league_html.xpath('//div[@class="spielername-profil"]').text.strip
|
31
|
+
options[:country] = league_html.xpath('//table[@class="profilheader"]//img/@title').first.value
|
32
|
+
else
|
33
|
+
options[:name] = league_name[0].text
|
34
|
+
options[:country] = league_html.xpath('//select[@id="land_select_breadcrumb"]//option[@selected="selected"]').text
|
35
|
+
end
|
36
|
+
club_uris = league_html.xpath('//*[@id="yw1"]//table//tr//td[2]//a[1]').collect{|player_html| player_html["href"]}
|
37
|
+
club_names = league_html.xpath('//*[@id="yw1"]//table//tr//td[2]//a[1]').collect{|player_html| player_html.text }
|
38
|
+
|
39
|
+
clubs = Hash[club_names.zip(club_uris)]
|
21
40
|
|
22
|
-
|
23
|
-
club_names = league_html.xpath('//table[@id="vereine"]//tr//td[2]//a[@class="s10"]').collect{|player_html| player_html.text }
|
41
|
+
options[:clubs_index] = clubs
|
24
42
|
|
25
|
-
|
26
|
-
|
27
|
-
options[:clubs_index] = clubs
|
43
|
+
puts "**** Finish parsing #{league_uri}"
|
44
|
+
end
|
28
45
|
self.new(options)
|
29
46
|
end
|
30
47
|
end
|
@@ -32,7 +49,7 @@ module Transfermarkt
|
|
32
49
|
def self.fetch_by_league_uri(league_uri, fetch_clubs = false)
|
33
50
|
puts "fetching league #{league_uri}"
|
34
51
|
|
35
|
-
req = self.get("/#{league_uri}", headers: {"User-Agent" =>
|
52
|
+
req = self.get("/#{league_uri}", headers: {"User-Agent" => Useragents.rand()})
|
36
53
|
if req.code != 200
|
37
54
|
nil
|
38
55
|
else
|
@@ -40,10 +57,10 @@ module Transfermarkt
|
|
40
57
|
options = {}
|
41
58
|
|
42
59
|
options[:league_uri] = league_uri
|
43
|
-
options[:name] = league_html.xpath('
|
44
|
-
options[:country] = league_html.xpath('
|
60
|
+
options[:name] = league_html.xpath('//select[@id="wettbewerb_select_breadcrumb"]//option[@selected="selected"]')[0].text
|
61
|
+
options[:country] = league_html.xpath('//select[@id="land_select_breadcrumb"]//option[@selected="selected"]').text
|
45
62
|
|
46
|
-
options[:club_uris] = league_html.xpath('
|
63
|
+
options[:club_uris] = league_html.xpath('//*[@id="yw1"]//table//tr//td[2]//a[1]').collect{|player_html| player_html["href"]}
|
47
64
|
|
48
65
|
puts "Found #{options[:club_uris].count} clubs"
|
49
66
|
options[:clubs] = []
|
@@ -61,14 +78,46 @@ module Transfermarkt
|
|
61
78
|
end
|
62
79
|
|
63
80
|
def self.fetch_league_uris
|
64
|
-
|
65
|
-
|
81
|
+
competition_uris = ["/wettbewerbe/europa",
|
82
|
+
"/wettbewerbe/asien",
|
83
|
+
"/wettbewerbe/amerika",
|
84
|
+
"/wettbewerbe/afrika"]
|
85
|
+
|
86
|
+
all_leagues = []
|
87
|
+
competition_uris.each do |competition_uri|
|
88
|
+
all_leagues << Transfermarkt::League.fetch_competition_leagues(competition_uri)
|
89
|
+
end
|
90
|
+
|
91
|
+
all_leagues.flatten
|
92
|
+
end
|
93
|
+
|
94
|
+
def self.fetch_competition_leagues(competition_uri)
|
95
|
+
puts "Fetching #{competition_uri}"
|
96
|
+
req = self.get(competition_uri, headers: {"User-Agent" => UserAgents.rand()})
|
97
|
+
league_uris = []
|
66
98
|
if req.code != 200
|
67
|
-
|
99
|
+
[]
|
68
100
|
else
|
69
|
-
|
70
|
-
league_uris
|
101
|
+
competition_html = Nokogiri::HTML(req.parsed_response)
|
102
|
+
league_uris << competition_html.xpath('//*[@id="yw1"]//table[@class="items"]//tr//td[2]//a').collect {|league| league["href"] }
|
103
|
+
|
104
|
+
next_page_link = competition_html.xpath('//*[@id="yw2"]//li[@class="naechste-seite"]//a')[0]
|
105
|
+
if next_page_link
|
106
|
+
link = next_page_link["href"].split("?").first
|
107
|
+
|
108
|
+
page = next_page_link["href"].scan(/page=(\d)/).flatten.first
|
109
|
+
league_uris << Transfermarkt::League.fetch_competition_leagues(link + "?page=#{page}")
|
110
|
+
else
|
111
|
+
league_uris.flatten
|
112
|
+
end
|
113
|
+
|
114
|
+
league_uris.flatten
|
71
115
|
end
|
72
116
|
end
|
73
117
|
end
|
74
|
-
end
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Transfermarkt
|
2
|
+
class LiveGame
|
3
|
+
include HTTParty
|
4
|
+
|
5
|
+
URL = "http://www.transfermarkt.com/en/livescores-und-livetabellen/uebersicht/livescores.html"
|
6
|
+
|
7
|
+
def self.fetch
|
8
|
+
req = self.get(URL, headers: {"User-Agent" => UserAgents.rand()})
|
9
|
+
if req.code != 200
|
10
|
+
nil
|
11
|
+
else
|
12
|
+
live_html = Nokogiri::HTML(req.parsed_response)
|
13
|
+
home_teams = live_html.xpath('//*[@id="centerbig"]//form//div[2]//table//tr//td[4]/a').collect {|a| a["href"]}
|
14
|
+
results = live_html.xpath('//*[@id="centerbig"]//form//div[2]//table//tr//td[6]').collect(&:text).collect(&:strip)
|
15
|
+
away_teams = live_html.xpath('//*[@id="centerbig"]//form//div[2]//table//tr//td[8]/a').collect {|a| a["href"]}
|
16
|
+
|
17
|
+
result_set = []
|
18
|
+
home_teams.each_with_index do |home_team, index|
|
19
|
+
result_set << {home: home_team, result: results[index], away: away_teams[index]}
|
20
|
+
end
|
21
|
+
|
22
|
+
result_set
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
data/lib/transfermarkt/player.rb
CHANGED
@@ -2,117 +2,196 @@ module Transfermarkt
|
|
2
2
|
class Player < Transfermarkt::EntityBase
|
3
3
|
attr_accessor :profile_uri,
|
4
4
|
:age,
|
5
|
-
:date_of_birth,
|
5
|
+
:date_of_birth,
|
6
6
|
:full_name,
|
7
7
|
:name_in_native_country,
|
8
|
+
:complete_name,
|
8
9
|
:foot,
|
9
10
|
:height,
|
10
11
|
:picture,
|
11
12
|
:club,
|
12
|
-
:market_value,
|
13
|
-
:nationality,
|
13
|
+
:market_value,
|
14
|
+
:nationality,
|
14
15
|
:position,
|
15
16
|
:performance_data,
|
16
|
-
:injuries_data
|
17
|
+
:injuries_data,
|
18
|
+
:player_agent
|
17
19
|
|
18
20
|
def initialize(options = {})
|
19
21
|
super
|
20
|
-
|
22
|
+
|
23
|
+
encoding_options = {
|
24
|
+
:invalid => :replace, # Replace invalid byte sequences
|
25
|
+
:undef => :replace, # Replace anything not defined in ASCII
|
26
|
+
:replace => '', # Use a blank for those replacements
|
27
|
+
:UNIVERSAL_NEWLINE_DECORATOR => true # Always break lines with \n
|
28
|
+
}
|
29
|
+
self.age = self.age.to_i
|
30
|
+
self.market_value = #self.market_value.to_s.gsub(",", "").to_i
|
21
31
|
self.height = self.height.to_s.gsub(",", "").to_i
|
32
|
+
self.nationality = self.nationality.to_s.encode(Encoding.find('ASCII'), encoding_options).split("\n").collect(&:strip)
|
33
|
+
if self.date_of_birth.present?
|
34
|
+
self.age = Date.today.year - Date.parse(self.date_of_birth).year
|
35
|
+
end
|
22
36
|
end
|
23
37
|
|
24
|
-
def
|
25
|
-
|
38
|
+
def valid_player?
|
39
|
+
if club.nil? or club.empty?
|
40
|
+
false
|
41
|
+
else
|
42
|
+
true
|
43
|
+
end
|
44
|
+
end
|
26
45
|
|
27
|
-
|
46
|
+
def self.fetch_by_profile_uri(profile_uri = "")
|
47
|
+
req = self.get("/#{profile_uri}", headers: {"User-Agent" => UserAgents.rand()})
|
28
48
|
if req.code != 200
|
29
49
|
nil
|
30
50
|
else
|
51
|
+
#byebug
|
31
52
|
profile_html = Nokogiri::HTML(req.parsed_response)
|
32
53
|
options = {}
|
33
54
|
|
34
|
-
|
35
|
-
options[:club] = profile_html.xpath('//*[@id="centerbig"]//div[1]//div//table//tr[2]//td//a[1]').text
|
36
|
-
options[:full_name] = profile_html.xpath('//*[@id="centerbig"]//div[1]//div//table//tr[1]//td[2]//h1').text.gsub(/[\d]/, "").strip
|
37
|
-
options[:picture] = profile_html.xpath('//*[@id="centerbig"]//div[1]//table//tr//td[1]//img')[1]["src"]
|
38
|
-
|
39
|
-
headers = profile_html.xpath('//*[@id="centerbig"]//div[1]//table//tr//td[2]//table//tr//td[1]').collect(&:text)
|
40
|
-
headers = headers.collect {|header| header.downcase.gsub(":", "").gsub(" ", "_").gsub("'s", "").to_sym}
|
41
|
-
|
42
|
-
values = profile_html.xpath('//*[@id="centerbig"]//div[1]//table//tr//td[2]//table//tr//td[2]').collect(&:text)
|
43
|
-
values = values.collect {|value| value.strip.match(/[A-Za-z0-9,. -]*/)[0] }
|
44
|
-
|
45
|
-
# get player performance
|
46
|
-
options[:performance_data] = {}
|
47
|
-
|
48
|
-
performance_uri = profile_uri.gsub("profil", "leistungsdaten")
|
55
|
+
puts "**** Parsing player #{profile_uri}"
|
49
56
|
|
50
|
-
options =
|
51
|
-
|
52
|
-
# If there is a performance data blcok
|
53
|
-
if profile_html.xpath('//*[@id="centerbig"]/div[4]/p[3]/a').any?
|
54
|
-
|
55
|
-
perforamnce_types = []
|
56
|
-
10.times do |i|
|
57
|
-
perforamnce_types << (Time.now.year - i).to_s
|
58
|
-
end
|
57
|
+
options[:profile_uri] = profile_uri
|
59
58
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
# //*[@id="main"]/div[7]/div/div/div[2]/div[2]/div[2]/table/tbody/tr[2]/td/a
|
60
|
+
club = profile_html.css(".hauptpunkt > a:nth-child(1)").text
|
61
|
+
|
62
|
+
unless club.empty?
|
63
|
+
options[:club] = profile_html.css(".hauptpunkt > a:nth-child(1)").text
|
64
|
+
|
65
|
+
# options[:position] = profile_html.xpath('//*[@id="main"]//div[7]//table[1]//tr[3]//td[1]')[1].text.strip
|
66
|
+
options[:full_name] = profile_html.css(".dataName > h1:nth-child(2)").text
|
67
|
+
|
68
|
+
options[:picture] = "https:" + profile_html.css(".dataBild > img:nth-child(1)").first["src"]
|
69
|
+
#profile_html.xpath('//*[@id="main"]//div[7]//div//div//div[2]//div[1]//img')[0]["src"]
|
70
|
+
|
71
|
+
# options[:name_in_native_country] = profile_html.xpath('//*[@id="main"]//div[9]//div[1]//div[2]//div[2]//div[1]//div//table//tr[1]//td[1]')[0].text
|
72
|
+
|
73
|
+
options[:market_value] = profile_html.css(".dataMarktwert > a:nth-child(1)").text.split(" ").first
|
74
|
+
|
75
|
+
agent = profile_html.css("div.dataDaten:nth-child(2) > p:nth-child(4) > span:nth-child(2) > a:nth-child(1)").text
|
76
|
+
|
77
|
+
# if options[:market_value].include?("Mil")
|
78
|
+
# options[:market_value] = options[:market_value].to_f * 1_000_000
|
79
|
+
# else
|
80
|
+
# options[:market_value] = options[:market_value].to_f * 100_000
|
81
|
+
# end
|
82
|
+
|
83
|
+
options[:name_in_native_country] = options[:full_name]
|
84
|
+
options[:complete_name] = options[:full_name]
|
85
|
+
|
86
|
+
player_info = profile_html.css("table.auflistung").search("tr")
|
87
|
+
|
88
|
+
player_info.each do |info_row|
|
89
|
+
header = info_row.search('th')[0].text.strip
|
90
|
+
if header == "Name in home country:"
|
91
|
+
options[:name_in_native_country] = info_row.search('td')[0].text.strip
|
92
|
+
puts options
|
93
|
+
elsif header == "Date of birth:"
|
94
|
+
options[:date_of_birth] = info_row.search('td')[0].text.strip
|
95
|
+
puts options
|
96
|
+
elsif header == "Place of birth:"
|
97
|
+
options[:place_of_birth] = info_row.search('td')[0].text.strip
|
98
|
+
puts options
|
99
|
+
# elsif header == "Age:"
|
100
|
+
# options[:age] = info_row.search('td')[0].text.strip
|
101
|
+
# puts options
|
102
|
+
elsif header == "Height:"
|
103
|
+
options[:height] = info_row.search('td')[0].text.strip
|
104
|
+
puts options
|
105
|
+
elsif header == "Nationality:"
|
106
|
+
options[:nationality] = info_row.search('td')[0].text.strip
|
107
|
+
puts options
|
108
|
+
elsif header == "Position:"
|
109
|
+
options[:position] = info_row.search('td')[0].text.strip
|
110
|
+
puts options
|
111
|
+
elsif header == "Foot:"
|
112
|
+
options[:foot] = info_row.search('td')[0].text.strip
|
113
|
+
puts options
|
114
|
+
elsif header == "Complete name:"
|
115
|
+
options[:complete_name] = info_row.search('td')[0].text.strip
|
116
|
+
puts options
|
66
117
|
end
|
67
|
-
|
68
|
-
goalkeeper = options[:position] == "Goalkeeper"
|
69
|
-
options[:performance_data][type] = self.fetch_performance_data(performance_with_type_uri, goalkeeper)
|
70
118
|
end
|
71
|
-
end
|
72
|
-
|
73
|
-
options[:injuries_data] = self.fetch_injuries_data(profile_html)
|
74
119
|
|
75
|
-
|
120
|
+
# get player performance
|
121
|
+
# options[:performance_data] = {}
|
122
|
+
#
|
123
|
+
# performance_uri = profile_uri.gsub("profil", "leistungsdaten") + "/saison/"
|
124
|
+
#
|
125
|
+
# years = (Time.now.year - 6..Time.now.year).to_a
|
126
|
+
# years.each do |year|
|
127
|
+
# goalkeeper = options[:position] == "Goalkeeper"
|
128
|
+
# options[:performance_data][year.to_s] = self.fetch_performance_data(performance_uri + year.to_s, goalkeeper)
|
129
|
+
# end
|
130
|
+
#
|
131
|
+
# # Get injury data
|
132
|
+
#
|
133
|
+
# injury_uri = profile_uri.gsub("profil", "verletzungen")
|
134
|
+
#
|
135
|
+
# options[:injuries_data] = self.fetch_injuries_data(injury_uri)
|
136
|
+
end
|
76
137
|
|
77
138
|
self.new(options)
|
78
139
|
end
|
79
140
|
end
|
80
141
|
private
|
81
142
|
def self.fetch_performance_data(performance_uri, is_goalkeeper = false)
|
82
|
-
req = self.get("/#{performance_uri}", headers: {"User-Agent" =>
|
143
|
+
req = self.get("/#{performance_uri}", headers: {"User-Agent" => UserAgents.rand()})
|
83
144
|
if req.code != 200
|
84
145
|
nil
|
85
146
|
else
|
86
147
|
performance_data = []
|
87
148
|
performance_html = Nokogiri::HTML(req.parsed_response)
|
88
149
|
performance_headers = if is_goalkeeper
|
89
|
-
[:competition, :
|
150
|
+
[:competition, :blank, :appearances, :goals, :yellow_cards, :second_yellows, :red_cards, :goals_conceded, :games_without_conceded_goals, :minutes]
|
90
151
|
else
|
91
|
-
[:competition, :
|
152
|
+
[:competition, :blank, :appearances, :goals, :assists, :yellow_cards, :second_yellows, :red_cards, :minutes]
|
92
153
|
end
|
93
|
-
|
94
|
-
|
154
|
+
# performance_html.xpath('//*[@id="yw2"]//table//tbody//tr[position()>0]').each do |competition|
|
155
|
+
# values = Nokogiri::HTML::DocumentFragment.parse(competition.to_html).search("*//td").collect(&:text)
|
156
|
+
# if values.first == ""
|
157
|
+
# values.delete_at 0
|
158
|
+
# end
|
159
|
+
# competition_performance = Hash[performance_headers.zip(values)]
|
160
|
+
# competition_performance[:minutes] = competition_performance[:minutes].gsub(".", "").to_i
|
161
|
+
# performance_data << competition_performance
|
162
|
+
# end
|
163
|
+
performance_html.xpath('//*[@id="yw2"]//table//tfoot//tr[position()>0]').each do |competition|
|
95
164
|
values = Nokogiri::HTML::DocumentFragment.parse(competition.to_html).search("*//td").collect(&:text)
|
96
165
|
if values.first == ""
|
97
166
|
values.delete_at 0
|
98
167
|
end
|
99
|
-
|
168
|
+
competition_performance = Hash[performance_headers.zip(values)]
|
169
|
+
competition_performance[:minutes] = competition_performance[:minutes].gsub(".", "").to_i
|
170
|
+
competition_performance.delete(:blank)
|
171
|
+
performance_data << competition_performance
|
100
172
|
end
|
101
|
-
performance_data
|
102
173
|
end
|
103
|
-
|
104
174
|
return performance_data
|
105
175
|
end
|
106
176
|
|
107
|
-
def self.fetch_injuries_data(
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
177
|
+
def self.fetch_injuries_data(injury_uri)
|
178
|
+
req = self.get("/#{injury_uri}", headers: {"User-Agent" => UserAgents.rand()})
|
179
|
+
if req.code != 200
|
180
|
+
[]
|
181
|
+
else
|
182
|
+
injury_data = []
|
183
|
+
player_html = Nokogiri::HTML(req.parsed_response)
|
184
|
+
injuries_headers = [:season, :injury, :from, :to, :days_out, :games_missed]
|
185
|
+
|
186
|
+
player_html.xpath('//*[@id="yw1"]//table//tr[position()>1]').each do |injury_row|
|
187
|
+
values = Nokogiri::HTML::DocumentFragment.parse(injury_row.to_html).search("*//td").collect(&:text)
|
188
|
+
injury_details = Hash[injuries_headers.zip(values)]
|
189
|
+
injury_details[:days_out] = injury_details[:days_out].strip.to_i
|
190
|
+
injury_details[:games_missed] = injury_details[:games_missed].strip.to_i
|
191
|
+
injury_data << injury_details
|
192
|
+
end
|
193
|
+
injury_data
|
114
194
|
end
|
115
|
-
injury_data
|
116
195
|
end
|
117
196
|
end
|
118
|
-
end
|
197
|
+
end
|
data/lib/transfermarkt.rb
CHANGED
@@ -1,18 +1,21 @@
|
|
1
1
|
require "transfermarkt/version"
|
2
|
-
|
2
|
+
require 'byebug'
|
3
|
+
require 'active_support/core_ext/hash'
|
3
4
|
module Transfermarkt
|
4
5
|
require 'httparty'
|
5
6
|
require 'nokogiri'
|
7
|
+
require 'useragents'
|
6
8
|
|
7
9
|
autoload :EntityBase, 'transfermarkt/entity_base'
|
8
10
|
autoload :Player, 'transfermarkt/player'
|
9
11
|
autoload :Club, 'transfermarkt/club'
|
10
12
|
autoload :League, 'transfermarkt/league'
|
13
|
+
autoload :LiveGame, 'transfermarkt/live_game'
|
11
14
|
|
12
15
|
USER_AGENT = "Firefox"
|
13
16
|
|
14
17
|
def Transfermarkt.base_uri
|
15
|
-
"http://
|
18
|
+
"http://transfermarkt.co.uk"
|
16
19
|
end
|
17
20
|
|
18
21
|
def self.test_fetch_league
|