fmscraper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fmscraper-0.1.0/LICENSE +21 -0
- fmscraper-0.1.0/PKG-INFO +42 -0
- fmscraper-0.1.0/README.md +27 -0
- fmscraper-0.1.0/fmscraper/__init__.py +4 -0
- fmscraper-0.1.0/fmscraper/match_list_scraper.py +82 -0
- fmscraper-0.1.0/fmscraper/match_stats.py +22 -0
- fmscraper-0.1.0/fmscraper/xmas_generator.py +95 -0
- fmscraper-0.1.0/fmscraper.egg-info/PKG-INFO +42 -0
- fmscraper-0.1.0/fmscraper.egg-info/SOURCES.txt +12 -0
- fmscraper-0.1.0/fmscraper.egg-info/dependency_links.txt +1 -0
- fmscraper-0.1.0/fmscraper.egg-info/requires.txt +3 -0
- fmscraper-0.1.0/fmscraper.egg-info/top_level.txt +1 -0
- fmscraper-0.1.0/pyproject.toml +22 -0
- fmscraper-0.1.0/setup.cfg +4 -0
fmscraper-0.1.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Mieszko Pugowski
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
fmscraper-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: fmscraper
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Scraper for FotMob matches
|
5
|
+
Author: Mieszko Pugowski
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/MieszkoPugowski/FMScraper
|
8
|
+
Requires-Python: >=3.8
|
9
|
+
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: selenium
|
12
|
+
Requires-Dist: requests
|
13
|
+
Requires-Dist: beautifulsoup4
|
14
|
+
Dynamic: license-file
|
15
|
+
|
16
|
+
# FMScraper
|
17
|
+
|
18
|
+
FMScraper is a Python-based web scraping tool designed to collect football match data from [FotMob](https://www.fotmob.com/). It automates the extraction of match information.
|
19
|
+
|
20
|
+
Inspired by: [Webscraper-PremData](https://github.com/deanpatel2/Webscraper-PremData/tree/main) and [scraping-football-sites](https://github.com/axelbol/scraping-football-sites/tree/main)
|
21
|
+
|
22
|
+
## Features
|
23
|
+
|
24
|
+
- Scrapes match info from FotMob
|
25
|
+
- Handles JavaScript-driven layouts using Selenium
|
26
|
+
- Extracts data for specific leagues, seasons, and matchweeks
|
27
|
+
|
28
|
+
## Requirements
|
29
|
+
|
30
|
+
- Python 3.8+
|
31
|
+
- [Selenium](https://selenium.dev/)
|
32
|
+
- [chromedriver](https://chromedriver.chromium.org/) or another compatible WebDriver
|
33
|
+
|
34
|
+
## Disclaimer
|
35
|
+
For educational and research purposes only. Do not use it commercially.
|
36
|
+
|
37
|
+
## Installation
|
38
|
+
|
39
|
+
1. Clone the repository:
|
40
|
+
```bash
|
41
|
+
git clone https://github.com/MieszkoPugowski/FMScraper.git
|
42
|
+
cd FMScraper
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# FMScraper
|
2
|
+
|
3
|
+
FMScraper is a Python-based web scraping tool designed to collect football match data from [FotMob](https://www.fotmob.com/). It automates the extraction of match information.
|
4
|
+
|
5
|
+
Inspired by: [Webscraper-PremData](https://github.com/deanpatel2/Webscraper-PremData/tree/main) and [scraping-football-sites](https://github.com/axelbol/scraping-football-sites/tree/main)
|
6
|
+
|
7
|
+
## Features
|
8
|
+
|
9
|
+
- Scrapes match info from FotMob
|
10
|
+
- Handles JavaScript-driven layouts using Selenium
|
11
|
+
- Extracts data for specific leagues, seasons, and matchweeks
|
12
|
+
|
13
|
+
## Requirements
|
14
|
+
|
15
|
+
- Python 3.8+
|
16
|
+
- [Selenium](https://selenium.dev/)
|
17
|
+
- [chromedriver](https://chromedriver.chromium.org/) or another compatible WebDriver
|
18
|
+
|
19
|
+
## Disclaimer
|
20
|
+
For educational and research purposes only. Do not use it commercially.
|
21
|
+
|
22
|
+
## Installation
|
23
|
+
|
24
|
+
1. Clone the repository:
|
25
|
+
```bash
|
26
|
+
git clone https://github.com/MieszkoPugowski/FMScraper.git
|
27
|
+
cd FMScraper
|
@@ -0,0 +1,82 @@
|
|
1
|
+
"""
|
2
|
+
|
3
|
+
Author: Mieszko Pugowski
|
4
|
+
|
5
|
+
FotMob scraper
|
6
|
+
|
7
|
+
**FOR EDUCATIONAL PURPOSES ONLY**
|
8
|
+
"""
|
9
|
+
from selenium import webdriver
|
10
|
+
from selenium.webdriver.common.by import By
|
11
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
12
|
+
from selenium.webdriver.support import expected_conditions as ec
|
13
|
+
import time
|
14
|
+
|
15
|
+
|
16
|
+
# Sample values matching the (league_id, league, season) parameters of
# MatchLinks. NOTE(review): nothing in the visible module reads these
# constants -- presumably they are example inputs; confirm before removing.
LEAGUE_ID = 38
LEAGUE = "bundesliga"
# Season string in the "20xx-20xx" form that is put into the ?season= query.
SEASON = "2024-2025"
|
19
|
+
|
20
|
+
|
21
|
+
class MatchLinks:
    """Collect FotMob match ids for one league season with headless Chrome.

    Creating an instance starts a Chrome WebDriver and builds the URL of the
    season's matches page. ``get_matches_ids`` then visits the per-round pages
    and extracts the id fragment of every match link it finds.
    """

    def __init__(self, league_id: int, league: str, season: str):
        """Store the league/season selection and start the browser.

        Args:
            league_id: Numeric league id used in the FotMob URL path.
            league: League slug used in the FotMob URL path.
            season: Season string placed in the ``season`` query parameter
                (the original error text describes it as "20xx-20xx").
        """
        self.base_url = "https://www.fotmob.com/leagues"
        self.league_id = str(league_id)
        self.league = league
        self.season = season
        self.final_url = ""
        # Setting up selenium driver
        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument('--headless')
        self.driver = webdriver.Chrome(options=options)
        self._url_to_scrape()

    def _url_to_scrape(self):
        """Build the season matches URL and store it in ``self.final_url``.

        No validation of the league id or season happens here; the values are
        only interpolated into the URL. The previous bare ``except`` returned
        a message nobody read and could leave ``final_url`` empty, so it has
        been removed in favour of formatting that cannot fail.
        """
        self.final_url = (
            f"{self.base_url}/{self.league_id}/matches/{self.league}"
            f"?season={self.season}"
        )

    def _consent_fotmob(self):
        """Click the cookie-consent button once it becomes clickable.

        Waits up to 5 seconds for the button; the wait's own exception
        propagates to the caller if the button never becomes clickable.
        """
        wait = WebDriverWait(self.driver, 5)
        consent_button = wait.until(
            ec.element_to_be_clickable(
                (By.CSS_SELECTOR,
                 "button.fc-button.fc-cta-consent.fc-primary-button"))
        )
        consent_button.click()

    def get_matches_ids(self, rounds):
        """Return the match ids found on the first ``rounds`` round pages.

        Args:
            rounds: Number of round pages to visit; round indices are
                ``0 .. rounds - 1``.

        Returns:
            A list of id strings (the part of each match link after the last
            ``#``). For backward compatibility, when a round page yields no
            match links the message string
            ``"You have exceeded the number of rounds in the league"`` is
            returned instead and the ids collected so far are discarded.

        The WebDriver is always quit before this method returns or raises,
        so an instance supports a single call.
        """
        games_list = []
        try:
            self.driver.get(self.final_url)
            self._consent_fotmob()
            # NOTE(review): indices start at 0 -- confirm whether FotMob's
            # ``round`` query parameter is 0- or 1-based.
            for i in range(rounds):
                round_i = self.final_url + f"&group=by-round&round={i}"
                self.driver.get(round_i)
                try:
                    # Fixed pause so the JavaScript-rendered list can load.
                    time.sleep(2)
                    hrefs = [a.get_attribute("href") for a in
                             self.driver.find_elements(
                                 By.CSS_SELECTOR,
                                 "a.css-1ajdexg-MatchWrapper.e1mxmq6p0")]
                except Exception:
                    # Best effort: skip this round but keep going. Unlike a
                    # bare ``except`` this lets KeyboardInterrupt/SystemExit
                    # through.
                    print(f"Error: stale element reference for match {i}")
                    continue
                if not hrefs:
                    return "You have exceeded the number of rounds in the league"
                games_list.extend(hrefs)
        finally:
            # Release the browser on every path, including the early return
            # and any exception raised while loading pages.
            self.driver.quit()
        # Anchors without an href yield None; skip them instead of crashing.
        return [game.split('#')[-1].replace("\n", "")
                for game in games_list if game]

    def write_to_file(self, file_name, ids):
        """Write one id per line to ``<file_name>.txt``, replacing the file.

        Args:
            file_name: Output path without the ``.txt`` extension.
            ids: Iterable of ids; each is formatted with ``str()``.
        """
        with open(f"{file_name}.txt", "w", encoding="utf-8") as file:
            file.writelines(f'{match_id}\n' for match_id in ids)
|
82
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
import requests
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
import json
|
4
|
+
from fmscraper.xmas_generator import generate_xmas_header
|
5
|
+
|
6
|
+
|
7
|
+
class MatchStats:
    """Fetch the match-details JSON for a single FotMob match."""

    def __init__(self, match_id, timeout=10):
        """Remember the match id and derive the API path for it.

        Args:
            match_id: Match identifier interpolated into the ``matchId``
                query parameter.
            timeout: Seconds passed to ``requests.get`` so a stalled
                connection cannot block forever. Defaults to 10.
        """
        self.url = "https://www.fotmob.com"
        self.id = match_id
        self.api_url = f'/api/data/matchDetails?matchId={self.id}'
        self.timeout = timeout

    def get_json_content(self):
        """Request the match details and return the decoded JSON document.

        The request carries an ``x-mas`` header generated from the API path.

        Returns:
            The parsed JSON payload.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        headers = {
            "x-mas": generate_xmas_header(self.api_url)
        }
        full_url = self.url + self.api_url
        response = requests.get(full_url, headers=headers,
                                timeout=self.timeout)
        response.raise_for_status()
        # Decode the body directly: routing JSON through an HTML parser can
        # drop "<...>" sequences and rewrite entities inside string values.
        return json.loads(response.text)
|
@@ -0,0 +1,95 @@
|
|
1
|
+
import base64
|
2
|
+
import hashlib
|
3
|
+
import json
|
4
|
+
import time
|
5
|
+
|
6
|
+
# Text appended to the JSON request body before hashing to produce the API signature. This is not a joke.
|
7
|
+
H_LYRICS = """[Spoken Intro: Alan Hansen & Trevor Brooking]
|
8
|
+
I think it's bad news for the English game
|
9
|
+
We're not creative enough, and we're not positive enough
|
10
|
+
|
11
|
+
[Refrain: Ian Broudie & Jimmy Hill]
|
12
|
+
It's coming home, it's coming home, it's coming
|
13
|
+
Football's coming home (We'll go on getting bad results)
|
14
|
+
It's coming home, it's coming home, it's coming
|
15
|
+
Football's coming home
|
16
|
+
It's coming home, it's coming home, it's coming
|
17
|
+
Football's coming home
|
18
|
+
It's coming home, it's coming home, it's coming
|
19
|
+
Football's coming home
|
20
|
+
|
21
|
+
[Verse 1: Frank Skinner]
|
22
|
+
Everyone seems to know the score, they've seen it all before
|
23
|
+
They just know, they're so sure
|
24
|
+
That England's gonna throw it away, gonna blow it away
|
25
|
+
But I know they can play, 'cause I remember
|
26
|
+
|
27
|
+
[Chorus: All]
|
28
|
+
Three lions on a shirt
|
29
|
+
Jules Rimet still gleaming
|
30
|
+
Thirty years of hurt
|
31
|
+
Never stopped me dreaming
|
32
|
+
|
33
|
+
[Verse 2: David Baddiel]
|
34
|
+
So many jokes, so many sneers
|
35
|
+
But all those "Oh, so near"s wear you down through the years
|
36
|
+
But I still see that tackle by Moore and when Lineker scored
|
37
|
+
Bobby belting the ball, and Nobby dancing
|
38
|
+
|
39
|
+
[Chorus: All]
|
40
|
+
Three lions on a shirt
|
41
|
+
Jules Rimet still gleaming
|
42
|
+
Thirty years of hurt
|
43
|
+
Never stopped me dreaming
|
44
|
+
|
45
|
+
[Bridge]
|
46
|
+
England have done it, in the last minute of extra time!
|
47
|
+
What a save, Gordon Banks!
|
48
|
+
Good old England, England that couldn't play football!
|
49
|
+
England have got it in the bag!
|
50
|
+
I know that was then, but it could be again
|
51
|
+
|
52
|
+
[Refrain: Ian Broudie]
|
53
|
+
It's coming home, it's coming
|
54
|
+
Football's coming home
|
55
|
+
It's coming home, it's coming home, it's coming
|
56
|
+
Football's coming home
|
57
|
+
(England have done it!)
|
58
|
+
It's coming home, it's coming home, it's coming
|
59
|
+
Football's coming home
|
60
|
+
It's coming home, it's coming home, it's coming
|
61
|
+
Football's coming home
|
62
|
+
[Chorus: All]
|
63
|
+
(It's coming home) Three lions on a shirt
|
64
|
+
(It's coming home, it's coming) Jules Rimet still gleaming
|
65
|
+
(Football's coming home
|
66
|
+
It's coming home) Thirty years of hurt
|
67
|
+
(It's coming home, it's coming) Never stopped me dreaming
|
68
|
+
(Football's coming home
|
69
|
+
It's coming home) Three lions on a shirt
|
70
|
+
(It's coming home, it's coming) Jules Rimet still gleaming
|
71
|
+
(Football's coming home
|
72
|
+
It's coming home) Thirty years of hurt
|
73
|
+
(It's coming home, it's coming) Never stopped me dreaming
|
74
|
+
(Football's coming home
|
75
|
+
It's coming home) Three lions on a shirt
|
76
|
+
(It's coming home, it's coming) Jules Rimet still gleaming
|
77
|
+
(Football's coming home
|
78
|
+
It's coming home) Thirty years of hurt
|
79
|
+
(It's coming home, it's coming) Never stopped me dreaming
|
80
|
+
(Football's coming home)"""
|
81
|
+
|
82
|
+
def generate_xmas_header(url, h=H_LYRICS):
    """Build the base64-encoded signed header value for an API path.

    The body holds the given ``url``, the current time in milliseconds and a
    fixed ``foo`` string. Its compact JSON form, concatenated with ``h``, is
    hashed with MD5; the upper-cased hex digest becomes the signature.

    Args:
        url: API path (with query string) to sign.
        h: Text appended to the JSON body before hashing.

    Returns:
        Base64 text of the compact JSON object
        ``{"body": ..., "signature": ...}``.
    """
    compact = (',', ':')
    payload = {
        "url": url,
        "code": int(time.time() * 1000),
        "foo": "production:e590188e5cefd1927f5971700c5e8175db729285-undefined",
    }
    # The signature covers the serialized body followed by the shared text.
    signed_text = json.dumps(payload, separators=compact) + h
    digest = hashlib.md5(signed_text.encode('utf-8')).hexdigest().upper()
    envelope = json.dumps({"body": payload, "signature": digest},
                          separators=compact)
    return base64.b64encode(envelope.encode('utf-8')).decode('utf-8')
|
@@ -0,0 +1,42 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: fmscraper
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: Scraper for FotMob matches
|
5
|
+
Author: Mieszko Pugowski
|
6
|
+
License-Expression: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/MieszkoPugowski/FMScraper
|
8
|
+
Requires-Python: >=3.8
|
9
|
+
Description-Content-Type: text/markdown
|
10
|
+
License-File: LICENSE
|
11
|
+
Requires-Dist: selenium
|
12
|
+
Requires-Dist: requests
|
13
|
+
Requires-Dist: beautifulsoup4
|
14
|
+
Dynamic: license-file
|
15
|
+
|
16
|
+
# FMScraper
|
17
|
+
|
18
|
+
FMScraper is a Python-based web scraping tool designed to collect football match data from [FotMob](https://www.fotmob.com/). It automates the extraction of match information.
|
19
|
+
|
20
|
+
Inspired by: [Webscraper-PremData](https://github.com/deanpatel2/Webscraper-PremData/tree/main) and [scraping-football-sites](https://github.com/axelbol/scraping-football-sites/tree/main)
|
21
|
+
|
22
|
+
## Features
|
23
|
+
|
24
|
+
- Scrapes match info from FotMob
|
25
|
+
- Handles JavaScript-driven layouts using Selenium
|
26
|
+
- Extracts data for specific leagues, seasons, and matchweeks
|
27
|
+
|
28
|
+
## Requirements
|
29
|
+
|
30
|
+
- Python 3.8+
|
31
|
+
- [Selenium](https://selenium.dev/)
|
32
|
+
- [chromedriver](https://chromedriver.chromium.org/) or another compatible WebDriver
|
33
|
+
|
34
|
+
## Disclaimer
|
35
|
+
For educational and research purposes only. Do not use it commercially.
|
36
|
+
|
37
|
+
## Installation
|
38
|
+
|
39
|
+
1. Clone the repository:
|
40
|
+
```bash
|
41
|
+
git clone https://github.com/MieszkoPugowski/FMScraper.git
|
42
|
+
cd FMScraper
|
@@ -0,0 +1,12 @@
|
|
1
|
+
LICENSE
|
2
|
+
README.md
|
3
|
+
pyproject.toml
|
4
|
+
fmscraper/__init__.py
|
5
|
+
fmscraper/match_list_scraper.py
|
6
|
+
fmscraper/match_stats.py
|
7
|
+
fmscraper/xmas_generator.py
|
8
|
+
fmscraper.egg-info/PKG-INFO
|
9
|
+
fmscraper.egg-info/SOURCES.txt
|
10
|
+
fmscraper.egg-info/dependency_links.txt
|
11
|
+
fmscraper.egg-info/requires.txt
|
12
|
+
fmscraper.egg-info/top_level.txt
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
fmscraper
|
@@ -0,0 +1,22 @@
|
|
1
|
+
[project]
|
2
|
+
name = "fmscraper"
|
3
|
+
version = "0.1.0"
|
4
|
+
description = "Scraper for FotMob matches"
|
5
|
+
authors = [
|
6
|
+
{ name = "Mieszko Pugowski"}
|
7
|
+
]
|
8
|
+
license = "MIT"
|
9
|
+
readme = "README.md"
|
10
|
+
requires-python = ">=3.8"
|
11
|
+
dependencies = [
|
12
|
+
"selenium",
|
13
|
+
"requests",
|
14
|
+
"beautifulsoup4"
|
15
|
+
]
|
16
|
+
|
17
|
+
[build-system]
|
18
|
+
requires = ["setuptools", "wheel"]
|
19
|
+
build-backend = "setuptools.build_meta"
|
20
|
+
|
21
|
+
[project.urls]
|
22
|
+
"Homepage" = "https://github.com/MieszkoPugowski/FMScraper"
|