football-docs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,87 @@
1
+ # @nutmeg/docs
2
+
3
+ Searchable football data provider documentation for AI coding agents. Like [Context7](https://context7.com) for football data.
4
+
5
+ An MCP server that gives your AI agent instant access to documentation for Opta, StatsBomb, Wyscout, SportMonks, kloppy, and free sources (FBref, Understat, ClubElo). Search event types, qualifier IDs, coordinate systems, API endpoints, and cross-provider mappings.
6
+
7
+ ## Quick start
8
+
9
+ ### With Claude Code
10
+
11
+ Add to your MCP config:
12
+
13
+ ```json
14
+ {
15
+ "mcpServers": {
16
+ "football-docs": {
17
+ "command": "npx",
18
+ "args": ["-y", "@nutmeg/docs"]
19
+ }
20
+ }
21
+ }
22
+ ```
23
+
24
+ ### With any MCP client
25
+
26
+ ```bash
27
+ npx @nutmeg/docs
28
+ ```
29
+
30
+ ## Tools
31
+
32
+ | Tool | Description |
33
+ |------|-------------|
34
+ | `search_docs` | Full-text search across all provider docs. Filter by provider. |
35
+ | `list_providers` | List all indexed providers and their doc coverage. |
36
+ | `compare_providers` | Compare how different providers handle the same concept. |
37
+
38
+ ## Example queries
39
+
40
+ - "What is Opta qualifier 76?" (big chance)
41
+ - "How does StatsBomb represent shot events?"
42
+ - "Compare Opta and Wyscout coordinate systems"
43
+ - "Does SportMonks have xG data?"
44
+ - "What event types does kloppy map to GenericEvent?"
45
+
46
+ ## Indexed providers
47
+
48
+ | Provider | Chunks | Categories |
49
+ |----------|--------|------------|
50
+ | Opta | 29 | event-types, qualifiers, coordinate-system, api-access |
51
+ | StatsBomb | 143 | event-types, data-model, coordinate-system, api-access, xg-model |
52
+ | Wyscout | 61 | event-types, data-model, coordinate-system, api-access |
53
+ | SportMonks | 71 | event-types, data-model, api-access |
54
+ | kloppy | 100 | data-model, usage, provider-mapping |
55
+ | Free sources | 28 | overview, fbref, understat |
56
+
57
+ **432 searchable chunks** across 6 providers.
58
+
59
+ ## Contributing docs
60
+
61
+ Add or improve provider documentation by creating markdown files in `docs/{provider}/`:
62
+
63
+ ```
64
+ docs/
65
+ opta/
66
+ event-types.md
67
+ qualifiers.md
68
+ coordinate-system.md
69
+ api-access.md
70
+ statsbomb/
71
+ ...
72
+ your-new-provider/
73
+ events.md
74
+ api.md
75
+ ```
76
+
77
+ Then rebuild the index:
78
+
79
+ ```bash
80
+ npm run ingest
81
+ ```
82
+
83
+ Each markdown file is chunked by heading (#, ##, or ###) and indexed with FTS5. PRs welcome.
84
+
85
+ ## License
86
+
87
+ MIT
package/bin/serve.js ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env node
2
import { resolve, dirname } from "node:path";
import { fileURLToPath, pathToFileURL } from "node:url";

const __dirname = dirname(fileURLToPath(import.meta.url));
// Dynamic import() takes a URL-style specifier, not a filesystem path. An
// absolute Windows path such as C:\...\dist\index.js is parsed as a URL with
// the unsupported scheme "c:" and rejected by the ESM loader, so the resolved
// path is converted to a file: URL before being imported.
await import(pathToFileURL(resolve(__dirname, "..", "dist", "index.js")).href);
package/data/docs.db ADDED
Binary file
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Nutmeg Football Docs MCP Server
3
+ *
4
+ * A Context7-style searchable index of football data provider documentation.
5
+ * Exposes three tools:
6
+ * - search_docs: Full-text search across all provider docs
7
+ * - list_providers: List all indexed providers and their coverage
+ * - compare_providers: Compare how different providers handle the same concept
8
+ *
9
+ * Data is stored in a SQLite FTS5 index for fast offline search.
10
+ */
11
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Nutmeg Football Docs MCP Server
3
+ *
4
+ * A Context7-style searchable index of football data provider documentation.
5
+ * Exposes three tools:
6
+ * - search_docs: Full-text search across all provider docs
7
+ * - list_providers: List all indexed providers and their coverage
+ * - compare_providers: Compare how different providers handle the same concept
8
+ *
9
+ * Data is stored in a SQLite FTS5 index for fast offline search.
10
+ */
11
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
12
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
13
+ import { z } from "zod";
14
+ import Database from "better-sqlite3";
15
+ import { resolve, dirname } from "node:path";
16
+ import { fileURLToPath } from "node:url";
17
+ import { existsSync } from "node:fs";
18
+ const __dirname = dirname(fileURLToPath(import.meta.url));
19
+ const DB_PATH = resolve(__dirname, "..", "data", "docs.db");
20
+ // ── Database ────────────────────────────────────────────────────────────
21
/**
 * Open the bundled documentation index in read-only mode.
 *
 * @returns {Database} A read-only better-sqlite3 handle on DB_PATH. The caller
 *   is responsible for closing it.
 * @throws {Error} If no database file exists at DB_PATH.
 */
function openDb() {
    const indexIsPresent = existsSync(DB_PATH);
    if (indexIsPresent) {
        return new Database(DB_PATH, { readonly: true });
    }
    throw new Error(`Docs database not found at ${DB_PATH}. Run 'npm run ingest' first to build the index.`);
}
27
+ // ── Server ──────────────────────────────────────────────────────────────
28
+ const server = new McpServer({
29
+ name: "nutmeg-football-docs",
30
+ version: "0.1.0",
31
+ });
32
// Tool: search_docs
//
// Full-text search over the docs_fts index, optionally restricted to a single
// provider. Returns a single text item: either the matching chunks rendered as
// markdown, or a "no results" hint.
server.tool("search_docs", "Search football data provider documentation. Use for finding event types, qualifier IDs, API endpoints, coordinate systems, data models, and cross-provider mappings. Returns the most relevant documentation chunks.", {
    query: z.string().describe("Search query. Examples: 'Opta goal qualifier', 'StatsBomb shot event type', 'coordinate system differences', 'xG qualifier ID', 'SportMonks fixture endpoint'"),
    provider: z
        .string()
        .optional()
        .describe("Filter to a specific provider: opta, statsbomb, wyscout, sportmonks, fbref, understat, kloppy, or leave empty for all"),
    max_results: z
        .number()
        .optional()
        .default(10)
        .describe("Maximum number of results to return (default 10)"),
}, async ({ query, provider, max_results }) => {
    // SQLite treats a negative LIMIT as "no limit" and rejects a fractional
    // one, so coerce the requested size to a positive integer.
    const limit = Number.isFinite(max_results) ? Math.max(1, Math.trunc(max_results)) : 10;
    const db = openDb();
    try {
        let sql = `
      SELECT provider, category, title, content,
             rank * -1 as relevance
      FROM docs_fts
      WHERE docs_fts MATCH ?
    `;
        const filterParams = [];
        if (provider) {
            sql += ` AND provider = ?`;
            filterParams.push(provider.toLowerCase());
        }
        sql += ` ORDER BY rank LIMIT ?`;
        const statement = db.prepare(sql);
        let rows;
        try {
            // First try the text as-is so that valid FTS5 syntax (phrases, OR,
            // prefix*, NEAR) keeps working exactly as before.
            rows = statement.all(query, ...filterParams, limit);
        }
        catch (err) {
            if (err?.code !== "SQLITE_ERROR") {
                throw err;
            }
            // The MATCH operand is parsed as an FTS5 query expression, so
            // ordinary punctuation ("What is Opta qualifier 76?",
            // "coordinate-system") is a syntax error. Retry with every
            // letter/digit run quoted as a literal term; terms are still
            // implicitly ANDed, matching the behavior of a plain keyword query.
            const terms = query.match(/[\p{L}\p{N}]+/gu) ?? [];
            rows = terms.length > 0
                ? statement.all(terms.map((term) => `"${term}"`).join(" "), ...filterParams, limit)
                : [];
        }
        if (rows.length === 0) {
            return {
                content: [
                    {
                        type: "text",
                        text: `No results found for "${query}"${provider ? ` in ${provider}` : ""}. Try broader terms or remove the provider filter.`,
                    },
                ],
            };
        }
        const results = rows
            .map((r, i) => `## [${i + 1}] ${r.title}\n**Provider:** ${r.provider} | **Category:** ${r.category}\n\n${r.content}`)
            .join("\n\n---\n\n");
        return {
            content: [
                {
                    type: "text",
                    text: `Found ${rows.length} result(s) for "${query}"${provider ? ` in ${provider}` : ""}:\n\n${results}`,
                },
            ],
        };
    }
    finally {
        // Always release the handle, including on query errors.
        db.close();
    }
});
87
// Tool: list_providers
//
// Summarises the docs table: one line per provider with its total chunk count
// and a per-category breakdown.
server.tool("list_providers", "List all indexed football data providers, their document count, and coverage categories. Use to understand what documentation is available.", {}, async () => {
    const db = openDb();
    try {
        const countsQuery = db.prepare(`SELECT provider, category, COUNT(*) as chunks
         FROM docs
         GROUP BY provider, category
         ORDER BY provider, category`);
        // Rows arrive sorted by provider then category, and Map preserves
        // insertion order, so the summary keeps that ordering.
        const summary = new Map();
        countsQuery.all().forEach(({ provider, category, chunks }) => {
            const info = summary.get(provider) ?? { categories: [], total: 0 };
            info.categories.push(`${category} (${chunks})`);
            info.total += chunks;
            summary.set(provider, info);
        });
        const lines = [];
        for (const [name, { total, categories }] of summary) {
            lines.push(`**${name}** (${total} chunks): ${categories.join(", ")}`);
        }
        const text = `Indexed providers:\n\n${lines.join("\n")}`;
        return { content: [{ type: "text", text }] };
    }
    finally {
        db.close();
    }
});
120
// Tool: compare_providers
//
// Runs one full-text search for the topic and shows the best-matching chunks
// for each provider side by side (at most three per provider).
server.tool("compare_providers", "Compare what two or more providers offer for a specific data type or concept. For example: 'How do Opta and StatsBomb represent shot events differently?'", {
    topic: z.string().describe("The concept to compare across providers. Examples: 'shot events', 'coordinate systems', 'xG', 'pass types'"),
    providers: z.array(z.string()).optional().describe("Providers to compare. If omitted, compares all indexed providers."),
}, async ({ topic, providers }) => {
    const CHUNKS_PER_PROVIDER = 3;
    const db = openDb();
    try {
        let sql = `
      SELECT provider, category, title, content,
             rank * -1 as relevance
      FROM docs_fts
      WHERE docs_fts MATCH ?
    `;
        const filterParams = [];
        if (providers && providers.length > 0) {
            const placeholders = providers.map(() => "?").join(", ");
            sql += ` AND provider IN (${placeholders})`;
            filterParams.push(...providers.map((p) => p.toLowerCase()));
        }
        // Order by relevance only and apply the per-provider cap in code below.
        // A global "ORDER BY provider, rank LIMIT 30" let the alphabetically
        // first providers consume the whole limit, silently dropping later
        // providers from the comparison.
        sql += ` ORDER BY rank`;
        const statement = db.prepare(sql);
        let rows;
        try {
            // Try the topic verbatim first so valid FTS5 syntax keeps working.
            rows = statement.all(topic, ...filterParams);
        }
        catch (err) {
            if (err?.code !== "SQLITE_ERROR") {
                throw err;
            }
            // The MATCH operand is parsed as an FTS5 expression, so punctuation
            // in a natural-language topic is a syntax error. Retry with each
            // letter/digit run quoted as a literal, implicitly ANDed term.
            const terms = topic.match(/[\p{L}\p{N}]+/gu) ?? [];
            rows = terms.length > 0
                ? statement.all(terms.map((term) => `"${term}"`).join(" "), ...filterParams)
                : [];
        }
        if (rows.length === 0) {
            return {
                content: [
                    {
                        type: "text",
                        text: `No documentation found for "${topic}". Try different terms.`,
                    },
                ],
            };
        }
        // Group by provider, keeping only the best-ranked chunks for each
        // (rows are already in relevance order).
        const grouped = new Map();
        for (const r of rows) {
            const entries = grouped.get(r.provider) ?? [];
            if (entries.length < CHUNKS_PER_PROVIDER) {
                entries.push(`### ${r.title}\n${r.content}`);
            }
            grouped.set(r.provider, entries);
        }
        // Present providers alphabetically, as the previous SQL ordering did.
        const sections = [...grouped.entries()]
            .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
            .map(([p, chunks]) => `## ${p}\n\n${chunks.join("\n\n")}`)
            .join("\n\n---\n\n");
        return {
            content: [
                {
                    type: "text",
                    text: `Comparison for "${topic}" across ${grouped.size} provider(s):\n\n${sections}`,
                },
            ],
        };
    }
    finally {
        // Always release the handle, including on query errors.
        db.close();
    }
});
174
+ // ── Start ───────────────────────────────────────────────────────────────
175
/** Connect the MCP server to a stdio transport. */
async function main() {
    await server.connect(new StdioServerTransport());
}
// A failed start-up is fatal: report it on stderr and exit non-zero.
main().catch((startupError) => {
    console.error("Failed to start nutmeg docs server:", startupError);
    process.exit(1);
});
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Ingest provider documentation into the SQLite FTS5 index.
3
+ *
4
+ * Reads markdown files from docs/{provider}/*.md and chunks them
5
+ * by heading (#, ## or ###), storing each chunk with metadata.
6
+ *
7
+ * Usage:
8
+ * npm run ingest # ingest all providers
9
+ * npm run ingest -- --provider opta # ingest one provider
10
+ *
11
+ * Doc file naming convention:
12
+ * docs/{provider}/{category}.md
13
+ *
14
+ * Example:
15
+ * docs/opta/event-types.md
16
+ * docs/opta/qualifiers.md
17
+ * docs/opta/coordinate-system.md
18
+ * docs/statsbomb/events.md
19
+ * docs/kloppy/data-model.md
20
+ */
21
+ export {};
package/dist/ingest.js ADDED
@@ -0,0 +1,133 @@
1
+ /**
2
+ * Ingest provider documentation into the SQLite FTS5 index.
3
+ *
4
+ * Reads markdown files from docs/{provider}/*.md and chunks them
5
+ * by heading (#, ## or ###), storing each chunk with metadata.
6
+ *
7
+ * Usage:
8
+ * npm run ingest # ingest all providers
9
+ * npm run ingest -- --provider opta # ingest one provider
10
+ *
11
+ * Doc file naming convention:
12
+ * docs/{provider}/{category}.md
13
+ *
14
+ * Example:
15
+ * docs/opta/event-types.md
16
+ * docs/opta/qualifiers.md
17
+ * docs/opta/coordinate-system.md
18
+ * docs/statsbomb/events.md
19
+ * docs/kloppy/data-model.md
20
+ */
21
+ import Database from "better-sqlite3";
22
+ import { resolve, dirname, basename } from "node:path";
23
+ import { fileURLToPath } from "node:url";
24
+ import { existsSync, mkdirSync, readFileSync, readdirSync } from "node:fs";
25
+ const __dirname = dirname(fileURLToPath(import.meta.url));
26
+ const DOCS_DIR = resolve(__dirname, "..", "docs");
27
+ const DB_DIR = resolve(__dirname, "..", "data");
28
+ const DB_PATH = resolve(DB_DIR, "docs.db");
29
/**
 * Split a markdown document into chunks at #, ## and ### headings.
 *
 * Text before the first chunk boundary is titled "{provider} - {category}";
 * every later chunk is titled with the text of the heading that opened it and
 * includes that heading line in its content. Chunks whose trimmed content is
 * 20 characters or shorter are dropped.
 *
 * Lines inside fenced code blocks (``` or ~~~) are never treated as headings,
 * so shell/Python comments such as "# Single club history" stay inside the
 * chunk that contains the code sample instead of starting a new one.
 *
 * @param {string} text - Raw markdown.
 * @param {string} provider - Provider name stored on every chunk.
 * @param {string} category - Category name stored on every chunk.
 * @returns {{provider: string, category: string, title: string, content: string}[]}
 */
function chunkMarkdown(text, provider, category) {
    const chunks = [];
    let currentTitle = `${provider} - ${category}`;
    let currentLines = [];
    // Marker (e.g. "```") of the code fence we are currently inside, or null.
    let openFence = null;
    const flush = () => {
        const content = currentLines.join("\n").trim();
        if (content.length > 20) {
            chunks.push({ provider, category, title: currentTitle, content });
        }
    };
    for (const line of text.split("\n")) {
        const fence = line.match(/^\s{0,3}(`{3,}|~{3,})/);
        if (fence) {
            const marker = fence[1];
            if (openFence === null) {
                openFence = marker;
            }
            else if (marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                line.slice(fence[0].length).trim() === "") {
                // A closing fence uses the same character, is at least as long
                // as the opener, and carries no info string.
                openFence = null;
            }
            currentLines.push(line);
            continue;
        }
        const headingMatch = openFence === null ? line.match(/^(#{1,3})\s+(.+)/) : null;
        if (headingMatch && currentLines.length > 0) {
            flush();
            currentTitle = headingMatch[2].trim();
            currentLines = [line];
        }
        else {
            currentLines.push(line);
        }
    }
    flush();
    return chunks;
}
55
/**
 * Index every markdown file found directly inside one provider's docs folder.
 *
 * The file name without its ".md" extension becomes the chunk category.
 *
 * @param db - Open better-sqlite3 handle containing the docs table.
 * @param {string} provider - Folder name under DOCS_DIR.
 * @returns {number} Number of chunks inserted (0 when the folder is missing).
 */
function ingestProvider(db, provider) {
    const providerDir = resolve(DOCS_DIR, provider);
    if (!existsSync(providerDir)) {
        console.log(` Skipping ${provider}: no docs directory`);
        return 0;
    }
    const markdownFiles = readdirSync(providerDir).filter((name) => name.endsWith(".md"));
    const insert = db.prepare("INSERT INTO docs (provider, category, title, content) VALUES (?, ?, ?, ?)");
    return markdownFiles.reduce((runningTotal, file) => {
        const source = readFileSync(resolve(providerDir, file), "utf-8");
        const chunks = chunkMarkdown(source, provider, basename(file, ".md"));
        chunks.forEach((chunk) => {
            insert.run(chunk.provider, chunk.category, chunk.title, chunk.content);
        });
        console.log(` ${provider}/${file}: ${chunks.length} chunks`);
        return runningTotal + chunks.length;
    }, 0);
}
77
/**
 * CLI entry point: build or refresh the SQLite FTS5 index from docs/{provider}/*.md.
 *
 * - No arguments: the index is dropped and rebuilt from every provider folder.
 * - `--provider <name>`: only that provider's rows are replaced; chunks from
 *   all other providers are left untouched.
 *
 * Nothing is written when the docs directory does not exist, and each ingest
 * runs inside a single transaction so a failure cannot leave a half-written
 * index behind.
 */
function main() {
    const providerArg = process.argv.indexOf("--provider");
    const singleProvider = providerArg >= 0 ? process.argv[providerArg + 1] : undefined;
    // Check for source docs before touching the database, so a missing docs
    // folder can never wipe an existing index.
    if (!existsSync(DOCS_DIR)) {
        console.log(`No docs directory at ${DOCS_DIR}`);
        console.log("Create docs/{provider}/*.md files first.");
        return;
    }
    mkdirSync(DB_DIR, { recursive: true });
    const db = new Database(DB_PATH);
    try {
        if (!singleProvider) {
            // Full rebuild: start from an empty schema. Dropping the docs table
            // also drops the triggers attached to it.
            db.exec(`
      DROP TABLE IF EXISTS docs_fts;
      DROP TABLE IF EXISTS docs;
    `);
        }
        db.exec(`
      CREATE TABLE IF NOT EXISTS docs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        provider TEXT NOT NULL,
        category TEXT NOT NULL,
        title TEXT NOT NULL,
        content TEXT NOT NULL
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS docs_fts USING fts5(
        provider,
        category,
        title,
        content,
        content='docs',
        content_rowid='id',
        tokenize='porter unicode61'
      );

      CREATE TRIGGER IF NOT EXISTS docs_ai AFTER INSERT ON docs BEGIN
        INSERT INTO docs_fts(rowid, provider, category, title, content)
        VALUES (new.id, new.provider, new.category, new.title, new.content);
      END;

      CREATE TRIGGER IF NOT EXISTS docs_ad AFTER DELETE ON docs BEGIN
        INSERT INTO docs_fts(docs_fts, rowid, provider, category, title, content)
        VALUES ('delete', old.id, old.provider, old.category, old.title, old.content);
      END;
    `);
        console.log("Ingesting provider docs...\n");
        if (singleProvider) {
            const count = db.transaction(() => {
                // Replace only this provider's rows; the docs_ad trigger keeps
                // the external-content FTS index in step with the deletions.
                db.prepare("DELETE FROM docs WHERE provider = ?").run(singleProvider);
                return ingestProvider(db, singleProvider);
            })();
            console.log(`\nDone: ${count} chunks from ${singleProvider}`);
        }
        else {
            // Only sub-directories can be providers. Calling readdirSync on a
            // stray file in docs/ (e.g. a README) would throw ENOTDIR.
            const providers = readdirSync(DOCS_DIR, { withFileTypes: true })
                .filter((entry) => entry.isDirectory() &&
                readdirSync(resolve(DOCS_DIR, entry.name)).some((f) => f.endsWith(".md")))
                .map((entry) => entry.name);
            const total = db.transaction(() => providers.reduce((sum, provider) => sum + ingestProvider(db, provider), 0))();
            console.log(`\nDone: ${total} chunks from ${providers.length} providers`);
        }
    }
    finally {
        // Release the handle even if ingestion throws.
        db.close();
    }
}
133
+ main();
@@ -0,0 +1,48 @@
1
+ # FBref
2
+
3
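+ Football Reference (fbref.com). The most comprehensive free source for season-level aggregated statistics. Advanced stats are available from 2017/18 for the top 5 European leagues; they are currently supplied by Opta (Stats Perform), which replaced StatsBomb as FBref's data provider in late 2022.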
4
+
5
+ ## What's available
6
+
7
+ ### Team stats (per season)
8
+
9
+ | Category | Examples |
10
+ |----------|---------|
11
+ | Standard | Goals, assists, xG, xAG, progressive passes/carries |
12
+ | Shooting | Shots, SoT, shot distance, free kicks, penalties |
13
+ | Passing | Total/short/medium/long, key passes, final third passes, progressive passes |
14
+ | Pass types | Live ball, dead ball, free kicks, through balls, switches, crosses |
15
+ | Goal and shot creation | SCA, GCA, types (live, dead, take-on, shot, foul, defensive) |
16
+ | Defensive | Tackles, interceptions, blocks, clearances, errors |
17
+ | Possession | Touches by zone, take-ons, carries, progressive carries, receiving |
18
+ | Goalkeeper | Save %, PSxG, crosses stopped, sweeper actions |
19
+
20
+ ### Access methods
21
+
22
+ **Python (soccerdata):**
23
+
24
+ ```python
25
+ import soccerdata as sd
26
+ fbref = sd.FBref('ENG-Premier League', '2024')
27
+ team_stats = fbref.read_team_season_stats(stat_type='standard')
28
+ player_stats = fbref.read_player_season_stats(stat_type='shooting')
29
+ ```
30
+
31
+ **R (worldfootballR):**
32
+
33
+ ```r
34
+ library(worldfootballR)
35
+ team_stats <- fb_season_team_stats("ENG", "M", 2024, "standard")
36
+ player_stats <- fb_big5_advanced_season_stats(season_end_year=2024, stat_type="standard")
37
+ ```
38
+
39
+ ## Coverage
40
+
41
+ Top 5 European leagues + Championship + Champions League. Stats from 1990s, xG from 2017/18.
42
+
43
+ ## Caveats
44
+
45
+ - xG data comes from FBref's data provider (currently Opta; StatsBomb before late 2022). FBref doesn't compute its own.
46
+ - No event-level data (pass-by-pass). Only aggregates.
47
+ - Rate limit: max ~10 requests per minute. Use `time.sleep(6)` between requests.
48
+ - Cache results locally. Aggressive scraping will get you blocked.
@@ -0,0 +1,180 @@
1
+ # Free Football Data Sources
2
+
3
+ ## Overview
4
+
5
+ | Source | Data Type | Access Method | Coverage | Rate Limits |
6
+ |---|---|---|---|---|
7
+ | StatsBomb Open Data | Event-level (full) | GitHub download / API | Select matches (World Cups, specific leagues/seasons) | None |
8
+ | FBref | Aggregated stats | Web scrape / soccerdata | Top leagues, 5+ seasons | Strict (~10 requests/min, i.e. ~6s between requests) |
9
+ | Understat | xG, shot-level | Web scrape / soccerdata | Top 5 European leagues | Moderate |
10
+ | ClubElo | Historical Elo ratings | HTTP API | All top European leagues, 1946-present | Generous |
11
+ | football-data.co.uk | Match results + odds | CSV download | 25+ leagues, 20+ seasons | None |
12
+ | Transfermarkt | Market values, transfers, injuries | Web scrape | All professional leagues | Strict |
13
+ | WhoScored | Match ratings, event-level (limited) | Web scrape (headed browser) | Top leagues | Strict, requires JS rendering |
14
+ | European Football Statistics | Historical results | CSV download | Many European leagues | None |
15
+
16
+ ## StatsBomb Open Data
17
+
18
+ **What it provides**: Full event-level data identical to their commercial product -- every pass, shot, duel, carry, pressure event with coordinates, xG, and freeze frames for shots.
19
+
20
+ **Coverage** (as of 2025):
21
+ - FIFA World Cups (2018, 2022)
22
+ - FIFA Women's World Cup (2019, 2023)
23
+ - UEFA Euro (2020, 2024)
24
+ - La Liga (2004/05-2020/21 -- Messi-era seasons)
25
+ - Premier League (select seasons)
26
+ - NWSL (multiple seasons)
27
+ - FA Women's Super League (multiple seasons)
28
+ - Champions League (select seasons)
29
+ - Various international tournaments
30
+
31
+ **Access**:
32
+ ```bash
33
+ git clone https://github.com/statsbomb/open-data.git
34
+ ```
35
+
36
+ Or via kloppy:
37
+ ```python
38
+ from kloppy import statsbomb
39
+ dataset = statsbomb.load_open_data(match_id=3788741)
40
+ ```
41
+
42
+ **Data format**: JSON files organised by competition and season. See `docs/providers/statsbomb/` for full event type documentation.
43
+
44
+ **License**: Free for non-commercial use with attribution. Must credit StatsBomb.
45
+
46
+ ## FBref
47
+
48
+ **What it provides**: Comprehensive aggregated statistics sourced from Opta (via StatsPerform). Player-level and team-level stats across many categories.
49
+
50
+ **Stat categories**: Passing, shooting, pass types, goal & shot creation (GCA/SCA), defensive actions, possession, playing time, miscellaneous, goalkeeping, advanced goalkeeping.
51
+
52
+ **Coverage**: Top 5 European leagues + MLS, plus many others. Detailed stats from 2017/18 onwards (when Opta data begins); basic stats go back further.
53
+
54
+ **Access**: Web scraping or `soccerdata` Python library. See `fbref.md` for details.
55
+
56
+ **Key URL patterns**:
57
+ - Team: `https://fbref.com/en/squads/{team_id}/{team_name}-Stats`
58
+ - Player: `https://fbref.com/en/players/{player_id}/{player_name}`
59
+ - Match: `https://fbref.com/en/matches/{match_id}/{match_name}`
60
+ - Season: `https://fbref.com/en/comps/{comp_id}/{season}/stats`
61
+
62
+ ## Understat
63
+
64
+ **What it provides**: Expected goals (xG) data at the shot level, plus player and team aggregated stats. Uses their own xG model.
65
+
66
+ **Coverage**: Top 5 European leagues (Premier League, La Liga, Bundesliga, Serie A, Ligue 1) from 2014/15 onwards. Russian Premier League also included.
67
+
68
+ **Access**: Web scraping or `soccerdata` Python library. See `understat.md` for details.
69
+
70
+ **Key data points**: Shot coordinates (x, y), xG per shot, result (goal/saved/blocked/missed), situation (open play/set piece/counter/penalty), body part, player, assist player.
71
+
72
+ ## ClubElo
73
+
74
+ **What it provides**: Historical Elo ratings for European football clubs, updated daily during the season. The Elo model adjusts for home advantage, goal difference, and competition level.
75
+
76
+ **Coverage**: Most European leagues from 1946 to present. Updated daily during the season.
77
+
78
+ **Access**: Simple HTTP API returning CSV.
79
+
80
+ **API endpoints**:
81
+ ```
82
+ # Single club history
83
+ http://api.clubelo.com/{club_name}
84
+ # Example: http://api.clubelo.com/Liverpool
85
+
86
+ # All clubs on a specific date
87
+ http://api.clubelo.com/{YYYY-MM-DD}
88
+ # Example: http://api.clubelo.com/2024-12-01
89
+
90
+ # All clubs in a country on a date
91
+ http://api.clubelo.com/{YYYY-MM-DD}/{country_code}
92
+ ```
93
+
94
+ **Response format** (CSV):
95
+ ```
96
+ Rank,Club,Country,Level,Elo,From,To
97
+ 1,Liverpool,ENG,1,2050,2024-11-30,2024-12-07
98
+ ```
99
+
100
+ **Used in**: myTeam's PL Era Champions story page (`scripts/fetch-elo-history.ts`).
101
+
102
+ ## football-data.co.uk
103
+
104
+ **What it provides**: Match results, odds data, and basic match statistics. Excellent historical coverage.
105
+
106
+ **Coverage**: 25+ leagues, 20+ seasons. Premier League data back to 1993/94.
107
+
108
+ **Access**: Direct CSV download.
109
+
110
+ **URL pattern**: `https://www.football-data.co.uk/mmz4281/{season}/{league}.csv`
111
+
112
+ Season format: `2425` for 2024/25. League codes: `E0` (Premier League), `E1` (Championship), `SP1` (La Liga), `I1` (Serie A), `D1` (Bundesliga), `F1` (Ligue 1).
113
+
114
+ **CSV columns include**:
115
+ - `Date`, `HomeTeam`, `AwayTeam`, `FTHG`, `FTAG`, `FTR` (Full Time Result: H/D/A)
116
+ - `HTHG`, `HTAG`, `HTR` (Half Time)
117
+ - `HS`, `AS` (Shots), `HST`, `AST` (Shots on Target)
118
+ - `HC`, `AC` (Corners), `HF`, `AF` (Fouls), `HY`, `AY` (Yellows), `HR`, `AR` (Reds)
119
+ - Betting odds from multiple bookmakers (B365H, B365D, B365A, etc.)
120
+
121
+ ## Transfermarkt
122
+
123
+ **What it provides**: Player market values, transfer history, contract details, injury history, squad information, manager history.
124
+
125
+ **Coverage**: Essentially all professional leagues worldwide. Market values from ~2004 onwards.
126
+
127
+ **Access**: Web scraping only (no official API). The site uses anti-scraping measures and requires setting a proper User-Agent header.
128
+
129
+ **Python libraries**:
130
+ - `transfermarkt-api` -- unofficial REST API wrapper
131
+ - Direct scraping with `requests` + `BeautifulSoup` (need to set `User-Agent` header)
132
+
133
+ **Key data points**:
134
+ - Player market values (current + historical)
135
+ - Transfer fees and dates
136
+ - Injury history with dates and types
137
+ - Contract expiry dates
138
+ - Squad lists with shirt numbers and positions
139
+
140
+ **Caveats**: Transfermarkt explicitly prohibits automated scraping in their ToS. Use responsibly with generous rate limiting and caching.
141
+
142
+ ## WhoScored
143
+
144
+ **What it provides**: Match ratings, player ratings, event-level data (passes, shots, tackles, etc.) derived from Opta. Also provides match statistics, heat maps, and chalkboard visualisations.
145
+
146
+ **Coverage**: Top European leagues, Champions League, Europa League, international tournaments.
147
+
148
+ **Access**: Web scraping with a **headed browser** (JavaScript rendering required). The match data is embedded in the page as JavaScript objects.
149
+
150
+ **Data extraction**: Match event data is embedded in `matchCentreData` JavaScript variable. Requires executing JS or extracting from page source. See `docs/WHOSCORED_EVENT_DATA.md` for the full event type and qualifier reference.
151
+
152
+ **Key data points**: Full event stream (Opta-derived), player ratings (0-10), team statistics, formation data, touch heat maps.
153
+
154
+ **Caveats**:
155
+ - Requires headed browser (Puppeteer/Playwright) -- no simple HTTP scraping
156
+ - Rate limiting is strict; adding delays between requests is essential
157
+ - Data is Opta-sourced, so event types and qualifier IDs match Opta's system
158
+
159
+ **Used in**: myTeam's passmap data pipeline (`scripts/fetch-whoscored-events.ts` stores events in Postgres, `scripts/generate-passmap-data.ts` processes them).
160
+
161
+ ## European Football Statistics
162
+
163
+ **What it provides**: Historical match results for European leagues.
164
+
165
+ **Access**: CSV download from `https://www.european-football-statistics.co.uk/`.
166
+
167
+ **Coverage**: Various European leagues with long historical records. Useful for pre-digital era results.
168
+
169
+ ## Comparison Matrix
170
+
171
+ | Feature | StatsBomb Open | FBref | Understat | ClubElo | football-data.co.uk | Transfermarkt | WhoScored |
172
+ |---|---|---|---|---|---|---|---|
173
+ | Event-level data | Full | No | Shot-level | No | No | No | Full |
174
+ | Aggregated stats | Via events | Yes | Yes | No | Basic | No | Yes |
175
+ | xG | Yes | Yes (Opta) | Yes (own model) | No | No | No | No |
176
+ | Coordinates | Yes | No | Shot coords | No | No | No | Yes |
177
+ | Historical depth | Limited | 2017+ detailed | 2014+ | 1946+ | 1993+ | 2004+ | ~2010+ |
178
+ | League coverage | Select | Top 5+ | Top 5 | Europe | 25+ | Global | Top 5+ |
179
+ | Commercial use | No | No | Unclear | Yes | Yes | No | No |
180
+ | API available | GitHub | No | No | Yes (CSV) | CSV download | No | No |