pfr-player-data 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -33
- package/dist/index.js +518 -0
- package/package.json +29 -10
- package/index.js +0 -3
package/README.md
CHANGED
|
@@ -1,64 +1,141 @@
|
|
|
1
1
|
# pfr-player-data
|
|
2
2
|
|
|
3
|
-
Fetch NFL player data from [Pro Football Reference](https://www.pro-football-reference.com/) as
|
|
3
|
+
Fetch NFL player data from [Pro Football Reference](https://www.pro-football-reference.com/) as JSON. ESM-only.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Install
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
npm install pfr-player-data
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Methods
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
13
|
+
- **`getPlayerData(input: string): Promise<Object>`** — Full parsed page (bio, raw careerStats tables, awardsAndHonors, gameLogs, url).
|
|
14
|
+
- **`getSummaryPlayerData(input: string): Promise<Object>`** — Normalized career summary (name, position, careerStats grouped by season type — regular season, postseason, combined — plus summary totals).
|
|
15
|
+
- **`searchPlayers(name: string): Promise<Array<{ id, name, position, yearsActive, teams }>>`** — Search by player name; returns matches. Use `id` with `getPlayerData(id)` or `getSummaryPlayerData(id)`.
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
const data = await getPlayerData('https://www.pro-football-reference.com/players/B/BradTo00.htm');
|
|
17
|
+
**Input** (getPlayerData / getSummaryPlayerData): player id or full URL.
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
```
|
|
19
|
+
- Player id (PFR slug): `'BradTo00'`, `'PeteAd01'`, `'McNaDo00'`
|
|
20
|
+
- Full URL: `'https://www.pro-football-reference.com/players/B/BradTo00.htm'`
|
|
22
21
|
|
|
23
22
|
## API
|
|
24
23
|
|
|
25
|
-
### `getPlayerData(input
|
|
24
|
+
### `getPlayerData(input)` → `Promise<Object>`
|
|
26
25
|
|
|
27
|
-
|
|
28
|
-
- **Returns** — A Promise that resolves to a JSON object with all available player data from the page.
|
|
26
|
+
Fetches the player page and returns the full parsed payload: `bio`, `summary`, `careerStats` (raw table arrays keyed by table id, e.g. `rushing_and_receiving`, `passing`), `awardsAndHonors`, `gameLogs`, `url`.
|
|
29
27
|
|
|
30
|
-
|
|
28
|
+
### `getSummaryPlayerData(input)` → `Promise<Object>`
|
|
31
29
|
|
|
32
|
-
|
|
30
|
+
Uses `getPlayerData` and normalizes to a career summary: `name`, `position`, `careerStats` (structured by season type and category). Best for apps that need totals and a stable shape.
|
|
33
31
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
32
|
+
### `searchPlayers(name)` → `Promise<Array<{ id, name, position, yearsActive, teams }>>`
|
|
33
|
+
|
|
34
|
+
Searches PFR by name (e.g. `'adrian peterson'`). Each result has `id` (PFR slug, e.g. `PeteAd01`), `name`, `position`, `yearsActive` (e.g. `'2007-2021'`), and `teams` (array of team names). Use `id` to call `getPlayerData` or `getSummaryPlayerData`.
|
|
35
|
+
|
|
36
|
+
## Usage
|
|
39
37
|
|
|
40
|
-
|
|
38
|
+
```js
|
|
39
|
+
import { getPlayerData, getSummaryPlayerData, searchPlayers } from 'pfr-player-data';
|
|
41
40
|
|
|
42
|
-
|
|
41
|
+
// Search by name, then fetch with chosen id
|
|
42
|
+
const results = await searchPlayers('adrian peterson');
|
|
43
|
+
// [{ id: 'PeteAd00', name: 'Adrian Peterson', position: 'RB', yearsActive: '2002-2009', teams: ['Bears'] }, ...]
|
|
44
|
+
const summary = await getSummaryPlayerData(results[1].id); // e.g. PeteAd01
|
|
43
45
|
|
|
44
|
-
|
|
45
|
-
const
|
|
46
|
+
// Or call directly with id / URL
|
|
47
|
+
const full = await getPlayerData('BradTo00');
|
|
48
|
+
const s = await getSummaryPlayerData('McNaDo00');
|
|
49
|
+
```
|
|
46
50
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
51
|
+
## Summary response shape
|
|
52
|
+
|
|
53
|
+
`getSummaryPlayerData` returns:
|
|
54
|
+
|
|
55
|
+
```ts
|
|
56
|
+
{
|
|
57
|
+
name: string;
|
|
58
|
+
position: string;
|
|
59
|
+
careerStats: {
|
|
60
|
+
regularSeason: {
|
|
61
|
+
passing: { passCmp?, passAtt?, passYds?, passTd?, passInt?, passCmpPct?, passYdsPerAtt?, passYdsPerCmp?, passYdsPerG?, comebacks?, gwd?, games?, gamesStarted?, av?, awards?, ... };
|
|
62
|
+
rushingReceiving: { games?, gamesStarted?, rushAtt?, rushYds?, rushTd?, rec?, recYds?, recTd?, touches?, ydsFromScrimmage?, rushReceiveTd?, fumbles?, av?, awards?, ... };
|
|
63
|
+
teams: string[];
|
|
64
|
+
};
|
|
65
|
+
postSeason: {
|
|
66
|
+
passing: { ... }; // same keys as regularSeason.passing
|
|
67
|
+
rushingReceiving: { ... };
|
|
68
|
+
teams: string[];
|
|
69
|
+
};
|
|
70
|
+
allSeason: {
|
|
71
|
+
passing: { ... }; // cumulative (reg + post)
|
|
72
|
+
rushingReceiving: { ... };
|
|
73
|
+
teams: string[];
|
|
74
|
+
};
|
|
75
|
+
summary: {
|
|
76
|
+
tdsAll: number; // tdsRush + tdsRec + tdsPass
|
|
77
|
+
tdsRush: number;
|
|
78
|
+
tdsRec: number;
|
|
79
|
+
tdsPass: number;
|
|
80
|
+
ydsAll: number; // ydsRush + ydsRec + ydsPass
|
|
81
|
+
ydsRush: number;
|
|
82
|
+
ydsRec: number;
|
|
83
|
+
ydsPass: number;
|
|
84
|
+
};
|
|
85
|
+
};
|
|
52
86
|
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
- **passing** is populated from the PFR passing table (typically present for QBs); **rushingReceiving** from the rushing & receiving table. A category with no data is returned as an empty object `{}`.
|
|
90
|
+
- **summary** values are derived from **allSeason** (regular + postseason combined).
|
|
53
91
|
|
|
54
|
-
|
|
92
|
+
## Example output
|
|
93
|
+
|
|
94
|
+
```json
|
|
95
|
+
{
|
|
96
|
+
"name": "Adrian Peterson",
|
|
97
|
+
"position": "RB",
|
|
98
|
+
"careerStats": {
|
|
99
|
+
"regularSeason": {
|
|
100
|
+
"passing": {},
|
|
101
|
+
"rushingReceiving": {
|
|
102
|
+
"games": 184,
|
|
103
|
+
"gamesStarted": 167,
|
|
104
|
+
"rushAtt": 3230,
|
|
105
|
+
"rushYds": 14918,
|
|
106
|
+
"rushTd": 120,
|
|
107
|
+
"rec": 305,
|
|
108
|
+
"recYds": 2474,
|
|
109
|
+
"recTd": 6,
|
|
110
|
+
"touches": 3535,
|
|
111
|
+
"ydsFromScrimmage": 17392,
|
|
112
|
+
"rushReceiveTd": 126,
|
|
113
|
+
"fumbles": 49,
|
|
114
|
+
"av": 129
|
|
115
|
+
},
|
|
116
|
+
"teams": ["ARI", "DET", "MIN", "NOR", "SEA", "TEN", "WAS"]
|
|
117
|
+
},
|
|
118
|
+
"postSeason": { "passing": {}, "rushingReceiving": { ... }, "teams": [] },
|
|
119
|
+
"allSeason": { "passing": {}, "rushingReceiving": { ... }, "teams": [] },
|
|
120
|
+
"summary": {
|
|
121
|
+
"tdsAll": 131,
|
|
122
|
+
"tdsRush": 125,
|
|
123
|
+
"tdsRec": 6,
|
|
124
|
+
"tdsPass": 0,
|
|
125
|
+
"ydsAll": 17858,
|
|
126
|
+
"ydsRush": 15330,
|
|
127
|
+
"ydsRec": 2528,
|
|
128
|
+
"ydsPass": 0
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
}
|
|
55
132
|
```
|
|
56
133
|
|
|
57
134
|
## Notes
|
|
58
135
|
|
|
59
|
-
- **
|
|
60
|
-
-
|
|
61
|
-
-
|
|
136
|
+
- **ESM only.** Use `import`; no CommonJS support.
|
|
137
|
+
- Not affiliated with Sports Reference LLC. Data is scraped from public pages.
|
|
138
|
+
- Use reasonable request frequency and caching; respect [PFR terms of use](https://www.sports-reference.com/termsofuse.html).
|
|
62
139
|
|
|
63
140
|
## License
|
|
64
141
|
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,518 @@
|
|
|
1
|
+
// src/parsePlayerPage.js
|
|
2
|
+
import * as cheerio from "cheerio";
|
|
3
|
+
var PFR_BASE = "https://www.pro-football-reference.com";
|
|
4
|
+
/**
 * Returns the text of a cheerio-style selection with surrounding whitespace
 * removed and every internal whitespace run collapsed to a single space.
 *
 * @param $el Selection exposing `length` and `text()`; may be null/undefined.
 * @returns The normalized text, or "" when the selection is missing or empty.
 */
function text($el) {
  const hasNodes = Boolean($el) && Boolean($el.length);
  if (!hasNodes) {
    return "";
  }
  const rawText = $el.text();
  const trimmed = rawText.trim();
  return trimmed.replace(/\s+/g, " ");
}
|
|
8
|
+
/**
 * Extracts biographical fields from the `#meta` section of a player page.
 *
 * - `name` comes from `h1[itemprop="name"]`, falling back to the first `<h1>`.
 * - Every `<strong>Label</strong>: value` pair found in a `#meta p` paragraph
 *   becomes `bio[Label]`; whitespace-separated words of the label are joined
 *   by upper-casing the first character of each following word and non-word
 *   characters are dropped. The first character is left as written, so
 *   "Position" stays `Position`.
 * - `fullName` is the content of the first `<strong>` in the first paragraph.
 *
 * Values are taken from the paragraph's raw HTML up to the next tag, so HTML
 * entities are not decoded here.
 *
 * The previous version also looped over every link in the paragraph and ran
 * `val.replace(linkText, linkText)`. That replaced text with itself, except
 * when the link text contained `$` replacement patterns, in which case it
 * could alter the value. The loop is removed.
 *
 * @param $ Cheerio root function for the loaded page.
 * @returns Object of bio fields; empty object when `#meta` is absent.
 */
function parseBio($) {
  const bio = {};
  const meta = $("#meta");
  if (!meta.length) return bio;

  // Prefer the schema.org-tagged heading; otherwise use the first <h1>.
  bio.name = text($('h1[itemprop="name"]').first()) || text($("h1").first());

  meta.find("p").each((_, p) => {
    const content = $(p).html() || "";
    // A fresh regex per paragraph keeps the global `lastIndex` state local.
    const strongRegex = /<strong[^>]*>([^<]*)<\/strong>\s*:?\s*([^<]*)/gi;
    let match;
    while ((match = strongRegex.exec(content)) !== null) {
      const key = match[1].replace(/\s*:\s*$/, "").trim().replace(/\s+/g, " ");
      const val = match[2].trim();
      if (!key || !val) continue;
      const camel = key.replace(/\s+(\w)/g, (_2, c) => c.toUpperCase()).replace(/\W/g, "");
      if (camel) bio[camel] = val;
    }
  });

  const firstP = $("#meta p").first();
  if (firstP.length) {
    const fullNameMatch = (firstP.html() || "").match(/<strong[^>]*>([^<]+)<\/strong>/);
    if (fullNameMatch) bio.fullName = fullNameMatch[1].trim();
  }
  return bio;
}
|
|
44
|
+
/**
 * Reads the `.stats_pullout` block into a label -> value map.
 *
 * For every `div` under `.stats_pullout` that itself contains at least two
 * `div` descendants, the text of the first descendant is used as the label
 * and the text of the last as the value.
 *
 * @param $ Cheerio root function for the loaded page.
 * @returns The map of non-empty label/value pairs, or null when none found.
 */
function parseSummary($) {
  const pairs = {};
  $(".stats_pullout div").each((_idx, el) => {
    const inner = $(el).find("div");
    if (inner.length < 2) return;
    const label = text(inner.first());
    const value = text(inner.last());
    if (label && value) {
      pairs[label] = value;
    }
  });
  return Object.keys(pairs).length > 0 ? pairs : null;
}
|
|
57
|
+
/**
 * Builds the list of column names for a stats table from the LAST row of its
 * `<thead>`.
 *
 * A column's name is, in order of preference: its `data-stat` attribute, its
 * header text lower-cased with whitespace turned into `_` and other
 * characters outside `[a-z0-9_]` removed, or `col_<index>`. A name that has
 * already been used gets a numeric suffix (`_1`, `_2`, ...).
 *
 * @param $ Cheerio root function.
 * @param tableEl The table element.
 * @returns Array of column names (empty when the table has no header row).
 */
function buildTableHeaders($, tableEl) {
  const headerRows = $(tableEl).find("thead tr");
  const lastHeaderRow = headerRows.length ? $(headerRows[headerRows.length - 1]) : $();
  const names = [];
  // Number of repeats seen so far for each base name.
  const repeatCounts = {};
  lastHeaderRow.find("th").each((_idx, cell) => {
    const $cell = $(cell);
    let name = $cell.attr("data-stat");
    if (!name) {
      name = text($cell).toLowerCase().replace(/\s+/g, "_").replace(/[^a-z0-9_]/g, "");
    }
    if (!name) {
      name = `col_${names.length}`;
    }
    if (repeatCounts[name] === undefined) {
      repeatCounts[name] = 0;
    } else {
      repeatCounts[name] = (repeatCounts[name] || 0) + 1;
      name = `${name}_${repeatCounts[name]}`;
    }
    names.push(name);
  });
  return names;
}
|
|
77
|
+
/**
 * Converts the rows of one `<tbody>` into plain objects.
 *
 * Rows carrying the `thead` class are skipped. Each cell is keyed by its
 * `data-stat` attribute, else the header at the same position, else
 * `col_<index>`. The value is the text of the cell's first link when it has
 * one, otherwise the cell text; an empty value in a cell that has
 * `data-stat` is stored as null.
 *
 * @param $ Cheerio root function.
 * @param tableEl The owning table element (not used by this function).
 * @param tbodyEl The tbody element to read.
 * @param headers Column names from the table header.
 * @returns Array of row objects; rows with no cells are omitted.
 */
function parseTbodyRows($, tableEl, tbodyEl, headers) {
  const parsed = [];
  $(tbodyEl).find("tr").each((_rowIdx, tr) => {
    const $row = $(tr);
    if ($row.hasClass("thead")) return;
    const record = {};
    $row.find("td, th").each((idx, cell) => {
      const $cell = $(cell);
      const stat = $cell.attr("data-stat");
      const key = stat || headers[idx] || `col_${idx}`;
      const anchor = $cell.find("a").first();
      const raw = anchor.length ? text(anchor) : text($cell);
      const value = raw === "" && stat ? null : raw;
      if (key && value !== undefined) {
        record[key] = value;
      }
    });
    if (Object.keys(record).length) {
      parsed.push(record);
    }
  });
  return parsed;
}
|
|
96
|
+
/**
 * Converts the `<tfoot>` rows of a table (career / totals rows) into plain
 * objects.
 *
 * Rows with the `spacer` class are skipped. Each cell is keyed by its
 * `data-stat` attribute, else by the header at the current column cursor,
 * else `col_<cursor>`. The cursor advances by the cell's `colspan` so that
 * spanning cells keep later cells aligned with their headers.
 *
 * The step is clamped to the number of remaining headers but is always at
 * least 1. Previously the clamp could produce 0 (cursor already at the end)
 * or a negative step (negative `colspan`), which stalled or rewound the
 * cursor and made cells without `data-stat` overwrite one another.
 *
 * @param $ Cheerio root function.
 * @param tableEl The table element.
 * @param headers Column names from the table header.
 * @returns Array of row objects; rows with no cells are omitted.
 */
function parseTfootRows($, tableEl, headers) {
  const rows = [];
  $(tableEl).find("tfoot tr").each((_, tr) => {
    const $tr = $(tr);
    if ($tr.hasClass("spacer")) return;
    const row = {};
    let headerIdx = 0;
    $tr.find("td, th").each((_2, cell) => {
      const $cell = $(cell);
      const dataStat = $cell.attr("data-stat");
      const link = $cell.find("a").first();
      let val = link.length ? text(link) : text($cell);
      // An empty stat cell means "no value", not an empty string.
      if (val === "" && dataStat) val = null;
      const key = dataStat || headers[headerIdx] || `col_${headerIdx}`;
      row[key] = val;
      const span = parseInt($cell.attr("colspan"), 10) || 1;
      // Never step past the known headers, but always move forward.
      headerIdx += Math.max(1, Math.min(span, headers.length - headerIdx));
    });
    if (Object.keys(row).length) rows.push(row);
  });
  return rows;
}
|
|
119
|
+
/**
 * Parses one stats table into an array of row objects: all `<tbody>` rows in
 * document order, followed by the `<tfoot>` rows.
 *
 * @param $ Cheerio root function.
 * @param tableEl The table element.
 * @returns Array of row objects; empty when the table yields no headers.
 */
function parseTable($, tableEl) {
  const headers = buildTableHeaders($, tableEl);
  if (headers.length === 0) {
    return [];
  }
  const collected = [];
  $(tableEl).find("tbody").each((_idx, tbodyEl) => {
    for (const bodyRow of parseTbodyRows($, tableEl, tbodyEl, headers)) {
      collected.push(bodyRow);
    }
  });
  for (const footRow of parseTfootRows($, tableEl, headers)) {
    collected.push(footRow);
  }
  return collected;
}
|
|
130
|
+
/**
 * Parses every `table.stats_table` on the page into a map of id -> rows.
 *
 * The id is the table's own `id`, else the `id` of the closest ancestor that
 * has one, else the first class name of the closest `div[class]`, else
 * `table_<n>`. A leading `all_` is stripped. Tables that produce no rows are
 * skipped. When an id is already taken, the later table is stored under
 * `<id>_playoffs`.
 *
 * @param $ Cheerio root function for the loaded page.
 * @returns Object mapping table id to its array of row objects.
 */
function parseAllTables($) {
  const byId = {};
  $("table.stats_table").each((_idx, tableEl) => {
    const $tbl = $(tableEl);
    const rawId =
      $tbl.attr("id") ||
      $tbl.closest("[id]").attr("id") ||
      $tbl.closest("div[class]").attr("class")?.split(" ")[0] ||
      `table_${Object.keys(byId).length}`;
    const tableRows = parseTable($, tableEl);
    if (!tableRows.length) return;
    const baseId = rawId.replace(/^all_/, "");
    const storeKey = byId[baseId] ? `${baseId}_playoffs` : baseId;
    byId[storeKey] = tableRows;
  });
  return byId;
}
|
|
144
|
+
/**
 * Collects the text of each `#meta ul li` item as an award/honor entry.
 *
 * Items are skipped when they are empty, start with "Become a", or are 200
 * characters or longer.
 *
 * @param $ Cheerio root function for the loaded page.
 * @returns Array of award strings in document order.
 */
function parseAwardsAndHonors($) {
  const entries = [];
  $("#meta ul li").each((_idx, item) => {
    const label = text($(item));
    if (!label) return;
    if (label.startsWith("Become a")) return;
    if (label.length >= 200) return;
    entries.push(label);
  });
  return entries;
}
|
|
152
|
+
/**
 * Parses a player page's HTML into the full payload returned by
 * `getPlayerData` (the caller adds `url`).
 *
 * Each section is null when nothing was found. `otherTables` holds every
 * parsed table whose id is not one of the main table ids.
 *
 * `rushing_and_receiving` is now treated as a main table id alongside the
 * older `rushing_receiving` spelling. It is the id the summary builder looks
 * up first, and it was previously duplicated into `otherTables`.
 *
 * @param html Raw HTML of the player page.
 * @returns Object with bio, summary, careerStats, awardsAndHonors, gameLogs
 *          and otherTables.
 */
function parsePlayerPage(html) {
  const $ = cheerio.load(html);
  const bio = parseBio($);
  const summary = parseSummary($);
  const careerStats = parseAllTables($);
  const awardsAndHonors = parseAwardsAndHonors($);

  const mainIds = new Set([
    "passing",
    "rushing_receiving",
    "rushing_and_receiving",
    "defense",
    "punting",
    "scoring",
    "snap_counts"
  ]);
  const other = Object.fromEntries(
    Object.entries(careerStats).filter(([id]) => !mainIds.has(id))
  );

  return {
    bio: Object.keys(bio).length ? bio : null,
    summary: summary || null,
    careerStats: Object.keys(careerStats).length ? careerStats : null,
    awardsAndHonors: awardsAndHonors.length ? awardsAndHonors : null,
    // game logs live on separate gamelog pages
    gameLogs: null,
    otherTables: Object.keys(other).length ? other : null
  };
}
|
|
175
|
+
|
|
176
|
+
// src/parseSearchPage.js
|
|
177
|
+
import * as cheerio2 from "cheerio";
|
|
178
|
+
|
|
179
|
+
// src/summaryUtils.js
|
|
180
|
+
/**
 * Converts a snake_case string to camelCase by replacing every underscore
 * that is followed by a lowercase ASCII letter with that letter upper-cased.
 * Non-string input is returned unchanged.
 *
 * @param s Value to convert.
 * @returns The camelCased string, or `s` itself when it is not a string.
 */
function snakeToCamel(s) {
  if (typeof s === "string") {
    const upperAfterUnderscore = (_whole, letter) => letter.toUpperCase();
    return s.replace(/_([a-z])/g, upperAfterUnderscore);
  }
  return s;
}
|
|
184
|
+
/**
 * Best-effort numeric conversion for scraped cell values.
 *
 * - null, undefined, "" and whitespace-only strings give undefined.
 * - A non-NaN number is returned as is.
 * - Otherwise the value is stringified, trimmed, stripped of commas and
 *   passed to `Number`; when that yields NaN the ORIGINAL value is returned
 *   unchanged rather than undefined.
 *
 * @param val Raw value.
 * @returns A number, undefined for blank input, or the original value when
 *          it is not numeric.
 */
function toNumber(val) {
  if (val == null || val === "") {
    return undefined;
  }
  if (typeof val === "number" && !Number.isNaN(val)) {
    return val;
  }
  const cleaned = String(val).trim();
  if (cleaned.length === 0) {
    return undefined;
  }
  const parsed = Number(cleaned.replace(/,/g, ""));
  if (Number.isNaN(parsed)) {
    return val;
  }
  return parsed;
}
|
|
192
|
+
// Maps column keys of the rushing & receiving table to the camelCase keys
// used in the normalized summary. Several source keys map to the same output
// key. The map is iterated in insertion order when a row is converted, so a
// later alias present in the row overrides an earlier one: keep the order.
var RUSH_REC_STAT_MAP = {
  // games
  "games": "games",
  "games_started": "gamesStarted",
  "g": "games",
  "gs": "gamesStarted",
  // attempts / yards
  "att": "rushAtt",
  "rush_att": "rushAtt",
  "yds": "rushYds",
  "rush_yds": "rushYds",
  "yds_1": "recYds",
  "yds_2": "recYds",
  "rec_yds": "recYds",
  // touchdowns
  "td": "rushTd",
  "rush_td": "rushTd",
  "td_1": "recTd",
  "td_2": "recTd",
  "rec_td": "recTd",
  // first downs
  "1d": "rushFirstDown",
  "rush_first_down": "rushFirstDown",
  "1d_1": "recFirstDown",
  "1d_2": "recFirstDown",
  "rec_first_down": "recFirstDown",
  // rushing rates
  "rush_yds_per_att": "rushYdsPerAtt",
  "y_a": "rushYdsPerAtt",
  "rush_yds_per_g": "rushYdsPerG",
  "y_g": "rushYdsPerG",
  "rush_att_per_g": "rushAttPerG",
  "a_g": "rushAttPerG",
  // receiving
  "targets": "targets",
  "tgt": "targets",
  "rec": "rec",
  "rec_yds_per_rec": "recYdsPerRec",
  "y_r": "recYdsPerRec",
  "rec_per_g": "recPerG",
  "rec_yds_per_g": "recYdsPerG",
  // combined
  "touches": "touches",
  "touch": "touches",
  "yds_per_touch": "ydsPerTouch",
  "y_tch": "ydsPerTouch",
  "ytch": "ydsPerTouch",
  "yds_from_scrimmage": "ydsFromScrimmage",
  "yds_scrimmage": "ydsFromScrimmage",
  "yscm": "ydsFromScrimmage",
  "rush_receive_td": "rushReceiveTd",
  // misc
  "ctch": "recCatchPct",
  "succ_pct": "succPct",
  "lng": "lng",
  "fumbles": "fumbles",
  "fmb": "fumbles",
  "awards": "awards",
  "team": "team",
  "tm": "team",
  "team_name_abbr": "team",
  "av": "av"
};
|
|
247
|
+
// Maps column keys of the passing table to the camelCase keys used in the
// normalized summary. Iterated in insertion order when a row is converted.
var PASS_STAT_MAP = {
  // games
  "games": "games",
  "games_started": "gamesStarted",
  // volume
  "pass_cmp": "passCmp",
  "pass_att": "passAtt",
  "pass_cmp_pct": "passCmpPct",
  "pass_yds": "passYds",
  "pass_td": "passTd",
  "pass_int": "passInt",
  // rates
  "pass_yds_per_att": "passYdsPerAtt",
  "pass_yds_per_cmp": "passYdsPerCmp",
  "pass_yds_per_g": "passYdsPerG",
  // situational
  "comebacks": "comebacks",
  "gwd": "gwd",
  "pass_first_down": "passFirstDown",
  "pass_sacked": "passSacked",
  "pass_sacked_yds": "passSackedYds",
  "pass_rating": "passRating",
  "qb_rec": "qbRec",
  // misc
  "av": "av",
  "awards": "awards",
  "team_name_abbr": "team"
};
|
|
270
|
+
/**
 * Converts one passing-table row into normalized camelCase stats.
 *
 * For each entry of PASS_STAT_MAP (except the `team` mapping) the row is
 * read under the source key, falling back to the camelCased source key.
 * Keys that resolve to undefined are omitted. `awards` is split into an
 * array, `qbRec` is copied verbatim, and everything else goes through
 * `toNumber`, keeping the raw value when `toNumber` returns undefined.
 *
 * @param row A parsed table row.
 * @returns Object of normalized passing stats.
 */
function buildPassingStatsFromRow(row) {
  const stats = {};
  Object.keys(PASS_STAT_MAP).forEach((sourceKey) => {
    const targetKey = PASS_STAT_MAP[sourceKey];
    if (targetKey === "team") return;
    const raw = row[sourceKey] ?? row[snakeToCamel(sourceKey)];
    if (raw === undefined) return;
    if (targetKey === "awards") {
      stats.awards = parseAwards(raw);
    } else if (targetKey === "qbRec") {
      stats[targetKey] = raw;
    } else {
      const numeric = toNumber(raw);
      stats[targetKey] = numeric === undefined ? raw : numeric;
    }
  });
  return stats;
}
|
|
289
|
+
/**
 * Tells whether a row is a career-totals row, judged by its `year_id` (or,
 * failing that, `season`) cell: true for labels such as "15 Yrs" / "1 yr" or
 * "Career" (case-insensitive). Anything else, including "17 Game Avg" rows,
 * is false.
 *
 * @param row A parsed table row.
 * @returns true when the row carries career totals.
 */
function isCareerRow(row) {
  const label = String(row.year_id ?? row.season ?? "").trim();
  return /^\d+\s*yrs?$/i.test(label) || /^career$/i.test(label);
}
|
|
297
|
+
/**
 * Splits an awards cell into a list of award labels.
 *
 * The value is stringified, split on commas and semicolons, and each piece
 * is trimmed; empty pieces are dropped.
 *
 * @param awards Raw awards cell (string, null or undefined).
 * @returns Array of award labels; empty for null/undefined/blank input.
 */
function parseAwards(awards) {
  const joined = awards == null ? "" : String(awards).trim();
  if (joined === "") {
    return [];
  }
  const labels = [];
  for (const piece of joined.split(/[,;]/)) {
    const label = piece.trim();
    if (label) labels.push(label);
  }
  return labels;
}
|
|
303
|
+
/**
 * Converts one rushing & receiving row into normalized camelCase stats.
 *
 * For each entry of RUSH_REC_STAT_MAP (except the `team` mappings) the row
 * is read under the source key, falling back to the camelCased source key.
 * Keys that resolve to undefined are omitted. `awards` is split into an
 * array; everything else goes through `toNumber`, keeping the raw value when
 * `toNumber` returns undefined. When several source keys map to the same
 * output key, the last one present in map order wins.
 *
 * @param row A parsed table row.
 * @returns Object of normalized rushing/receiving stats.
 */
function buildCareerStatsFromRow(row) {
  const stats = {};
  Object.keys(RUSH_REC_STAT_MAP).forEach((sourceKey) => {
    const targetKey = RUSH_REC_STAT_MAP[sourceKey];
    if (targetKey === "team") return;
    const raw = row[sourceKey] ?? row[snakeToCamel(sourceKey)];
    if (raw === undefined) return;
    if (targetKey === "awards") {
      stats.awards = parseAwards(raw);
      return;
    }
    const numeric = toNumber(raw);
    stats[targetKey] = numeric === undefined ? raw : numeric;
  });
  return stats;
}
|
|
318
|
+
/**
 * Collects the distinct team abbreviations that appear in a table's rows.
 *
 * The team cell is read from `team`, `tm` or `team_name_abbr` (first one
 * that is not null/undefined). A value counts when it is exactly 2-3
 * uppercase letters, or starts with 2-3 uppercase letters followed by
 * optional whitespace and "(". Other values, e.g. "2TM", are ignored.
 *
 * @param rows Iterable of parsed table rows.
 * @returns Sorted array of unique team abbreviations.
 */
function collectTeams(rows) {
  const found = new Set();
  for (const row of rows) {
    const label = String(row.team ?? row.tm ?? row.team_name_abbr ?? "").trim();
    if (/^[A-Z]{2,3}$/.test(label)) {
      found.add(label);
    }
    const prefixed = /^([A-Z]{2,3})\s*\(/.exec(label);
    if (prefixed !== null) {
      found.add(prefixed[1]);
    }
  }
  return Array.from(found).sort();
}
|
|
329
|
+
/**
 * Tells whether a row is a per-N-games average row: true when any of the
 * `year_id`, `season`, `team`, `tm` or `pos` cells contains text such as
 * "17 Game Avg" (case-insensitive).
 *
 * @param row A parsed table row.
 * @returns true when the row is an average row.
 */
function isAvgRow(row) {
  const labelFields = ["year_id", "season", "team", "tm", "pos"];
  return labelFields.some((field) => {
    const cell = row[field];
    return cell != null && /\d+\s*game\s+avg/i.test(String(cell));
  });
}
|
|
338
|
+
/**
 * Picks the row holding career totals from a parsed table.
 *
 * Rows are scanned from the end and the first one recognised by
 * `isCareerRow` is returned. When none matches, the last row is used as a
 * fallback unless it is an average row.
 *
 * @param rows Array of parsed table rows.
 * @returns The career row, or null when none can be determined.
 */
function findCareerRow(rows) {
  let idx = rows.length;
  while (idx-- > 0) {
    const candidate = rows[idx];
    if (isCareerRow(candidate)) {
      return candidate;
    }
  }
  if (rows.length === 0) {
    return null;
  }
  const lastRow = rows[rows.length - 1];
  if (!lastRow || isAvgRow(lastRow)) {
    return null;
  }
  return lastRow;
}
|
|
345
|
+
/**
 * Builds the headline touchdown / yardage totals from the combined
 * (regular + postseason) passing and rushing/receiving stats.
 *
 * Every field of the result is a number. A source stat that is missing or
 * not a finite number counts as 0. Previously only null/undefined were
 * defaulted, so a value like "" or "n/a" leaked through and turned `tdsAll`
 * / `ydsAll` into concatenated strings.
 *
 * @param allPassing Combined passing stats (reads `passTd`, `passYds`).
 * @param allRushRec Combined rushing/receiving stats (reads `rushTd`,
 *                   `recTd`, `rushYds`, `recYds`).
 * @returns `{ tdsAll, tdsRush, tdsRec, tdsPass, ydsAll, ydsRush, ydsRec, ydsPass }`.
 */
function buildCareerSummary(allPassing, allRushRec) {
  // Only finite numbers take part in the totals.
  const count = (value) => (typeof value === "number" && Number.isFinite(value) ? value : 0);

  const tdsRush = count(allRushRec?.rushTd);
  const tdsRec = count(allRushRec?.recTd);
  const tdsPass = count(allPassing?.passTd);
  const ydsRush = count(allRushRec?.rushYds);
  const ydsRec = count(allRushRec?.recYds);
  const ydsPass = count(allPassing?.passYds);

  return {
    tdsAll: tdsRush + tdsRec + tdsPass,
    tdsRush,
    tdsRec,
    tdsPass,
    ydsAll: ydsRush + ydsRec + ydsPass,
    ydsRush,
    ydsRec,
    ydsPass
  };
}
|
|
363
|
+
/**
 * Normalizes the full parsed player payload into the career-summary shape:
 * `{ name, position, careerStats: { regularSeason, postSeason, allSeason, summary } }`.
 *
 * Table lookup:
 * - rushing/receiving: `rushing_and_receiving`, `rushing_receiving`, else the
 *   first table whose id contains "rushing" and is not a postseason id.
 * - rushing/receiving postseason: the `_post` / `_playoffs` ids, else the
 *   first table whose id contains "rushing" and is a postseason id.
 * - passing: `passing`.
 * - passing postseason: `passing_post`, else `passing_playoffs`. The
 *   `_playoffs` fallback is new; it matches the suffix `parseAllTables`
 *   gives to a repeated table id and mirrors the rushing lookup, which
 *   already handled it.
 *
 * Each season block takes its stats from the table's career row.
 * `allSeason` sums the cumulative counters of regular and postseason (a
 * non-numeric value is carried over as is when neither side is a number) and
 * merges the award lists. `summary` is derived from `allSeason`.
 *
 * @param fullData Payload produced by `parsePlayerPage` / `getPlayerData`.
 * @returns The normalized career summary.
 */
function buildSummaryFromFullData(fullData) {
  const bio = fullData.bio;
  const name = bio?.name ?? "";
  const position = (bio?.Position ?? bio?.position ?? "").trim();
  const tables = fullData.careerStats ?? {};

  const isPostseasonId = (id) => id.includes("_post") || id.includes("playoff");
  const findTable = (matches) => Object.entries(tables).find(([id]) => matches(id))?.[1];

  const rushRec =
    tables.rushing_and_receiving ??
    tables.rushing_receiving ??
    findTable((id) => id === "rushing_and_receiving" || (id.includes("rushing") && !isPostseasonId(id)));
  const rushRecPlayoffs =
    tables.rushing_and_receiving_post ??
    tables.rushing_receiving_playoffs ??
    tables.rushing_and_receiving_playoffs ??
    findTable((id) => id.includes("rushing") && isPostseasonId(id));
  const passing = tables.passing;
  const passingPost = tables.passing_post ?? tables.passing_playoffs;

  // Stats of a table's career row, or {} when the table or the row is missing.
  const careerStatsOf = (rows, build) => {
    if (!Array.isArray(rows)) return {};
    const careerRow = findCareerRow(rows);
    return careerRow ? build(careerRow) : {};
  };
  const mergeTeams = (a, b) => [...new Set([...a, ...b])].sort();

  const regularRushRec = careerStatsOf(rushRec, buildCareerStatsFromRow);
  const postRushRec = careerStatsOf(rushRecPlayoffs, buildCareerStatsFromRow);
  const regularPassing = careerStatsOf(passing, buildPassingStatsFromRow);
  const postPassing = careerStatsOf(passingPost, buildPassingStatsFromRow);

  // Teams come from the rushing tables first; the passing table is consulted
  // only when nothing was found there.
  let teams = Array.isArray(rushRec) ? collectTeams(rushRec) : [];
  if (Array.isArray(rushRecPlayoffs)) teams = mergeTeams(teams, collectTeams(rushRecPlayoffs));
  if (Array.isArray(passing) && teams.length === 0) teams = collectTeams(passing);
  if (Array.isArray(passingPost)) teams = mergeTeams(teams, collectTeams(passingPost));

  // Counters that can be added across regular season and postseason.
  const cumulativeRushRec = [
    "games", "gamesStarted", "rushAtt", "rushYds", "rushTd", "rushFirstDown",
    "targets", "rec", "recYds", "recTd", "recFirstDown", "touches",
    "ydsFromScrimmage", "rushReceiveTd", "fumbles", "av"
  ];
  const cumulativePass = [
    "games", "gamesStarted", "passCmp", "passAtt", "passYds", "passTd", "passInt",
    "passFirstDown", "comebacks", "gwd", "passSacked", "passSackedYds", "av"
  ];

  // Adds up the cumulative counters of both season types and merges awards.
  const combine = (keys, regular, post) => {
    const total = {};
    for (const k of keys) {
      const r = regular[k];
      const p = post[k];
      if (typeof r === "number" || typeof p === "number") {
        total[k] = (typeof r === "number" ? r : 0) + (typeof p === "number" ? p : 0);
      } else if (r !== undefined) {
        total[k] = r;
      } else if (p !== undefined) {
        total[k] = p;
      }
    }
    const awards = [...new Set([...(regular.awards || []), ...(post.awards || [])])];
    if (awards.length) total.awards = awards;
    return total;
  };

  const allRushRec = combine(cumulativeRushRec, regularRushRec, postRushRec);
  const allPassing = combine(cumulativePass, regularPassing, postPassing);

  const postTeams = mergeTeams(
    collectTeams(rushRecPlayoffs || []),
    collectTeams(passingPost || [])
  );

  return {
    name,
    position,
    careerStats: {
      regularSeason: {
        passing: regularPassing,
        rushingReceiving: regularRushRec,
        teams: [...teams]
      },
      postSeason: {
        passing: postPassing,
        rushingReceiving: postRushRec,
        teams: postTeams
      },
      allSeason: {
        passing: allPassing,
        rushingReceiving: allRushRec,
        teams: [...teams]
      },
      summary: buildCareerSummary(allPassing, allRushRec)
    }
  };
}
|
|
480
|
+
|
|
481
|
+
// src/getPlayerData.js
|
|
482
|
+
var PFR_SEARCH = `${PFR_BASE}/search/search.fcgi`;
|
|
483
|
+
var PLAYER_URL_REGEX = /^https?:\/\/www\.pro-football-reference\.com\/players\/[A-Z]\/[\w.-]+\.htm$/i;
|
|
484
|
+
/**
 * Turns a player id or URL into the player page URL.
 *
 * - Input matching PLAYER_URL_REGEX is returned (trimmed) as is.
 * - Input that merely contains a `/players/<letter>/<id>.htm` path is
 *   rebuilt on PFR_BASE from that letter and id.
 * - Anything else is treated as a player id (an optional trailing `.htm` is
 *   dropped); the directory letter is the id's first character upper-cased.
 *
 * @param input Player id or URL.
 * @returns The resolved player page URL.
 * @throws Error when the input is empty after trimming.
 */
function resolvePlayerUrl(input) {
  const candidate = (input || "").trim();
  if (PLAYER_URL_REGEX.test(candidate)) {
    return candidate;
  }
  const embeddedPath = candidate.match(/\/players\/([A-Z])\/([\w.-]+)\.htm/i);
  if (embeddedPath) {
    return `${PFR_BASE}/players/${embeddedPath[1]}/${embeddedPath[2]}.htm`;
  }
  const playerId = candidate.replace(/\.htm$/i, "");
  if (!playerId) {
    throw new Error("Invalid input: provide a PFR player URL or player id (e.g. BradTo00).");
  }
  const directory = playerId.charAt(0).toUpperCase();
  return `${PFR_BASE}/players/${directory}/${playerId}.htm`;
}
|
|
497
|
+
/**
 * Fetches a player page and returns its full parsed payload.
 *
 * @param input Player id or player page URL (see `resolvePlayerUrl`).
 * @returns Promise of the object produced by `parsePlayerPage`, with the
 *          resolved page address added as `url`.
 * @throws Error when the input cannot be resolved or the HTTP response is
 *         not OK.
 */
async function getPlayerData(input) {
  const url = resolvePlayerUrl(input);
  const requestHeaders = {
    "User-Agent": "Mozilla/5.0 (compatible; pfr-player-data/1.0)",
    "Accept": "text/html,application/xhtml+xml"
  };
  const response = await fetch(url, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`Failed to fetch player page: ${response.status} ${response.statusText}`);
  }
  const pageHtml = await response.text();
  const parsed = parsePlayerPage(pageHtml);
  parsed.url = url;
  return parsed;
}
|
|
511
|
+
/**
 * Fetches a player page and returns the normalized career summary.
 *
 * @param input Player id or player page URL.
 * @returns Promise of the object built by `buildSummaryFromFullData` from
 *          the result of `getPlayerData(input)`.
 */
async function getSummaryPlayerData(input) {
  return buildSummaryFromFullData(await getPlayerData(input));
}
|
|
515
|
+
export {
|
|
516
|
+
getPlayerData,
|
|
517
|
+
getSummaryPlayerData
|
|
518
|
+
};
|
package/package.json
CHANGED
|
@@ -1,21 +1,40 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pfr-player-data",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Fetch NFL player data from Pro Football Reference as JSON. Scrapes a player's PFR page and returns all available stats and info.",
|
|
5
5
|
"type": "module",
|
|
6
|
-
"main": "index.js",
|
|
7
|
-
"module": "index.js",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"module": "dist/index.js",
|
|
8
8
|
"exports": {
|
|
9
|
-
".": "./index.js"
|
|
9
|
+
".": "./dist/index.js"
|
|
10
10
|
},
|
|
11
11
|
"files": [
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
],
|
|
12
|
+
"dist",
|
|
13
|
+
"README.md"
|
|
14
|
+
],
|
|
15
15
|
"scripts": {
|
|
16
|
-
"test": "echo \"Error: no test specified\" && exit 1"
|
|
16
|
+
"test": "echo \"Error: no test specified\" && exit 1",
|
|
17
|
+
"build": "npx esbuild src/index.js --bundle --format=esm --outfile=dist/index.js --external:cheerio",
|
|
18
|
+
"prepublishOnly": "npm run build",
|
|
19
|
+
"release:patch": "npm version patch && npm publish",
|
|
20
|
+
"release:minor": "npm version minor && npm publish",
|
|
21
|
+
"release:major": "npm version major && npm publish"
|
|
17
22
|
},
|
|
18
|
-
"keywords": [
|
|
23
|
+
"keywords": [
|
|
24
|
+
"nfl",
|
|
25
|
+
"pro-football-reference",
|
|
26
|
+
"pfr",
|
|
27
|
+
"scraper",
|
|
28
|
+
"player-stats",
|
|
29
|
+
"football",
|
|
30
|
+
"sports-reference"
|
|
31
|
+
],
|
|
19
32
|
"author": "",
|
|
20
|
-
"license": "ISC"
|
|
33
|
+
"license": "ISC",
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"cheerio": "^1.0.0"
|
|
36
|
+
},
|
|
37
|
+
"devDependencies": {
|
|
38
|
+
"esbuild": "^0.24.0"
|
|
39
|
+
}
|
|
21
40
|
}
|
package/index.js
DELETED