njsparser 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +390 -40
- package/api.js +76 -50
- package/bun.lock +2 -48
- package/mod.js +148 -0
- package/package.json +11 -16
- package/parser/flight_data.js +189 -306
- package/parser/manifests.js +37 -37
- package/parser/next_data.js +29 -26
- package/parser/types.js +408 -296
- package/parser/urls.js +86 -56
- package/tests/api.test.js +96 -0
- package/tests/integration.test.js +68 -0
- package/tests/parser/flight_data.test.js +105 -0
- package/tests/parser/manifests.test.js +50 -0
- package/tests/parser/next_data.test.js +53 -0
- package/tests/parser/types.test.js +243 -0
- package/tests/parser/urls.test.js +84 -0
- package/tests/property.test.js +299 -0
- package/tests/setup.js +21 -0
- package/tests/utils.test.js +32 -0
- package/tools.js +263 -185
- package/utils.js +29 -24
- package/_.js +0 -10
- package/_.json +0 -12837
- package/api.test.js +0 -41
- package/index.js +0 -8
- package/package-lock.json +0 -291
- package/parser/flight_data.test.js +0 -59
- package/parser/manifests.test.js +0 -36
- package/parser/next_data.test.js +0 -15
- package/parser/types.test.js +0 -261
- package/parser/urls.test.js +0 -26
- package/test/src/index.js +0 -16
- package/tools.test.js +0 -153
- package/utils.test.js +0 -38
package/parser/flight_data.js
CHANGED
|
@@ -1,333 +1,216 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Flight data extraction and parsing
|
|
3
|
+
*/
|
|
3
4
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
import { makeTree } from '../utils.js';
|
|
6
|
+
import { resolveType } from './types.js';
|
|
7
|
+
|
|
8
|
+
// Regex patterns for matching flight data scripts.
// RE_F_INIT matches the first push, `(self.__next_f = self.__next_f || []).push([...])`,
// and captures the pushed array literal.
const RE_F_INIT = /\(self\.__next_f\s?=\s?self\.__next_f\s?\|\|\s?\[\]\)\.push\((\[.+?\])\)/;
// RE_F_PAYLOAD matches a `self.__next_f.push([...])` call that ends the script text
// and captures the pushed array literal.
const RE_F_PAYLOAD = /self\.__next_f\.push\((\[.+)\)$/;

// Segment type codes found in the first item of each pushed array.
// Frozen so that no caller can accidentally remap a code at runtime.
const Segment = Object.freeze({
  is_bootstrap: 0,
  is_not_bootstrap: 1,
  is_form_state: 2,
  is_binary: 3,
});
|
|
7
19
|
|
|
8
20
|
/**
 * Tell whether an HTML document contains a flight data initialisation script.
 *
 * The document is parsed with `makeTree` and every `<script>` element's text
 * is tested against `RE_F_INIT`; scanning stops at the first match.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {boolean} True if flight data exists
 */
export function hasFlightData(html, DOMParser) {
  const tree = makeTree(html, DOMParser);
  const sources = Array.from(tree.querySelectorAll('script')).map(
    (node) => node.textContent || ''
  );

  for (const source of sources) {
    if (RE_F_INIT.test(source)) {
      return true;
    }
  }
  return false;
}
|
|
29
31
|
|
|
30
32
|
/**
 * Collect the raw flight data segments pushed by the page's scripts.
 *
 * Every `<script>` text is trimmed and checked twice: once for the
 * initialisation push (only until the first one has been found) and once for
 * a trailing payload push. Each captured array literal is parsed as JSON and
 * appended in document order.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {Array|null} Raw flight data array or null when nothing matched
 * @throws {SyntaxError} If a captured array literal is not valid JSON
 */
export function getRawFlightData(html, DOMParser) {
  const tree = makeTree(html, DOMParser);
  const sources = Array.from(tree.querySelectorAll('script')).map(
    (node) => node.textContent || ''
  );

  const segments = [];
  let initSeen = false;

  sources.forEach((source) => {
    const text = source.trim();

    // Only the first initialisation push is taken.
    const init = initSeen ? null : text.match(RE_F_INIT);
    if (init) {
      initSeen = true;
      segments.push(JSON.parse(init[1]));
    }

    // A script may also end with a payload push.
    const payload = text.match(RE_F_PAYLOAD);
    if (payload) {
      segments.push(JSON.parse(payload[1]));
    }
  });

  if (segments.length === 0) {
    return null;
  }
  return segments;
}
|
|
76
66
|
|
|
77
67
|
/**
 * Decode raw flight data segments into an ordered list of text chunks.
 *
 * Each segment is an array whose first item is a `Segment` type code:
 * - `is_bootstrap` (re)initialises the output buffer;
 * - `is_not_bootstrap` appends `seg[1]` unchanged;
 * - `is_form_state` is accepted, but its payload is not part of the result;
 * - `is_binary` carries base64 in `seg[1]`, which is decoded to bytes and then
 *   to text as UTF-8.
 *
 * @param {Array} rawFlightData - Raw flight data array
 * @returns {Array<string>|undefined} Decoded flight data chunks, or `undefined`
 *   when no bootstrap segment was present
 * @throws {Error} If a data segment precedes the bootstrap segment, or a
 *   segment has an unknown type code
 */
export function decodeRawFlightData(rawFlightData) {
  let initialServerDataBuffer;
  // One streaming decoder for all binary segments, so that a multi-byte
  // character split across consecutive binary segments is still decoded whole.
  const binaryDecoder = new TextDecoder();

  // Data segments are only valid once a bootstrap segment created the buffer.
  const requireBuffer = () => {
    if (initialServerDataBuffer === undefined) {
      throw new Error(
        'The `initialServerDataBuffer` was not yet initialized and a segment tried to append its data to it. ' +
        'This should not be happening if the flight data starts correctly with a the `is_bootstrap` segment.'
      );
    }
    return initialServerDataBuffer;
  };

  // Emit whatever the streaming decoder still holds, keeping chunk order intact.
  const flushPendingBinary = () => {
    const tail = binaryDecoder.decode();
    if (tail.length > 0) {
      initialServerDataBuffer.push(tail);
    }
  };

  for (const seg of rawFlightData) {
    const segmentType = seg[0];

    switch (segmentType) {
      case Segment.is_bootstrap:
        binaryDecoder.decode(); // drop bytes pending from a previous buffer
        initialServerDataBuffer = [];
        break;

      case Segment.is_not_bootstrap: {
        const buffer = requireBuffer();
        flushPendingBinary();
        buffer.push(seg[1]);
        break;
      }

      case Segment.is_form_state:
        // Form state is not part of the flight stream; nothing to append.
        break;

      case Segment.is_binary: {
        const buffer = requireBuffer();
        // atob() yields one char per byte. Pushing that string as-is would make
        // every byte >= 0x80 be re-encoded as two bytes later on, so turn it
        // back into real bytes and decode those as UTF-8 instead.
        const bytes = Uint8Array.from(atob(seg[1]), (ch) => ch.charCodeAt(0));
        buffer.push(binaryDecoder.decode(bytes, { stream: true }));
        break;
      }

      default:
        throw new Error(`Unknown segment type seg[0]=${segmentType}`);
    }
  }

  if (initialServerDataBuffer !== undefined) {
    flushPendingBinary();
  }
  return initialServerDataBuffer;
}
|
|
108
|
+
|
|
109
|
+
// True for the ASCII codes of `0-9` and `a-f` (lowercase hex only).
function isLowerHexByte(byte) {
  return (byte >= 48 && byte <= 57) || (byte >= 97 && byte <= 102);
}

// Find the newline that ends the current row: one not preceded by a backslash
// and followed by `<lowercase hex digits>:`. Returns -1 when there is none.
function findRowEnd(bytes, from) {
  // The final byte is never treated as a boundary: nothing can follow it.
  for (let i = from; i < bytes.length - 1; i++) {
    if (bytes[i] !== 10) { // '\n'
      continue;
    }
    if (i > 0 && bytes[i - 1] === 92) { // preceded by '\\'
      continue;
    }
    let j = i + 1;
    while (j < bytes.length && isLowerHexByte(bytes[j])) {
      j++;
    }
    if (j < bytes.length && bytes[j] === 58) { // ':'
      return i;
    }
  }
  return -1;
}

/**
 * Parse decoded raw flight data into structured objects.
 *
 * The chunks are joined and scanned as UTF-8 bytes, one row at a time:
 * `<hex index>:` comes first (the index may be empty), then an optional run of
 * uppercase letters naming the value class, then the payload.
 * - Class `T` rows are `T<hex byte length>,<text>`; the text is taken verbatim.
 * - Any other row runs up to the next row boundary (or the end of the data) and
 *   is parsed as JSON; an empty payload becomes `null`.
 *
 * Each row is passed to `resolveType(value, valueClass, index)`. Rows with an
 * index are stored under that index; rows without one are collected in an
 * array under the key `"null"`.
 *
 * @param {Array<string>} decodedRawFlightData - Decoded flight data chunks
 * @returns {Object} Dictionary mapping indices to parsed elements
 * @throws {Error} If a `T` row has no comma or a non-hexadecimal length
 * @throws {SyntaxError} If a non-`T` payload is not valid JSON
 */
export function parseDecodedRawFlightData(decodedRawFlightData) {
  // `T` lengths count bytes, so all offsets below are byte offsets.
  const bytes = new TextEncoder().encode(decodedRawFlightData.join(''));
  const decoder = new TextDecoder();
  const indexedResult = {};
  let pos = 0;

  while (true) {
    const indexEnd = bytes.indexOf(58, pos); // ':'
    if (indexEnd === -1) {
      break;
    }

    let index = null;
    if (indexEnd > pos) {
      index = parseInt(decoder.decode(bytes.subarray(pos, indexEnd)), 16);
    }
    pos = indexEnd + 1;

    // Extract value class (uppercase ASCII letters)
    let valueClass = '';
    while (pos < bytes.length && bytes[pos] >= 65 && bytes[pos] <= 90) {
      valueClass += String.fromCharCode(bytes[pos]);
      pos++;
    }
    valueClass = valueClass || null;

    let value;

    if (valueClass === 'T') {
      const lengthEnd = bytes.indexOf(44, pos); // ','
      if (lengthEnd === -1) {
        throw new Error("Expected comma after 'T' class size");
      }
      const textLength = parseInt(decoder.decode(bytes.subarray(pos, lengthEnd)), 16);
      if (Number.isNaN(textLength)) {
        // A NaN length would turn `pos` into NaN and restart the scan at 0 forever.
        throw new Error(`Invalid 'T' class size at byte offset ${pos}`);
      }
      const textStart = lengthEnd + 1;
      value = decoder.decode(bytes.subarray(textStart, textStart + textLength));
      pos = textStart + textLength;
    } else {
      const rowEnd = findRowEnd(bytes, pos);
      const rawValue = rowEnd !== -1 ? bytes.subarray(pos, rowEnd) : bytes.subarray(pos);
      pos = rowEnd !== -1 ? rowEnd + 1 : bytes.length;

      const rawText = decoder.decode(rawValue);
      value = rawText.length === 0 ? null : JSON.parse(rawText);
    }

    const resolved = resolveType(value, valueClass, index);

    if (index === null) {
      // Rows without an index accumulate under the "null" key.
      if (!(index in indexedResult)) {
        indexedResult[index] = [];
      }
      indexedResult[index].push(resolved);
    } else {
      indexedResult[index] = resolved;
    }
  }

  return indexedResult;
}
|
|
320
202
|
|
|
321
203
|
/**
 * Extract, decode and parse the flight data embedded in an HTML document.
 *
 * Chains `getRawFlightData`, `decodeRawFlightData` and
 * `parseDecodedRawFlightData`; when the first step finds nothing, the other
 * two are skipped.
 *
 * @param {string} html - HTML string
 * @param {DOMParser} DOMParser - DOMParser instance
 * @returns {Object|null} Parsed flight data or null
 */
export function getFlightData(html, DOMParser) {
  const segments = getRawFlightData(html, DOMParser);
  return segments === null
    ? null
    : parseDecodedRawFlightData(decodeRawFlightData(segments));
}
|
package/parser/manifests.js
CHANGED
|
@@ -1,46 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Build manifest parsing
|
|
3
|
+
*/
|
|
4
|
+
|
|
1
5
|
import { join } from '../utils.js';
|
|
2
|
-
import { _NS } from './urls.js';
|
|
3
6
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
// Prefix joined ahead of the build id when building manifest paths.
const _NS = '/_next/static/';

// Manifest file names.
const _build_manifest_name = '_buildManifest.js';
const _ssg_manifest_name = '_ssgManifest.js';

// The same names with a leading slash.
const _build_manifest_path = '/' + _build_manifest_name;
const _ssg_manifest_path = '/' + _ssg_manifest_name;

// Exported pair: build manifest first, SSG manifest second.
export const _manifest_paths = [
  _build_manifest_path,
  _ssg_manifest_path,
];
|
|
9
14
|
|
|
10
15
|
/**
 * Parse a build manifest script by evaluating it against a stand-in `self`.
 *
 * The script must start with `self.__BUILD_MANIFEST`. It is wrapped in a
 * function whose `self` parameter is a fresh empty object, so the assignments
 * the script makes land on that object and never on a global `self`.
 *
 * SECURITY: the script text is executed with `eval`. Only pass manifests from
 * a source you trust; a hostile script runs with the privileges of the caller.
 *
 * @param {string} script - Build manifest script content
 * @returns {Object} Parsed manifest object
 * @throws {Error} If the script does not start with `self.__BUILD_MANIFEST`
 * @throws {*} Whatever the evaluated script throws (for example a SyntaxError),
 *   rethrown after a warning is logged
 */
export function parseBuildManifest(script) {
  const s = script.trim();

  if (!s.startsWith('self.__BUILD_MANIFEST')) {
    throw new Error('Invalid build manifest (not starting by `"self.__BUILD_MANIFEST"`).');
  }

  // `self` is a parameter of the wrapper, not an assignment to an outer binding:
  // assigning to an undeclared `self` throws in strict-mode eval when no global
  // `self` exists, and overwrites the global one when it does.
  const func = `(function(self) {${s.replace(/;$/, '')};return self.__BUILD_MANIFEST})({});`;

  try {
    return eval(func);
  } catch (e) {
    console.warn(`Could not parse the given build manifest \`${s}\``);
    throw e;
  }
}
|
|
37
37
|
|
|
38
38
|
/**
 * Build the path of the build manifest for a given build.
 *
 * The result is `join(basePath, _NS, buildId, _build_manifest_name)`.
 *
 * @param {string} buildId - Build ID
 * @param {string} basePath - Base path (optional, defaults to an empty string)
 * @returns {string} Build manifest path
 */
export function getBuildManifestPath(buildId, basePath = '') {
  const segments = [basePath, _NS, buildId, _build_manifest_name];
  return join(...segments);
}
|