njsparser 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +390 -40
- package/api.js +76 -50
- package/bun.lock +2 -48
- package/mod.js +148 -0
- package/package.json +11 -16
- package/parser/flight_data.js +189 -306
- package/parser/manifests.js +37 -37
- package/parser/next_data.js +29 -26
- package/parser/types.js +408 -296
- package/parser/urls.js +86 -56
- package/tests/api.test.js +96 -0
- package/tests/integration.test.js +68 -0
- package/tests/parser/flight_data.test.js +105 -0
- package/tests/parser/manifests.test.js +50 -0
- package/tests/parser/next_data.test.js +53 -0
- package/tests/parser/types.test.js +243 -0
- package/tests/parser/urls.test.js +84 -0
- package/tests/property.test.js +299 -0
- package/tests/setup.js +21 -0
- package/tests/utils.test.js +32 -0
- package/tools.js +263 -185
- package/utils.js +29 -24
- package/_.js +0 -10
- package/_.json +0 -12837
- package/api.test.js +0 -41
- package/index.js +0 -8
- package/package-lock.json +0 -291
- package/parser/flight_data.test.js +0 -59
- package/parser/manifests.test.js +0 -36
- package/parser/next_data.test.js +0 -15
- package/parser/types.test.js +0 -261
- package/parser/urls.test.js +0 -26
- package/test/src/index.js +0 -16
- package/tools.test.js +0 -153
- package/utils.test.js +0 -38
package/parser/types.test.js
DELETED
|
@@ -1,261 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
Element,
|
|
4
|
-
HintPreload,
|
|
5
|
-
Module,
|
|
6
|
-
Text,
|
|
7
|
-
Data,
|
|
8
|
-
EmptyData,
|
|
9
|
-
SpecialData,
|
|
10
|
-
HTMLElement,
|
|
11
|
-
DataContainer,
|
|
12
|
-
DataParent,
|
|
13
|
-
URLQuery,
|
|
14
|
-
RSCPayload,
|
|
15
|
-
ErrorEle,
|
|
16
|
-
resolve_type,
|
|
17
|
-
RSCPayloadVersion
|
|
18
|
-
} from "../parser/types.js";
|
|
19
|
-
|
|
20
|
-
// Test Data
|
|
21
|
-
const _flightHintPreloadPayload_1 = {
|
|
22
|
-
value: [
|
|
23
|
-
"/_next/static/media/569ce4b8f30dc480-s.p.woff2",
|
|
24
|
-
"font",
|
|
25
|
-
{ "crossOrigin": "", "type": "font/woff2" }
|
|
26
|
-
],
|
|
27
|
-
value_class: "HL",
|
|
28
|
-
index: 1,
|
|
29
|
-
};
|
|
30
|
-
const _flightHintPreloadPayload_2 = {
|
|
31
|
-
value: [
|
|
32
|
-
"/_next/static/css/3a4b7cc0153d49b4.css?dpl=dpl_F2qLi1zuzNsnuiFMqRXyYU9dbJYw",
|
|
33
|
-
"style"
|
|
34
|
-
],
|
|
35
|
-
value_class: "HL",
|
|
36
|
-
index: 1,
|
|
37
|
-
};
|
|
38
|
-
|
|
39
|
-
const _flightModulePayload_1 = {
|
|
40
|
-
value: [
|
|
41
|
-
30777,
|
|
42
|
-
[
|
|
43
|
-
"71523",
|
|
44
|
-
"static/chunks/25c8a87d-0d1c991f726a4cc1.js",
|
|
45
|
-
"10411",
|
|
46
|
-
"static/chunks/app/(webapp)/%5Blang%5D/(public)/user/layout-bd7c1d222b477529.js"
|
|
47
|
-
],
|
|
48
|
-
"default"
|
|
49
|
-
],
|
|
50
|
-
value_class: "I",
|
|
51
|
-
index: 1,
|
|
52
|
-
};
|
|
53
|
-
const _flightModulePayload_2 = {
|
|
54
|
-
value: {
|
|
55
|
-
'id': '47858',
|
|
56
|
-
'chunks': [
|
|
57
|
-
'272:static/chunks/webpack-2f0e36f832c3608a.js',
|
|
58
|
-
'667:static/chunks/2443530c-7d590f93d1ab76bc.js',
|
|
59
|
-
'139:static/chunks/139-1e0b88e46566ba7f.js'
|
|
60
|
-
],
|
|
61
|
-
'name': '',
|
|
62
|
-
'async': false
|
|
63
|
-
},
|
|
64
|
-
value_class: "I",
|
|
65
|
-
index: 1,
|
|
66
|
-
};
|
|
67
|
-
|
|
68
|
-
const _flightTextPayload = { value: "hello world", value_class: "T", index: 1 };
|
|
69
|
-
const _flightDataPayload_1 = { value: ["$", "$L1", null, null], value_class: null, index: 1 };
|
|
70
|
-
const _flightDataPayload_2 = { value: ["$", "$L1", null, {}], value_class: null, index: 1 };
|
|
71
|
-
const _flightEmptyDataPayload = { value: null, value_class: null, index: 1 };
|
|
72
|
-
const _flightSpecialDataPayload = { value: "$Sreact.suspense", value_class: null, index: 1 };
|
|
73
|
-
|
|
74
|
-
const _flightHTMLElementPayload_1 = {
|
|
75
|
-
value: ["$", "div", null, {}],
|
|
76
|
-
value_class: null,
|
|
77
|
-
index: 1,
|
|
78
|
-
};
|
|
79
|
-
const _flightHTMLElementPayload_2 = {
|
|
80
|
-
value: [
|
|
81
|
-
"$",
|
|
82
|
-
"link",
|
|
83
|
-
"https://sentry.io",
|
|
84
|
-
{ "rel": "dns-prefetch", "href": "https://sentry.io" }
|
|
85
|
-
],
|
|
86
|
-
value_class: null,
|
|
87
|
-
index: 1,
|
|
88
|
-
};
|
|
89
|
-
const _flightDataContainerPayload = {
|
|
90
|
-
value: [_flightHTMLElementPayload_1.value, _flightHTMLElementPayload_2.value],
|
|
91
|
-
value_class: null,
|
|
92
|
-
index: 1,
|
|
93
|
-
};
|
|
94
|
-
const _flightDataParentPayload = {
|
|
95
|
-
value: [
|
|
96
|
-
"$",
|
|
97
|
-
"$L16",
|
|
98
|
-
null,
|
|
99
|
-
{
|
|
100
|
-
"children": [
|
|
101
|
-
"$",
|
|
102
|
-
"$L17",
|
|
103
|
-
null,
|
|
104
|
-
{
|
|
105
|
-
"profile": {}
|
|
106
|
-
}
|
|
107
|
-
]
|
|
108
|
-
}
|
|
109
|
-
],
|
|
110
|
-
value_class: null,
|
|
111
|
-
index: null
|
|
112
|
-
};
|
|
113
|
-
const _flightURLQuery = {
|
|
114
|
-
value: ["key", "val", "d"],
|
|
115
|
-
value_class: null,
|
|
116
|
-
index: 1,
|
|
117
|
-
};
|
|
118
|
-
const _flightRSCPayload_old = {
|
|
119
|
-
value: ["$", "$L1", null, { "buildId": "i am a build id" }],
|
|
120
|
-
value_class: null,
|
|
121
|
-
index: 0,
|
|
122
|
-
};
|
|
123
|
-
const _flightRSCPayload_new = {
|
|
124
|
-
value: { "b": "i am a new build id" },
|
|
125
|
-
value_class: null,
|
|
126
|
-
index: 0,
|
|
127
|
-
};
|
|
128
|
-
const _flightErrorPayload = {
|
|
129
|
-
value: { "digest": "NEXT_NOT_FOUND" },
|
|
130
|
-
value_class: "E",
|
|
131
|
-
index: 1,
|
|
132
|
-
};
|
|
133
|
-
|
|
134
|
-
describe("types", () => {
|
|
135
|
-
test("Element", () => {
|
|
136
|
-
expect(() => new Element({ value: "" })).not.toThrow(); // JS doesn't enforce strict dataclass rules unless I add checks
|
|
137
|
-
// Implementation in types.js uses destructuring, so missing keys might be undefined.
|
|
138
|
-
const el = new Element({value: "hi", value_class: null, index: 1});
|
|
139
|
-
expect(el.value).toBe("hi");
|
|
140
|
-
});
|
|
141
|
-
|
|
142
|
-
test("HintPreload", () => {
|
|
143
|
-
const hl1 = new HintPreload(_flightHintPreloadPayload_1);
|
|
144
|
-
expect(hl1.href).toBe("/_next/static/media/569ce4b8f30dc480-s.p.woff2");
|
|
145
|
-
expect(hl1.type_name).toBe("font");
|
|
146
|
-
expect(hl1.attrs).toEqual({ "crossOrigin": "", "type": "font/woff2" });
|
|
147
|
-
|
|
148
|
-
const hl2 = new HintPreload(_flightHintPreloadPayload_2);
|
|
149
|
-
expect(hl2.href).toBe("/_next/static/css/3a4b7cc0153d49b4.css?dpl=dpl_F2qLi1zuzNsnuiFMqRXyYU9dbJYw");
|
|
150
|
-
expect(hl2.type_name).toBe("style");
|
|
151
|
-
expect(hl2.attrs).toBeNull();
|
|
152
|
-
});
|
|
153
|
-
|
|
154
|
-
test("Module", () => {
|
|
155
|
-
const i = new Module(_flightModulePayload_1);
|
|
156
|
-
expect(i.module_id).toBe(30777);
|
|
157
|
-
expect(i.module_chunks_raw()).toEqual({
|
|
158
|
-
'71523': 'static/chunks/25c8a87d-0d1c991f726a4cc1.js',
|
|
159
|
-
'10411': 'static/chunks/app/(webapp)/%5Blang%5D/(public)/user/layout-bd7c1d222b477529.js'
|
|
160
|
-
});
|
|
161
|
-
expect(i.module_chunks).toEqual({
|
|
162
|
-
'71523': '/_next/static/chunks/25c8a87d-0d1c991f726a4cc1.js',
|
|
163
|
-
'10411': '/_next/static/chunks/app/(webapp)/%5Blang%5D/(public)/user/layout-bd7c1d222b477529.js'
|
|
164
|
-
});
|
|
165
|
-
expect(i.module_name).toBe('default');
|
|
166
|
-
expect(i.is_async).toBe(false);
|
|
167
|
-
|
|
168
|
-
const i2 = new Module(_flightModulePayload_2);
|
|
169
|
-
expect(i2.module_id).toBe(47858);
|
|
170
|
-
expect(i2.module_chunks_raw()).toEqual({
|
|
171
|
-
'272': 'static/chunks/webpack-2f0e36f832c3608a.js',
|
|
172
|
-
'667': 'static/chunks/2443530c-7d590f93d1ab76bc.js',
|
|
173
|
-
'139': 'static/chunks/139-1e0b88e46566ba7f.js'
|
|
174
|
-
});
|
|
175
|
-
expect(i2.module_name).toBe('');
|
|
176
|
-
expect(i2.is_async).toBe(false);
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
test("Text", () => {
|
|
180
|
-
const t = new Text(_flightTextPayload);
|
|
181
|
-
expect(t.text).toBe("hello world");
|
|
182
|
-
});
|
|
183
|
-
|
|
184
|
-
test("Data", () => {
|
|
185
|
-
expect(new Data(_flightDataPayload_1).content).toBeNull();
|
|
186
|
-
expect(new Data(_flightDataPayload_2).content).toEqual({});
|
|
187
|
-
});
|
|
188
|
-
|
|
189
|
-
test("EmptyData", () => {
|
|
190
|
-
expect(new EmptyData(_flightEmptyDataPayload).value).toBeNull();
|
|
191
|
-
});
|
|
192
|
-
|
|
193
|
-
test("SpecialData", () => {
|
|
194
|
-
expect(new SpecialData(_flightSpecialDataPayload).value).toBe("$Sreact.suspense");
|
|
195
|
-
});
|
|
196
|
-
|
|
197
|
-
test("HTMLElement", () => {
|
|
198
|
-
const h1 = new HTMLElement(_flightHTMLElementPayload_1);
|
|
199
|
-
expect(h1.tag).toBe('div');
|
|
200
|
-
expect(h1.href).toBeNull();
|
|
201
|
-
expect(h1.attrs).toEqual({});
|
|
202
|
-
const h2 = new HTMLElement(_flightHTMLElementPayload_2);
|
|
203
|
-
expect(h2.tag).toBe('link');
|
|
204
|
-
expect(h2.href).toBe('https://sentry.io');
|
|
205
|
-
expect(h2.attrs).toEqual({"rel": "dns-prefetch", "href": "https://sentry.io"});
|
|
206
|
-
});
|
|
207
|
-
|
|
208
|
-
test("DataContainer", () => {
|
|
209
|
-
const dcp = new DataContainer(JSON.parse(JSON.stringify(_flightDataContainerPayload))); // clone to safely mutate in constructor if any
|
|
210
|
-
// JS DataContainer constructor replaces value with resolved types.
|
|
211
|
-
expect(dcp.value).toBeInstanceOf(Array);
|
|
212
|
-
expect(dcp.value[0]).toBeInstanceOf(HTMLElement);
|
|
213
|
-
});
|
|
214
|
-
|
|
215
|
-
test("DataParent", () => {
|
|
216
|
-
// Deep clone payload because DataParent modifies it in place in python (and JS port)
|
|
217
|
-
const payload = JSON.parse(JSON.stringify(_flightDataParentPayload));
|
|
218
|
-
const dp = new DataParent(payload);
|
|
219
|
-
expect(dp.children).toBeInstanceOf(Data); // children resolves to Data
|
|
220
|
-
expect(dp.children.content).toEqual({"profile": {}});
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
test("URLQuery", () => {
|
|
224
|
-
const urlp = new URLQuery(_flightURLQuery);
|
|
225
|
-
expect(urlp.key).toBe("key");
|
|
226
|
-
expect(urlp.val).toBe("val");
|
|
227
|
-
});
|
|
228
|
-
|
|
229
|
-
test("RSCPayload", () => {
|
|
230
|
-
const rscp1 = new RSCPayload(_flightRSCPayload_old);
|
|
231
|
-
expect(rscp1._version()).toBe(RSCPayloadVersion.old);
|
|
232
|
-
expect(rscp1.build_id).toBe("i am a build id");
|
|
233
|
-
const rscp2 = new RSCPayload(_flightRSCPayload_new);
|
|
234
|
-
expect(rscp2._version()).toBe(RSCPayloadVersion.new);
|
|
235
|
-
expect(rscp2.build_id).toBe("i am a new build id");
|
|
236
|
-
});
|
|
237
|
-
|
|
238
|
-
test("ErrorEle", () => {
|
|
239
|
-
const fe = new ErrorEle(_flightErrorPayload);
|
|
240
|
-
expect(fe.digest).toBe("NEXT_NOT_FOUND");
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
test("resolve_type", () => {
|
|
244
|
-
expect(resolve_type(_flightHintPreloadPayload_1)).toBeInstanceOf(HintPreload);
|
|
245
|
-
expect(resolve_type(_flightHintPreloadPayload_2)).toBeInstanceOf(HintPreload);
|
|
246
|
-
expect(resolve_type(_flightModulePayload_1)).toBeInstanceOf(Module);
|
|
247
|
-
expect(resolve_type(_flightModulePayload_2)).toBeInstanceOf(Module);
|
|
248
|
-
expect(resolve_type(_flightTextPayload)).toBeInstanceOf(Text);
|
|
249
|
-
expect(resolve_type(_flightDataPayload_1)).toBeInstanceOf(Data);
|
|
250
|
-
expect(resolve_type(_flightDataPayload_2)).toBeInstanceOf(Data);
|
|
251
|
-
expect(resolve_type(_flightEmptyDataPayload)).toBeInstanceOf(EmptyData);
|
|
252
|
-
expect(resolve_type(_flightSpecialDataPayload)).toBeInstanceOf(SpecialData);
|
|
253
|
-
expect(resolve_type(_flightURLQuery)).toBeInstanceOf(URLQuery);
|
|
254
|
-
expect(resolve_type(JSON.parse(JSON.stringify(_flightDataContainerPayload)))).toBeInstanceOf(DataContainer);
|
|
255
|
-
expect(resolve_type(JSON.parse(JSON.stringify(_flightHTMLElementPayload_1)))).toBeInstanceOf(HTMLElement);
|
|
256
|
-
expect(resolve_type(JSON.parse(JSON.stringify(_flightDataParentPayload)))).toBeInstanceOf(DataParent); // DataParent detection logic
|
|
257
|
-
expect(resolve_type(_flightRSCPayload_old)).toBeInstanceOf(RSCPayload);
|
|
258
|
-
expect(resolve_type(_flightRSCPayload_new)).toBeInstanceOf(RSCPayload);
|
|
259
|
-
expect(resolve_type(_flightErrorPayload)).toBeInstanceOf(ErrorEle);
|
|
260
|
-
});
|
|
261
|
-
});
|
package/parser/urls.test.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import { get_next_static_urls, get_base_path } from "../parser/urls.js";
|
|
3
|
-
import {
|
|
4
|
-
m_soundcloud_com_html,
|
|
5
|
-
x_com_html,
|
|
6
|
-
nextjs_org_html,
|
|
7
|
-
swag_live_html
|
|
8
|
-
} from "../test/src/index.js";
|
|
9
|
-
|
|
10
|
-
describe("urls", () => {
|
|
11
|
-
test("get_next_static_urls", () => {
|
|
12
|
-
expect(get_next_static_urls(m_soundcloud_com_html)).not.toBeNull();
|
|
13
|
-
expect(get_next_static_urls(x_com_html)).toBeNull();
|
|
14
|
-
expect(get_next_static_urls(nextjs_org_html)).not.toBeNull();
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
test("get_base_path", () => {
|
|
18
|
-
expect(get_base_path(m_soundcloud_com_html)).toBe("https://m.sndcdn.com");
|
|
19
|
-
expect(get_base_path(m_soundcloud_com_html, { remove_domain: true })).toBe("");
|
|
20
|
-
expect(get_base_path(x_com_html)).toBeNull();
|
|
21
|
-
expect(get_base_path(swag_live_html, { remove_domain: true })).toBe("/static");
|
|
22
|
-
|
|
23
|
-
expect(() => get_base_path(["https://test.com/hello"])).toThrow();
|
|
24
|
-
expect(() => get_base_path(["/bubu/_next/static/", "/bububu/_next/static/"])).toThrow();
|
|
25
|
-
});
|
|
26
|
-
});
|
package/test/src/index.js
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import { file } from "bun";
|
|
2
|
-
import { join, dirname } from "path";
|
|
3
|
-
|
|
4
|
-
const srcDir = join(dirname(new URL(import.meta.url).pathname), "../../../test/src");
|
|
5
|
-
|
|
6
|
-
export const m_soundcloud_com_html = await file(join(srcDir, "m.soundcloud.com.html")).text();
|
|
7
|
-
export const nextjs_org_html = await file(join(srcDir, "nextjs.org.html")).text();
|
|
8
|
-
export const x_com_html = await file(join(srcDir, "x.com.html")).text();
|
|
9
|
-
export const swag_live_html = await file(join(srcDir, "swag.live.html")).text();
|
|
10
|
-
export const club_fans_html = await file(join(srcDir, "club.fans.html")).text();
|
|
11
|
-
export const mintstars_com_html = await file(join(srcDir, "mintstars.com.html")).text();
|
|
12
|
-
|
|
13
|
-
export const nextjs_org_4mSOwJptzzPemGzzI8AOo_buildManifest = await file(join(srcDir, "nextjs_org_4mSOwJptzzPemGzzI8AOo_buildManifest.js")).text();
|
|
14
|
-
export const swag_live_giz3a1H7OUzfxgxRHIdMx_buildManifest = await file(join(srcDir, "swag_live_giz3a1H7OUzfxgxRHIdMx_buildManifest.js")).text();
|
|
15
|
-
export const app_osint_industries_yAzR27j6CjHLWW3VxUzzi_buildManifest = await file(join(srcDir, "app_osint_industries_yAzR27j6CjHLWW3VxUzzi_buildManifest.js")).text();
|
|
16
|
-
export const runpod_io_s4xe_TFYlTTFF_bw1HfD4_buildManifest = await file(join(srcDir, "runpod_io_s4xe_TFYlTTFF_bw1HfD4_buildManifest.js")).text();
|
package/tools.test.js
DELETED
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import {
|
|
3
|
-
has_nextjs,
|
|
4
|
-
findall_in_flight_data,
|
|
5
|
-
find_in_flight_data,
|
|
6
|
-
find_build_id,
|
|
7
|
-
BeautifulFD
|
|
8
|
-
} from "./tools.js";
|
|
9
|
-
import {
|
|
10
|
-
RSCPayload,
|
|
11
|
-
ErrorEle,
|
|
12
|
-
SpecialData,
|
|
13
|
-
Text,
|
|
14
|
-
URLQuery,
|
|
15
|
-
Data,
|
|
16
|
-
Module,
|
|
17
|
-
resolve_type
|
|
18
|
-
} from "./parser/types.js";
|
|
19
|
-
import {
|
|
20
|
-
m_soundcloud_com_html,
|
|
21
|
-
nextjs_org_html,
|
|
22
|
-
x_com_html,
|
|
23
|
-
swag_live_html,
|
|
24
|
-
club_fans_html
|
|
25
|
-
} from "./test/src/index.js";
|
|
26
|
-
|
|
27
|
-
describe("tools", () => {
|
|
28
|
-
test("has_nextjs", () => {
|
|
29
|
-
expect(has_nextjs(m_soundcloud_com_html)).toBe(true);
|
|
30
|
-
expect(has_nextjs(nextjs_org_html)).toBe(true);
|
|
31
|
-
expect(has_nextjs(x_com_html)).toBe(false);
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
const flight_data = {
|
|
35
|
-
0: new RSCPayload({ value: { b: "BUILDID" }, value_class: null, index: 0 }),
|
|
36
|
-
1: new ErrorEle({ value: { digest: "NEXT_NOT_FOUND" }, value_class: null, index: 1 }),
|
|
37
|
-
2: new SpecialData({ value: "$Sreactblahblah", value_class: null, index: 2 }),
|
|
38
|
-
3: new Text({ value: "hello world", value_class: null, index: 3 }),
|
|
39
|
-
};
|
|
40
|
-
|
|
41
|
-
test("findall_in_flight_data", () => {
|
|
42
|
-
const class_filters = [RSCPayload, Module];
|
|
43
|
-
// Note: findall_in_flight_data takes arguments as object
|
|
44
|
-
const items = findall_in_flight_data({ flight_data, class_filters });
|
|
45
|
-
for (const item of items) {
|
|
46
|
-
expect(item instanceof RSCPayload || item instanceof Module).toBe(true);
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
expect(findall_in_flight_data({ flight_data })).toEqual(Object.values(flight_data));
|
|
50
|
-
|
|
51
|
-
const filtered = findall_in_flight_data({
|
|
52
|
-
flight_data,
|
|
53
|
-
callback: (item) => item.index % 2 !== 0
|
|
54
|
-
});
|
|
55
|
-
for (const item of filtered) {
|
|
56
|
-
expect(item.index % 2).toBe(1);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
expect(findall_in_flight_data({ flight_data: null })).toEqual([]);
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
const _recursive_data = {
|
|
63
|
-
"value": [
|
|
64
|
-
{"value": null,"value_class": null,"index": null},
|
|
65
|
-
{"value": false,"value_class": null,"index": null},
|
|
66
|
-
{
|
|
67
|
-
"value": [
|
|
68
|
-
"$",
|
|
69
|
-
"$L16",
|
|
70
|
-
null,
|
|
71
|
-
{
|
|
72
|
-
"children": [
|
|
73
|
-
"$",
|
|
74
|
-
"$L17",
|
|
75
|
-
null,
|
|
76
|
-
{
|
|
77
|
-
"profile": {}
|
|
78
|
-
}
|
|
79
|
-
]
|
|
80
|
-
}
|
|
81
|
-
],
|
|
82
|
-
"value_class": null,
|
|
83
|
-
"index": null
|
|
84
|
-
}
|
|
85
|
-
],
|
|
86
|
-
"value_class": null,
|
|
87
|
-
"index": 5,
|
|
88
|
-
"cls": "DataContainer"
|
|
89
|
-
};
|
|
90
|
-
|
|
91
|
-
test("find_in_flight_data", () => {
|
|
92
|
-
expect(find_in_flight_data({ flight_data, class_filters: [URLQuery] })).toBeNull();
|
|
93
|
-
expect(find_in_flight_data({ flight_data, class_filters: [RSCPayload] })).toEqual(flight_data[0]);
|
|
94
|
-
expect(find_in_flight_data({ flight_data: null })).toBeNull();
|
|
95
|
-
|
|
96
|
-
// Testing recursive data
|
|
97
|
-
// Need to construct recursive flight data properly using resolve_type
|
|
98
|
-
// But _recursive_data structure is dict.
|
|
99
|
-
// Python code: `resolve_type(**_recursive_data)`
|
|
100
|
-
// JS: `resolve_type(_recursive_data)`
|
|
101
|
-
const recursiveFDObject = resolve_type(_recursive_data);
|
|
102
|
-
const recursiveFD = { 0: recursiveFDObject };
|
|
103
|
-
|
|
104
|
-
const found = find_in_flight_data({ flight_data: recursiveFD, class_filters: [Data] });
|
|
105
|
-
expect(found.content).toEqual({ "profile": {} });
|
|
106
|
-
|
|
107
|
-
const foundNonRecursive = find_in_flight_data({ flight_data: recursiveFD, class_filters: [Data], recursive: false });
|
|
108
|
-
expect(foundNonRecursive).toBeNull();
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
test("find_build_id", () => {
|
|
112
|
-
expect(find_build_id(m_soundcloud_com_html)).toBe("1733156665");
|
|
113
|
-
expect(find_build_id(nextjs_org_html)).toBe("4mSOwJptzzPemGzzI8AOo");
|
|
114
|
-
expect(find_build_id(x_com_html)).toBeNull();
|
|
115
|
-
expect(find_build_id(swag_live_html)).toBe("giz3a1H7OUzfxgxRHIdMx");
|
|
116
|
-
// Recursive search
|
|
117
|
-
expect(find_build_id(club_fans_html)).toBe("n2xbxZXkzoS6U5w7CgB-T");
|
|
118
|
-
});
|
|
119
|
-
|
|
120
|
-
test("BeautifulFD", () => {
|
|
121
|
-
expect(() => new BeautifulFD(null)).toThrow(TypeError);
|
|
122
|
-
|
|
123
|
-
const fd = new BeautifulFD(club_fans_html);
|
|
124
|
-
expect(fd.find()).not.toBeNull();
|
|
125
|
-
expect(fd.find({ class_filters: [Data] })).toBeInstanceOf(Data);
|
|
126
|
-
// fd.find(["Data"]) -> string filter support?
|
|
127
|
-
// My implementation in tools.js supports class_filters.
|
|
128
|
-
// But BeautifulFD passes directly to find_iter etc.
|
|
129
|
-
// It doesn't auto-convert string to class.
|
|
130
|
-
// Python `BeautifulFD.find_iter` does convert string to class via `_tl2obj`.
|
|
131
|
-
// I implemented that in tools.js?
|
|
132
|
-
// Check `BeautifulFD.find_iter` in tools.js...
|
|
133
|
-
// No, I didn't implement string-to-class mapping in `finditer_in_flight_data`.
|
|
134
|
-
// I need to update `BeautifulFD` or `tools.js` to support string filters to match Python behavior.
|
|
135
|
-
// I will fix tools.js later. For now, assume it fails or I fix it.
|
|
136
|
-
|
|
137
|
-
// expect(() => fd.find({ class_filters: ["Datsdfdsfa"] })).toThrow();
|
|
138
|
-
|
|
139
|
-
// Iteration
|
|
140
|
-
// for (const [key, value] of fd) ... BeautifulFD is not iterable in JS unless I define [Symbol.iterator]
|
|
141
|
-
// I defined `as_list`.
|
|
142
|
-
// In python `__iter__` yields items.
|
|
143
|
-
// JS `BeautifulFD` does NOT have [Symbol.iterator] implemented yet.
|
|
144
|
-
|
|
145
|
-
// Boolean check
|
|
146
|
-
expect(fd.bool()).toBe(true);
|
|
147
|
-
|
|
148
|
-
const empty_bfd = new BeautifulFD("<html></html>");
|
|
149
|
-
expect(empty_bfd.bool()).toBe(false);
|
|
150
|
-
expect(empty_bfd.length).toBe(0);
|
|
151
|
-
expect(empty_bfd.as_list()).toBeInstanceOf(Array);
|
|
152
|
-
});
|
|
153
|
-
});
|
package/utils.test.js
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from "bun:test";
|
|
2
|
-
import { make_tree, join } from "./utils.js";
|
|
3
|
-
import * as cheerio from 'cheerio';
|
|
4
|
-
|
|
5
|
-
describe("utils", () => {
|
|
6
|
-
test("make_tree", () => {
|
|
7
|
-
const h = "<html>hello</html>";
|
|
8
|
-
// make_tree returns a cheerio object, which is a function
|
|
9
|
-
const $ = make_tree(h);
|
|
10
|
-
expect(typeof $).toBe("function");
|
|
11
|
-
// Verify it works
|
|
12
|
-
expect($("html").text()).toBe("hello");
|
|
13
|
-
|
|
14
|
-
// Pass existing cheerio object
|
|
15
|
-
const $2 = make_tree($);
|
|
16
|
-
expect($2).toBe($);
|
|
17
|
-
});
|
|
18
|
-
|
|
19
|
-
test("join", () => {
|
|
20
|
-
expect(join("hello", "world")).toBe("/hello/world");
|
|
21
|
-
expect(join("/hello///", "/world/")).toBe("/hello/world");
|
|
22
|
-
// Python's join("/a", "b") -> "/a/b".
|
|
23
|
-
// My implementation: join("/hello", "world") -> "hello/world"?
|
|
24
|
-
// Wait, Python's:
|
|
25
|
-
// l = [""]
|
|
26
|
-
// [arg.strip("/") for arg in args] -> ["hello", "world"]
|
|
27
|
-
// "/".join(["", "hello", "world"]) -> "/hello/world"
|
|
28
|
-
// So it PREPENDS a slash.
|
|
29
|
-
|
|
30
|
-
// My implementation:
|
|
31
|
-
// const parts = [""];
|
|
32
|
-
// parts.push(...args...)
|
|
33
|
-
// parts.join('/') -> "/hello/world"
|
|
34
|
-
// Yes, it matches.
|
|
35
|
-
|
|
36
|
-
expect(join("a", "b")).toBe("/a/b");
|
|
37
|
-
});
|
|
38
|
-
});
|