xscrape 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -3
- package/dist/index.d.cts +7 -6
- package/dist/index.d.ts +7 -6
- package/package.json +9 -7
package/README.md
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
<p align="center">
|
|
2
|
+
|
|
3
|
+
<h1 align="center">🕷️<br/><code>xscrape</code></h1>
|
|
4
|
+
<p align="center">Extract and transform HTML with your own schema, powered by <code>Standard Schema</code> compatibility.
|
|
5
|
+
<br/>
|
|
6
|
+
by <a href="https://github.com/johnie">@johnie</a>
|
|
7
|
+
</p>
|
|
8
|
+
</p>
|
|
9
|
+
<br/>
|
|
10
|
+
|
|
11
|
+
<p align="center">
|
|
12
|
+
<a href="https://opensource.org/licenses/MIT" rel="nofollow"><img src="https://img.shields.io/github/license/johnie/xscrape" alt="License"></a>
|
|
13
|
+
<a href="https://www.npmjs.com/package/xscrape" rel="nofollow"><img src="https://img.shields.io/npm/v/xscrape.svg" alt="npm"></a>
|
|
14
|
+
<a href="https://github.com/johnie/xscrape" rel="nofollow"><img src="https://img.shields.io/github/stars/johnie/xscrape" alt="stars"></a>
|
|
15
|
+
</p>
|
|
16
|
+
|
|
17
|
+
<br/>
|
|
18
|
+
<br/>
|
|
4
19
|
|
|
5
20
|
## Features
|
|
6
21
|
|
package/dist/index.d.cts
CHANGED
|
@@ -7,13 +7,14 @@ interface ExtractDescriptor {
|
|
|
7
7
|
value?: string | ExtractDescriptorFn | ExtractMap;
|
|
8
8
|
}
|
|
9
9
|
type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor];
|
|
10
|
-
|
|
11
|
-
[key: string]: ExtractValue;
|
|
12
|
-
}
|
|
10
|
+
type ExtractMap = Record<string, ExtractValue>;
|
|
13
11
|
|
|
14
|
-
type
|
|
12
|
+
type SchemaAwareExtractMap<T> = {
|
|
13
|
+
[K in keyof T]: ExtractMap[string];
|
|
14
|
+
};
|
|
15
|
+
type ScraperConfig<S extends StandardSchemaV1, R extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>> = {
|
|
15
16
|
schema: S;
|
|
16
|
-
extract:
|
|
17
|
+
extract: SchemaAwareExtractMap<StandardSchemaV1.InferOutput<S>>;
|
|
17
18
|
transform?: (data: StandardSchemaV1.InferOutput<S>) => Promise<R> | R;
|
|
18
19
|
};
|
|
19
20
|
type ValidationResult<T> = {
|
|
@@ -26,6 +27,6 @@ type ScraperResult<T> = {
|
|
|
26
27
|
error?: unknown;
|
|
27
28
|
};
|
|
28
29
|
|
|
29
|
-
declare function defineScraper<S extends StandardSchemaV1,
|
|
30
|
+
declare function defineScraper<S extends StandardSchemaV1, T extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>, R extends T = T>(config: ScraperConfig<S, R>): (html: string) => Promise<ScraperResult<R>>;
|
|
30
31
|
|
|
31
32
|
export { type ScraperConfig, type ScraperResult, type ValidationResult, defineScraper };
|
package/dist/index.d.ts
CHANGED
|
@@ -7,13 +7,14 @@ interface ExtractDescriptor {
|
|
|
7
7
|
value?: string | ExtractDescriptorFn | ExtractMap;
|
|
8
8
|
}
|
|
9
9
|
type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor];
|
|
10
|
-
|
|
11
|
-
[key: string]: ExtractValue;
|
|
12
|
-
}
|
|
10
|
+
type ExtractMap = Record<string, ExtractValue>;
|
|
13
11
|
|
|
14
|
-
type
|
|
12
|
+
type SchemaAwareExtractMap<T> = {
|
|
13
|
+
[K in keyof T]: ExtractMap[string];
|
|
14
|
+
};
|
|
15
|
+
type ScraperConfig<S extends StandardSchemaV1, R extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>> = {
|
|
15
16
|
schema: S;
|
|
16
|
-
extract:
|
|
17
|
+
extract: SchemaAwareExtractMap<StandardSchemaV1.InferOutput<S>>;
|
|
17
18
|
transform?: (data: StandardSchemaV1.InferOutput<S>) => Promise<R> | R;
|
|
18
19
|
};
|
|
19
20
|
type ValidationResult<T> = {
|
|
@@ -26,6 +27,6 @@ type ScraperResult<T> = {
|
|
|
26
27
|
error?: unknown;
|
|
27
28
|
};
|
|
28
29
|
|
|
29
|
-
declare function defineScraper<S extends StandardSchemaV1,
|
|
30
|
+
declare function defineScraper<S extends StandardSchemaV1, T extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>, R extends T = T>(config: ScraperConfig<S, R>): (html: string) => Promise<ScraperResult<R>>;
|
|
30
31
|
|
|
31
32
|
export { type ScraperConfig, type ScraperResult, type ValidationResult, defineScraper };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xscrape",
|
|
3
|
-
"version": "3.0.0",
|
|
3
|
+
"version": "3.0.2",
|
|
4
4
|
"description": "A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"exports": {
|
|
@@ -24,9 +24,10 @@
|
|
|
24
24
|
"crawler",
|
|
25
25
|
"scraper",
|
|
26
26
|
"zod",
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"effect-schema"
|
|
27
|
+
"valibot",
|
|
28
|
+
"arktype",
|
|
29
|
+
"effect-schema",
|
|
30
|
+
"standard-schema"
|
|
30
31
|
],
|
|
31
32
|
"author": "Johnie Hjelm <johnie@hjelm.im>",
|
|
32
33
|
"license": "MIT",
|
|
@@ -40,12 +41,13 @@
|
|
|
40
41
|
"homepage": "https://github.com/johnie/xscrape#readme",
|
|
41
42
|
"devDependencies": {
|
|
42
43
|
"@arethetypeswrong/cli": "^0.18.2",
|
|
44
|
+
"@biomejs/biome": "2.1.1",
|
|
43
45
|
"@changesets/changelog-github": "^0.5.1",
|
|
44
46
|
"@changesets/cli": "^2.29.5",
|
|
45
47
|
"arktype": "^2.1.20",
|
|
46
48
|
"effect": "^3.16.12",
|
|
47
49
|
"jsdom": "^26.1.0",
|
|
48
|
-
"
|
|
50
|
+
"lefthook": "^1.12.2",
|
|
49
51
|
"tsup": "^8.5.0",
|
|
50
52
|
"typescript": "^5.8.3",
|
|
51
53
|
"valibot": "^1.1.0",
|
|
@@ -64,8 +66,8 @@
|
|
|
64
66
|
"lint": "tsc",
|
|
65
67
|
"test": "vitest run",
|
|
66
68
|
"test:watch": "vitest",
|
|
67
|
-
"format": "
|
|
68
|
-
"check-format": "
|
|
69
|
+
"format": "biome format --write ./src",
|
|
70
|
+
"check-format": "biome check ./src",
|
|
69
71
|
"check-exports": "attw --pack .",
|
|
70
72
|
"local-release": "npm run ci && changeset version && changeset publish",
|
|
71
73
|
"release": "npm run ci && changeset publish"
|