@synstack/web 1.1.3 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +106 -1
- package/package.json +3 -3
package/README.md
CHANGED
@@ -1 +1,106 @@
|
|
1
|
-
# @synstack/
|
1
|
+
# @synstack/web
|
2
|
+
|
3
|
+
Web utilities for fetching and parsing web content
|
4
|
+
|
5
|
+
> [!WARNING]
|
6
|
+
> This package is included in the [@synstack/synscript](../synscript/README.md) package. It is not recommended to install both packages at the same time.
|
7
|
+
|
8
|
+
## What is it for?
|
9
|
+
|
10
|
+
This package provides utilities for fetching and parsing web content, including JSON data, plain text, and article extraction:
|
11
|
+
|
12
|
+
```typescript
|
13
|
+
import { fetchJson, fetchText, fetchArticle } from "@synstack/web";
|
14
|
+
|
15
|
+
// Fetch and validate JSON data
|
16
|
+
const data = await fetchJson("https://api.example.com/data", {
|
17
|
+
schema: myZodSchema,
|
18
|
+
});
|
19
|
+
|
20
|
+
// Fetch plain text content
|
21
|
+
const text = await fetchText("https://example.com/content.txt");
|
22
|
+
|
23
|
+
// Extract article content
|
24
|
+
const article = await fetchArticle("https://blog.example.com/post");
|
25
|
+
console.log(article.title); // Article title
|
26
|
+
console.log(article.content); // Article HTML content
|
27
|
+
```
|
28
|
+
|
29
|
+
## Installation
|
30
|
+
|
31
|
+
```bash
|
32
|
+
npm install @synstack/web
|
33
|
+
# or
|
34
|
+
yarn add @synstack/web
|
35
|
+
# or
|
36
|
+
pnpm add @synstack/web
|
37
|
+
```
|
38
|
+
|
39
|
+
## Features
|
40
|
+
|
41
|
+
### JSON Fetching
|
42
|
+
|
43
|
+
Fetch and validate JSON data with optional schema validation:
|
44
|
+
|
45
|
+
```typescript
|
46
|
+
import { fetchJson } from "@synstack/web";
|
47
|
+
import { z } from "zod";
|
48
|
+
|
49
|
+
// Define a schema for type safety
|
50
|
+
const userSchema = z.object({
|
51
|
+
id: z.number(),
|
52
|
+
name: z.string(),
|
53
|
+
});
|
54
|
+
|
55
|
+
// Fetch and validate JSON data
|
56
|
+
const user = await fetchJson("https://api.example.com/user", {
|
57
|
+
schema: userSchema,
|
58
|
+
});
|
59
|
+
```
|
60
|
+
|
61
|
+
### Text Fetching
|
62
|
+
|
63
|
+
Retrieve plain text content from URLs:
|
64
|
+
|
65
|
+
```typescript
|
66
|
+
import { fetchText } from "@synstack/web";
|
67
|
+
|
68
|
+
// Fetch text content
|
69
|
+
const content = await fetchText("https://example.com/content.txt");
|
70
|
+
```
|
71
|
+
|
72
|
+
### Article Extraction
|
73
|
+
|
74
|
+
Extract article content from web pages using Mozilla's Readability:
|
75
|
+
|
76
|
+
```typescript
|
77
|
+
import { fetchArticle } from "@synstack/web";
|
78
|
+
|
79
|
+
// Extract article content
|
80
|
+
const article = await fetchArticle("https://blog.example.com/post");
|
81
|
+
|
82
|
+
console.log({
|
83
|
+
title: article.title, // Article title
|
84
|
+
content: article.content, // Article HTML content
|
85
|
+
byline: article.byline, // Author information
|
86
|
+
siteName: article.siteName, // Website name
|
87
|
+
lang: article.lang, // Article language
|
88
|
+
publishedTime: article.publishedTime, // Publication time
|
89
|
+
});
|
90
|
+
```
|
91
|
+
|
92
|
+
### Error Handling
|
93
|
+
|
94
|
+
Handle article extraction errors:
|
95
|
+
|
96
|
+
```typescript
|
97
|
+
import { fetchArticle, ArticleNotFoundException } from "@synstack/web";
|
98
|
+
|
99
|
+
try {
|
100
|
+
const article = await fetchArticle("https://example.com/not-an-article");
|
101
|
+
} catch (error) {
|
102
|
+
if (error instanceof ArticleNotFoundException) {
|
103
|
+
console.error("Could not extract article:", error.message);
|
104
|
+
}
|
105
|
+
}
|
106
|
+
```
|
package/package.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
"publishConfig": {
|
5
5
|
"access": "public"
|
6
6
|
},
|
7
|
-
"version": "1.1.3",
|
7
|
+
"version": "1.1.5",
|
8
8
|
"description": "Web scraping utilities",
|
9
9
|
"keywords": [
|
10
10
|
"web",
|
@@ -20,7 +20,7 @@
|
|
20
20
|
"homepage": "https://github.com/pAIrprogio/synscript/tree/main/packages/web",
|
21
21
|
"repository": {
|
22
22
|
"type": "git",
|
23
|
-
"url": "https://github.com/pAIrprogio/
|
23
|
+
"url": "https://github.com/pAIrprogio/synscript.git",
|
24
24
|
"directory": "packages/web"
|
25
25
|
},
|
26
26
|
"license": "Apache-2.0",
|
@@ -63,5 +63,5 @@
|
|
63
63
|
"!src/**/*.test.ts",
|
64
64
|
"dist/**/*"
|
65
65
|
],
|
66
|
-
"gitHead": "
|
66
|
+
"gitHead": "886036553ab02c6c1b98289b1de64240de866521"
|
67
67
|
}
|