@lobehub/chat 1.96.6 → 1.96.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,31 @@
|
|
2
2
|
|
3
3
|
# Changelog
|
4
4
|
|
5
|
+
### [Version 1.96.7](https://github.com/lobehub/lobe-chat/compare/v1.96.6...v1.96.7)
|
6
|
+
|
7
|
+
<sup>Released on **2025-06-23**</sup>
|
8
|
+
|
9
|
+
#### 💄 Styles
|
10
|
+
|
11
|
+
- **misc**: Add `blockAds` & `stealth` params for Browserless.
|
12
|
+
|
13
|
+
<br/>
|
14
|
+
|
15
|
+
<details>
|
16
|
+
<summary><kbd>Improvements and Fixes</kbd></summary>
|
17
|
+
|
18
|
+
#### Styles
|
19
|
+
|
20
|
+
- **misc**: Add `blockAds` & `stealth` params for Browserless, closes [#8255](https://github.com/lobehub/lobe-chat/issues/8255) ([2ff3efa](https://github.com/lobehub/lobe-chat/commit/2ff3efa))
|
21
|
+
|
22
|
+
</details>
|
23
|
+
|
24
|
+
<div align="right">
|
25
|
+
|
26
|
+
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
|
27
|
+
|
28
|
+
</div>
|
29
|
+
|
5
30
|
### [Version 1.96.6](https://github.com/lobehub/lobe-chat/compare/v1.96.5...v1.96.6)
|
6
31
|
|
7
32
|
<sup>Released on **2025-06-23**</sup>
|
package/changelog/v1.json
CHANGED
@@ -88,6 +88,40 @@ BROWSERLESS_URL=https://chrome.browserless.io
|
|
88
88
|
|
89
89
|
---
|
90
90
|
|
91
|
+
## `BROWSERLESS_BLOCK_ADS`
|
92
|
+
|
93
|
+
Enables ad blocking functionality. When using [Browserless](https://www.browserless.io/) for web scraping, it automatically blocks common ad resources (such as scripts, images, trackers, etc.), improving scraping speed and page clarity.
|
94
|
+
|
95
|
+
```env
|
96
|
+
BROWSERLESS_BLOCK_ADS=1
|
97
|
+
```
|
98
|
+
|
99
|
+
> 📌 Supported values:
|
100
|
+
>
|
101
|
+
> * `1`: Enable ad blocking (recommended);
|
102
|
+
> * `0`: Disable ad blocking (default).
|
103
|
+
|
104
|
+
> ✅ For best results, use this together with `BROWSERLESS_STEALTH_MODE=1` to improve stealth and the scraping success rate.
|
105
|
+
|
106
|
+
---
|
107
|
+
|
108
|
+
## `BROWSERLESS_STEALTH_MODE`
|
109
|
+
|
110
|
+
Enables stealth mode. When using [Browserless](https://www.browserless.io/) for web scraping, it applies various anti-detection techniques (such as modifying the user agent, removing webdriver traits, simulating user interactions) to bypass anti-bot mechanisms.
|
111
|
+
|
112
|
+
```env
|
113
|
+
BROWSERLESS_STEALTH_MODE=1
|
114
|
+
```
|
115
|
+
|
116
|
+
> 📌 Supported values:
|
117
|
+
>
|
118
|
+
> * `1`: Enable stealth mode (recommended);
|
119
|
+
> * `0`: Disable stealth mode (default).
|
120
|
+
|
121
|
+
> ⚠️ Some websites use advanced anti-scraping techniques. Enabling stealth mode can significantly improve scraping success rate.
|
122
|
+
|
123
|
+
---
|
124
|
+
|
91
125
|
## `GOOGLE_PSE_ENGINE_ID`
|
92
126
|
|
93
127
|
Configure the Search Engine ID for Google Programmable Search Engine (Google PSE), used to restrict the search scope. Must be used alongside `GOOGLE_PSE_API_KEY`.
|
@@ -84,6 +84,40 @@ BROWSERLESS_URL=https://chrome.browserless.io
|
|
84
84
|
|
85
85
|
---
|
86
86
|
|
87
|
+
## `BROWSERLESS_BLOCK_ADS`
|
88
|
+
|
89
|
+
启用广告拦截功能,在使用 [Browserless](https://www.browserless.io/) 进行网页抓取时自动屏蔽常见广告资源(如脚本、图片、追踪器等),提高抓取速度与页面清晰度。
|
90
|
+
|
91
|
+
```env
|
92
|
+
BROWSERLESS_BLOCK_ADS=1
|
93
|
+
```
|
94
|
+
|
95
|
+
> 📌 支持的值:
|
96
|
+
>
|
97
|
+
> * `1`:启用广告拦截(推荐);
|
98
|
+
> * `0`:禁用广告拦截(默认)。
|
99
|
+
|
100
|
+
> ✅ 建议与 `BROWSERLESS_STEALTH_MODE=1` 一起使用,提高爬虫的隐蔽性和成功率。
|
101
|
+
|
102
|
+
---
|
103
|
+
|
104
|
+
## `BROWSERLESS_STEALTH_MODE`
|
105
|
+
|
106
|
+
启用隐身模式,在使用 [Browserless](https://www.browserless.io/) 抓取网页时,通过一系列防检测手段(如修改 UA、移除 webdriver 特征、模拟用户操作)来规避反爬虫机制。
|
107
|
+
|
108
|
+
```env
|
109
|
+
BROWSERLESS_STEALTH_MODE=1
|
110
|
+
```
|
111
|
+
|
112
|
+
> 📌 支持的值:
|
113
|
+
>
|
114
|
+
> * `1`:启用隐身模式(推荐);
|
115
|
+
> * `0`:禁用隐身模式(默认)。
|
116
|
+
|
117
|
+
> ⚠️ 某些网站存在高级反爬机制,启用隐身模式可以显著提升抓取成功率。
|
118
|
+
|
119
|
+
---
|
120
|
+
|
87
121
|
## `GOOGLE_PSE_ENGINE_ID`
|
88
122
|
|
89
123
|
配置 Google Programmable Search Engine(Google PSE)的搜索引擎 ID,用于限定搜索范围。需配合 `GOOGLE_PSE_API_KEY` 一起使用。
|
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@lobehub/chat",
|
3
|
-
"version": "1.96.6",
|
3
|
+
"version": "1.96.7",
|
4
4
|
"description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
|
5
5
|
"keywords": [
|
6
6
|
"framework",
|
@@ -10,6 +10,9 @@ const REJECT_REQUEST_PATTERN =
|
|
10
10
|
'.*\\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\\?|#|$))[\\w-]+(?:[\\?#].*)?$';
|
11
11
|
const BROWSERLESS_TOKEN = process.env.BROWSERLESS_TOKEN;
|
12
12
|
|
13
|
+
const BROWSERLESS_BLOCK_ADS = process.env.BROWSERLESS_BLOCK_ADS === '1';
|
14
|
+
const BROWSERLESS_STEALTH_MODE = process.env.BROWSERLESS_STEALTH_MODE === '1';
|
15
|
+
|
13
16
|
class BrowserlessInitError extends Error {
|
14
17
|
constructor() {
|
15
18
|
super('`BROWSERLESS_URL` or `BROWSERLESS_TOKEN` are required');
|
@@ -30,7 +33,14 @@ export const browserless: CrawlImpl = async (url, { filterOptions }) => {
|
|
30
33
|
|
31
34
|
try {
|
32
35
|
const res = await fetch(
|
33
|
-
qs.stringifyUrl({ query: { token: BROWSERLESS_TOKEN }, url: urlJoin(BASE_URL, '/content') }),
|
36
|
+
qs.stringifyUrl({
|
37
|
+
query: {
|
38
|
+
blockAds: BROWSERLESS_BLOCK_ADS,
|
39
|
+
launch: JSON.stringify({ stealth: BROWSERLESS_STEALTH_MODE }),
|
40
|
+
token: BROWSERLESS_TOKEN,
|
41
|
+
},
|
42
|
+
url: urlJoin(BASE_URL, '/content'),
|
43
|
+
}),
|
34
44
|
{
|
35
45
|
body: JSON.stringify(input),
|
36
46
|
headers: {
|