@lobehub/chat 1.96.6 → 1.96.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  # Changelog
4
4
 
5
+ ### [Version 1.96.7](https://github.com/lobehub/lobe-chat/compare/v1.96.6...v1.96.7)
6
+
7
+ <sup>Released on **2025-06-23**</sup>
8
+
9
+ #### 💄 Styles
10
+
11
+ - **misc**: Add `blockAds` & `stealth` params for Browserless.
12
+
13
+ <br/>
14
+
15
+ <details>
16
+ <summary><kbd>Improvements and Fixes</kbd></summary>
17
+
18
+ #### Styles
19
+
20
+ - **misc**: Add `blockAds` & `stealth` params for Browserless, closes [#8255](https://github.com/lobehub/lobe-chat/issues/8255) ([2ff3efa](https://github.com/lobehub/lobe-chat/commit/2ff3efa))
21
+
22
+ </details>
23
+
24
+ <div align="right">
25
+
26
+ [![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
27
+
28
+ </div>
29
+
5
30
  ### [Version 1.96.6](https://github.com/lobehub/lobe-chat/compare/v1.96.5...v1.96.6)
6
31
 
7
32
  <sup>Released on **2025-06-23**</sup>
package/changelog/v1.json CHANGED
@@ -1,4 +1,13 @@
1
1
  [
2
+ {
3
+ "children": {
4
+ "improvements": [
5
+ "Add blockAds & stealth params for Browserless."
6
+ ]
7
+ },
8
+ "date": "2025-06-23",
9
+ "version": "1.96.7"
10
+ },
2
11
  {
3
12
  "children": {
4
13
  "improvements": [
@@ -88,6 +88,40 @@ BROWSERLESS_URL=https://chrome.browserless.io
88
88
 
89
89
  ---
90
90
 
91
+ ## `BROWSERLESS_BLOCK_ADS`
92
+
93
+ Enables ad blocking. When [Browserless](https://www.browserless.io/) is used for web scraping, common ad resources (such as scripts, images, and trackers) are blocked automatically, which speeds up scraping and produces cleaner pages.
94
+
95
+ ```env
96
+ BROWSERLESS_BLOCK_ADS=1
97
+ ```
98
+
99
+ > 📌 Supported values:
100
+ >
101
+ > * `1`: Enable ad blocking (recommended);
102
+ > * `0`: Disable ad blocking (default).
103
+
104
+ > ✅ It is recommended to use this together with `BROWSERLESS_STEALTH_MODE=1` to improve stealth and the scraping success rate.
105
+
106
+ ---
107
+
108
+ ## `BROWSERLESS_STEALTH_MODE`
109
+
110
+ Enables stealth mode. When [Browserless](https://www.browserless.io/) is used for web scraping, stealth mode applies various anti-detection techniques (such as modifying the user agent, removing webdriver traits, and simulating user interactions) to bypass anti-bot mechanisms.
111
+
112
+ ```env
113
+ BROWSERLESS_STEALTH_MODE=1
114
+ ```
115
+
116
+ > 📌 Supported values:
117
+ >
118
+ > * `1`: Enable stealth mode (recommended);
119
+ > * `0`: Disable stealth mode (default).
120
+
121
+ > ⚠️ Some websites use advanced anti-scraping techniques. Enabling stealth mode can significantly improve the scraping success rate.
122
+
123
+ ---
124
+
91
125
  ## `GOOGLE_PSE_ENGINE_ID`
92
126
 
93
127
  Configure the Search Engine ID for Google Programmable Search Engine (Google PSE), used to restrict the search scope. Must be used alongside `GOOGLE_PSE_API_KEY`.
@@ -84,6 +84,40 @@ BROWSERLESS_URL=https://chrome.browserless.io
84
84
 
85
85
  ---
86
86
 
87
+ ## `BROWSERLESS_BLOCK_ADS`
88
+
89
+ 启用广告拦截功能,在使用 [Browserless](https://www.browserless.io/) 进行网页抓取时自动屏蔽常见广告资源(如脚本、图片、追踪器等),提高抓取速度与页面清晰度。
90
+
91
+ ```env
92
+ BROWSERLESS_BLOCK_ADS=1
93
+ ```
94
+
95
+ > 📌 支持的值:
96
+ >
97
+ > * `1`:启用广告拦截(推荐);
98
+ > * `0`:禁用广告拦截(默认)。
99
+
100
+ > ✅ 建议与 `BROWSERLESS_STEALTH_MODE=1` 一起使用,提高爬虫的隐蔽性和成功率。
101
+
102
+ ---
103
+
104
+ ## `BROWSERLESS_STEALTH_MODE`
105
+
106
+ 启用隐身模式,在使用 [Browserless](https://www.browserless.io/) 抓取网页时,通过一系列防检测手段(如修改 UA、移除 webdriver 特征、模拟用户操作)来规避反爬虫机制。
107
+
108
+ ```env
109
+ BROWSERLESS_STEALTH_MODE=1
110
+ ```
111
+
112
+ > 📌 支持的值:
113
+ >
114
+ > * `1`:启用隐身模式(推荐);
115
+ > * `0`:禁用隐身模式(默认)。
116
+
117
+ > ⚠️ 某些网站存在高级反爬机制,启用隐身模式可以显著提升抓取成功率。
118
+
119
+ ---
120
+
87
121
  ## `GOOGLE_PSE_ENGINE_ID`
88
122
 
89
123
  配置 Google Programmable Search Engine(Google PSE)的搜索引擎 ID,用于限定搜索范围。需配合 `GOOGLE_PSE_API_KEY` 一起使用。
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lobehub/chat",
3
- "version": "1.96.6",
3
+ "version": "1.96.7",
4
4
  "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.",
5
5
  "keywords": [
6
6
  "framework",
@@ -10,6 +10,9 @@ const REJECT_REQUEST_PATTERN =
10
10
  '.*\\.(?!(html|css|js|json|xml|webmanifest|txt|md)(\\?|#|$))[\\w-]+(?:[\\?#].*)?$';
11
11
  const BROWSERLESS_TOKEN = process.env.BROWSERLESS_TOKEN;
12
12
 
13
+ const BROWSERLESS_BLOCK_ADS = process.env.BROWSERLESS_BLOCK_ADS === '1';
14
+ const BROWSERLESS_STEALTH_MODE = process.env.BROWSERLESS_STEALTH_MODE === '1';
15
+
13
16
  class BrowserlessInitError extends Error {
14
17
  constructor() {
15
18
  super('`BROWSERLESS_URL` or `BROWSERLESS_TOKEN` are required');
@@ -30,7 +33,14 @@ export const browserless: CrawlImpl = async (url, { filterOptions }) => {
30
33
 
31
34
  try {
32
35
  const res = await fetch(
33
- qs.stringifyUrl({ query: { token: BROWSERLESS_TOKEN }, url: urlJoin(BASE_URL, '/content') }),
36
+ qs.stringifyUrl({
37
+ query: {
38
+ blockAds: BROWSERLESS_BLOCK_ADS,
39
+ launch: JSON.stringify({ stealth: BROWSERLESS_STEALTH_MODE }),
40
+ token: BROWSERLESS_TOKEN,
41
+ },
42
+ url: urlJoin(BASE_URL, '/content'),
43
+ }),
34
44
  {
35
45
  body: JSON.stringify(input),
36
46
  headers: {