arcade-brightdata 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,181 @@
1
+ .DS_Store
2
+ credentials.yaml
3
+ docker/credentials.yaml
4
+
5
+ *.lock
6
+
7
+ # example data
8
+ examples/data
9
+ scratch
10
+
11
+
12
+ docs/source
13
+
14
+ # From https://raw.githubusercontent.com/github/gitignore/main/Python.gitignore
15
+
16
+ # Byte-compiled / optimized / DLL files
17
+ __pycache__/
18
+ *.py[cod]
19
+ *$py.class
20
+
21
+ # C extensions
22
+ *.so
23
+
24
+ # Distribution / packaging
25
+ .Python
26
+ build/
27
+ develop-eggs/
28
+ dist/
29
+ downloads/
30
+ eggs/
31
+ .eggs/
32
+ lib/
33
+ lib64/
34
+ parts/
35
+ sdist/
36
+ var/
37
+ wheels/
38
+ share/python-wheels/
39
+ *.egg-info/
40
+ .installed.cfg
41
+ *.egg
42
+ MANIFEST
43
+
44
+ # PyInstaller
45
+ # Usually these files are written by a python script from a template
46
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
47
+ *.manifest
48
+ *.spec
49
+
50
+ # Installer logs
51
+ pip-log.txt
52
+ pip-delete-this-directory.txt
53
+
54
+ # Unit test / coverage reports
55
+ htmlcov/
56
+ .tox/
57
+ .nox/
58
+ .coverage
59
+ .coverage.*
60
+ .cache
61
+ nosetests.xml
62
+ coverage.xml
63
+ *.cover
64
+ *.py,cover
65
+ .hypothesis/
66
+ .pytest_cache/
67
+ cover/
68
+
69
+ # Translations
70
+ *.mo
71
+ *.pot
72
+
73
+ # Django stuff:
74
+ *.log
75
+ local_settings.py
76
+ db.sqlite3
77
+ db.sqlite3-journal
78
+
79
+ # Flask stuff:
80
+ instance/
81
+ .webassets-cache
82
+
83
+ # Scrapy stuff:
84
+ .scrapy
85
+
86
+ # Sphinx documentation
87
+ docs/_build/
88
+
89
+ # PyBuilder
90
+ .pybuilder/
91
+ target/
92
+
93
+ # Jupyter Notebook
94
+ .ipynb_checkpoints
95
+
96
+ # IPython
97
+ profile_default/
98
+ ipython_config.py
99
+
100
+ # IDE
101
+ *.code-workspace
102
+
103
+ # pyenv
104
+ # For a library or package, you might want to ignore these files since the code is
105
+ # intended to run in multiple environments; otherwise, check them in:
106
+ # .python-version
107
+
108
+ # pipenv
109
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
110
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
111
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
112
+ # install all needed dependencies.
113
+ #Pipfile.lock
114
+
115
+ # poetry
116
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
117
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
118
+ # commonly ignored for libraries.
119
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
120
+ poetry.lock
121
+
122
+ # pdm
123
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
124
+ #pdm.lock
125
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
126
+ # in version control.
127
+ # https://pdm.fming.dev/#use-with-ide
128
+ .pdm.toml
129
+
130
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131
+ __pypackages__/
132
+
133
+ # Celery stuff
134
+ celerybeat-schedule
135
+ celerybeat.pid
136
+
137
+ # SageMath parsed files
138
+ *.sage.py
139
+
140
+ # Environments
141
+ .env
142
+ .venv
143
+ env/
144
+ venv/
145
+ ENV/
146
+ env.bak/
147
+ venv.bak/
148
+
149
+ # Spyder project settings
150
+ .spyderproject
151
+ .spyproject
152
+
153
+ # Rope project settings
154
+ .ropeproject
155
+
156
+ # mkdocs documentation
157
+ /site
158
+
159
+ # mypy
160
+ .mypy_cache/
161
+ .dmypy.json
162
+ dmypy.json
163
+
164
+ # Pyre type checker
165
+ .pyre/
166
+
167
+ # pytype static type analyzer
168
+ .pytype/
169
+
170
+ # Cython debug symbols
171
+ cython_debug/
172
+
173
+ # PyCharm
174
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
175
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
176
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
177
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
178
+ #.idea/
179
+
180
+ # Docs
181
+ libs/arcade-mcp-server/site/*
@@ -0,0 +1,18 @@
1
+ files: ^arcade_brightdata/.*
2
+ repos:
3
+ - repo: https://github.com/pre-commit/pre-commit-hooks
4
+ rev: "v4.4.0"
5
+ hooks:
6
+ - id: check-case-conflict
7
+ - id: check-merge-conflict
8
+ - id: check-toml
9
+ - id: check-yaml
10
+ - id: end-of-file-fixer
11
+ - id: trailing-whitespace
12
+
13
+ - repo: https://github.com/astral-sh/ruff-pre-commit
14
+ rev: v0.6.7
15
+ hooks:
16
+ - id: ruff
17
+ args: [--fix]
18
+ - id: ruff-format
@@ -0,0 +1,44 @@
1
+ target-version = "py310"
2
+ line-length = 100
3
+ fix = true
4
+
5
+ [lint]
6
+ select = [
7
+ # flake8-2020
8
+ "YTT",
9
+ # flake8-bandit
10
+ "S",
11
+ # flake8-bugbear
12
+ "B",
13
+ # flake8-builtins
14
+ "A",
15
+ # flake8-comprehensions
16
+ "C4",
17
+ # flake8-debugger
18
+ "T10",
19
+ # flake8-simplify
20
+ "SIM",
21
+ # isort
22
+ "I",
23
+ # mccabe
24
+ "C90",
25
+ # pycodestyle
26
+ "E", "W",
27
+ # pyflakes
28
+ "F",
29
+ # pygrep-hooks
30
+ "PGH",
31
+ # pyupgrade
32
+ "UP",
33
+ # ruff
34
+ "RUF",
35
+ # tryceratops
36
+ "TRY",
37
+ ]
38
+
39
+ [lint.per-file-ignores]
40
+ "**/tests/*" = ["S101"]
41
+
42
+ [format]
43
+ preview = true
44
+ skip-magic-trailing-comma = false
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025, Arcade AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,55 @@
1
+ .PHONY: help
2
+
3
+ help:
4
+ @echo "🛠️ github Commands:\n"
5
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
6
+
7
+ .PHONY: install
8
+ install: ## Install the uv environment and install all packages with dependencies
9
+ @echo "🚀 Creating virtual environment and installing all packages using uv"
10
+ @uv sync --active --all-extras --no-sources
11
+ @if [ -f .pre-commit-config.yaml ]; then uv run --no-sources pre-commit install; fi
12
+ @echo "✅ All packages and dependencies installed via uv"
13
+
14
+ .PHONY: install-local
15
+ install-local: ## Install the uv environment and install all packages with dependencies with local Arcade sources
16
+ @echo "🚀 Creating virtual environment and installing all packages using uv"
17
+ @uv sync --active --all-extras
18
+ @if [ -f .pre-commit-config.yaml ]; then uv run pre-commit install; fi
19
+ @echo "✅ All packages and dependencies installed via uv"
20
+
21
+ .PHONY: build
22
+ build: clean-build ## Build wheel file using uv
23
+ @echo "🚀 Creating wheel file"
24
+ uv build
25
+
26
+ .PHONY: clean-build
27
+ clean-build: ## clean build artifacts
28
+ @echo "🗑️ Cleaning dist directory"
29
+ rm -rf dist
30
+
31
+ .PHONY: test
32
+ test: ## Test the code with pytest
33
+ @echo "🚀 Testing code: Running pytest"
34
+ @uv run --no-sources pytest -W ignore -v --cov --cov-config=pyproject.toml --cov-report=xml
35
+
36
+ .PHONY: coverage
37
+ coverage: ## Generate coverage report
38
+ @echo "coverage report"
39
+ @uv run --no-sources coverage report
40
+ @echo "Generating coverage report"
41
+ @uv run --no-sources coverage html
42
+
43
+ .PHONY: bump-version
44
+ bump-version: ## Bump the version in the pyproject.toml file by a patch version
45
+ @echo "🚀 Bumping version in pyproject.toml"
46
+ uv version --no-sources --bump patch
47
+
48
+ .PHONY: check
49
+ check: ## Run code quality tools.
50
+ @if [ -f .pre-commit-config.yaml ]; then\
51
+ echo "🚀 Linting code: Running pre-commit";\
52
+ uv run --no-sources pre-commit run -a;\
53
+ fi
54
+ @echo "🚀 Static type checking: Running mypy"
55
+ @uv run --no-sources mypy --config-file=pyproject.toml
@@ -0,0 +1,21 @@
1
+ Metadata-Version: 2.4
2
+ Name: arcade_brightdata
3
+ Version: 0.2.0
4
+ Summary: Search, Crawl and Scrape any site, at scale, without getting blocked
5
+ Author-email: meirk-brd <meirk@brightdata.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.10
8
+ Requires-Dist: arcade-tdk<4.0.0,>=3.0.0
9
+ Requires-Dist: requests>=2.32.5
10
+ Provides-Extra: dev
11
+ Requires-Dist: arcade-mcp[all]<2.0.0,>=1.2.0; extra == 'dev'
12
+ Requires-Dist: arcade-serve<4.0.0,>=3.0.0; extra == 'dev'
13
+ Requires-Dist: mypy<1.6.0,>=1.5.1; extra == 'dev'
14
+ Requires-Dist: pre-commit<3.5.0,>=3.4.0; extra == 'dev'
15
+ Requires-Dist: pytest-asyncio<0.25.0,>=0.24.0; extra == 'dev'
16
+ Requires-Dist: pytest-cov<4.1.0,>=4.0.0; extra == 'dev'
17
+ Requires-Dist: pytest-mock<3.12.0,>=3.11.1; extra == 'dev'
18
+ Requires-Dist: pytest<8.4.0,>=8.3.0; extra == 'dev'
19
+ Requires-Dist: ruff<0.8.0,>=0.7.4; extra == 'dev'
20
+ Requires-Dist: tox<4.12.0,>=4.11.1; extra == 'dev'
21
+ Requires-Dist: types-requests>=2.32.0; extra == 'dev'
@@ -0,0 +1,3 @@
1
+ from arcade_brightdata.tools import scrape_as_markdown, search_engine, web_data_feed
2
+
3
+ __all__ = ["scrape_as_markdown", "search_engine", "web_data_feed"]
@@ -0,0 +1,63 @@
1
+ import json
2
+ from typing import ClassVar
3
+ from urllib.parse import quote
4
+
5
+ import requests
6
+
7
+
8
class BrightDataClient:
    """Thin wrapper around the Bright Data request API with per-API-key caching."""

    # One cached client per API key; the zone is refreshed on every
    # create_client call so callers control it per request.
    _clients: ClassVar[dict[str, "BrightDataClient"]] = {}

    def __init__(self, api_key: str, zone: str = "web_unlocker1") -> None:
        """
        Store the API token and target zone.

        Args:
            api_key (str): Your Bright Data API token
            zone (str): Bright Data zone name
        """
        self.api_key = api_key
        self.zone = zone
        self.endpoint = "https://api.brightdata.com/request"
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
        }

    @classmethod
    def create_client(cls, api_key: str, zone: str = "web_unlocker1") -> "BrightDataClient":
        """Return the cached client for *api_key*, creating it on first use.

        The cached client's zone is overwritten on every call.
        """
        cached = cls._clients.get(api_key)
        if cached is None:
            cached = cls(api_key, zone)
            cls._clients[api_key] = cached

        cached.zone = zone
        return cached

    @classmethod
    def clear_cache(cls) -> None:
        """Drop every cached client instance."""
        cls._clients.clear()

    def make_request(self, payload: dict) -> str:
        """
        POST *payload* to the Bright Data request endpoint.

        Args:
            payload (dict): Request payload

        Returns:
            str: Raw response body text
        """
        body = json.dumps(payload)
        response = requests.post(self.endpoint, headers=self.headers, data=body, timeout=30)
        response.raise_for_status()
        text: str = response.text
        return text

    @staticmethod
    def encode_query(query: str) -> str:
        """Percent-encode a search query for embedding in a URL."""
        return quote(query)
@@ -0,0 +1,7 @@
1
+ from arcade_brightdata.tools.bright_data_tools import (
2
+ scrape_as_markdown,
3
+ search_engine,
4
+ web_data_feed,
5
+ )
6
+
7
+ __all__ = ["scrape_as_markdown", "search_engine", "web_data_feed"]
@@ -0,0 +1,312 @@
1
+ import json
2
+ import time
3
+ from enum import Enum
4
+ from typing import Annotated, Any, cast
5
+
6
+ import requests
7
+ from arcade_core.errors import RetryableToolError
8
+ from arcade_tdk import ToolContext, tool
9
+
10
+ from arcade_brightdata.bright_data_client import BrightDataClient
11
+
12
+
13
class DeviceType(str, Enum):
    """Device presets mapped onto Bright Data's `brd_mobile` URL parameter
    by search_engine (plain "mobile" maps to the generic flag)."""

    MOBILE = "mobile"
    IOS = "ios"
    IPHONE = "iphone"
    IPAD = "ipad"
    ANDROID = "android"
    ANDROID_TABLET = "android_tablet"
20
+
21
+
22
class SearchEngine(str, Enum):
    """Search engines supported by the search_engine tool."""

    GOOGLE = "google"
    BING = "bing"
    YANDEX = "yandex"
26
+
27
+
28
class SearchType(str, Enum):
    """Vertical search types; applied only for Google, via the `tbm`
    (or, for jobs, `ibp`) URL parameter in search_engine."""

    IMAGES = "images"
    SHOPPING = "shopping"
    NEWS = "news"
    JOBS = "jobs"
33
+
34
+
35
class SourceType(str, Enum):
    """Structured-data sources accepted by web_data_feed; each member maps
    to a Bright Data dataset id in _extract_structured_data."""

    AMAZON_PRODUCT = "amazon_product"
    AMAZON_PRODUCT_REVIEWS = "amazon_product_reviews"
    LINKEDIN_PERSON_PROFILE = "linkedin_person_profile"
    LINKEDIN_COMPANY_PROFILE = "linkedin_company_profile"
    ZOOMINFO_COMPANY_PROFILE = "zoominfo_company_profile"
    INSTAGRAM_PROFILES = "instagram_profiles"
    INSTAGRAM_POSTS = "instagram_posts"
    INSTAGRAM_REELS = "instagram_reels"
    INSTAGRAM_COMMENTS = "instagram_comments"
    FACEBOOK_POSTS = "facebook_posts"
    FACEBOOK_MARKETPLACE_LISTINGS = "facebook_marketplace_listings"
    FACEBOOK_COMPANY_REVIEWS = "facebook_company_reviews"
    X_POSTS = "x_posts"
    ZILLOW_PROPERTIES_LISTING = "zillow_properties_listing"
    BOOKING_HOTEL_LISTINGS = "booking_hotel_listings"
    YOUTUBE_VIDEOS = "youtube_videos"
52
+
53
+
54
@tool(requires_secrets=["BRIGHTDATA_API_KEY", "BRIGHTDATA_ZONE"])
def scrape_as_markdown(
    context: ToolContext,
    url: Annotated[str, "URL to scrape"],
) -> Annotated[str, "Scraped webpage content as Markdown"]:
    """
    Scrape a webpage and return its content as Markdown using Bright Data.

    Examples:
        scrape_as_markdown("https://example.com") -> "# Example Page\n\nContent..."
        scrape_as_markdown("https://news.ycombinator.com") -> "# Hacker News\n..."
    """
    api_key = context.get_secret("BRIGHTDATA_API_KEY")
    zone = context.get_secret("BRIGHTDATA_ZONE")
    engine = BrightDataClient.create_client(api_key=api_key, zone=zone)

    # format=raw returns the body as-is; data_format=markdown asks Bright Data
    # to convert the page to Markdown server-side.
    request_body = {
        "url": url,
        "zone": zone,
        "format": "raw",
        "data_format": "markdown",
    }
    return engine.make_request(request_body)
72
+
73
+
74
def _google_search_params(
    language: str | None,
    country_code: str | None,
    search_type: SearchType | None,
    start: int | None,
    num_results: int,
    location: str | None,
    device: DeviceType | None,
    return_json: bool,
) -> list[str]:
    """Build the Google-specific query-string parameters for a SERP request.

    Parameter order matches the original implementation (hl, gl, tbm/ibp,
    start, num, uule, brd_mobile, brd_json).
    """
    params: list[str] = []

    if language:
        params.append(f"hl={language}")

    if country_code:
        params.append(f"gl={country_code}")

    if search_type:
        if search_type == SearchType.JOBS:
            # Jobs is not a tbm vertical; Google selects it via ibp.
            params.append("ibp=htl;jobs")
        else:
            tbm_values = {
                SearchType.IMAGES: "isch",
                SearchType.SHOPPING: "shop",
                SearchType.NEWS: "nws",
            }
            tbm_value = tbm_values.get(search_type, search_type)
            params.append(f"tbm={tbm_value}")

    if start is not None:
        params.append(f"start={start}")

    if num_results:
        params.append(f"num={num_results}")

    if location:
        params.append(f"uule={BrightDataClient.encode_query(location)}")

    if device:
        # Map public device names onto Bright Data's brd_mobile values;
        # plain "mobile" falls through to the generic flag "1".
        brd_mobile_values = {
            "ios": "ios",
            "iphone": "ios",
            "ipad": "ios_tablet",
            "android": "android",
            "android_tablet": "android_tablet",
        }
        params.append(f"brd_mobile={brd_mobile_values.get(device.value, '1')}")

    if return_json:
        # brd_json=1 asks Bright Data to return parsed SERP JSON.
        params.append("brd_json=1")

    return params


@tool(requires_secrets=["BRIGHTDATA_API_KEY", "BRIGHTDATA_ZONE"])
def search_engine(
    context: ToolContext,
    query: Annotated[str, "Search query"],
    engine: Annotated[SearchEngine, "Search engine to use"] = SearchEngine.GOOGLE,
    language: Annotated[str | None, "Two-letter language code"] = None,
    country_code: Annotated[str | None, "Two-letter country code"] = None,
    search_type: Annotated[SearchType | None, "Type of search"] = None,
    start: Annotated[int | None, "Results pagination offset"] = None,
    num_results: Annotated[int, "Number of results to return. The default is 10"] = 10,
    location: Annotated[str | None, "Location for search results"] = None,
    device: Annotated[DeviceType | None, "Device type"] = None,
    return_json: Annotated[bool, "Return JSON instead of Markdown"] = False,
) -> Annotated[str, "Search results as Markdown or JSON"]:
    """
    Search using Google, Bing, or Yandex with advanced parameters using Bright Data.

    Examples:
        search_engine("climate change") -> "# Search Results\n\n## Climate Change - Wikipedia\n..."
        search_engine("Python tutorials", engine="bing", num_results=5) -> "# Bing Results\n..."
        search_engine("cats", search_type="images", country_code="us") -> "# Image Results\n..."
    """
    api_key = context.get_secret("BRIGHTDATA_API_KEY")
    zone = context.get_secret("BRIGHTDATA_ZONE")
    client = BrightDataClient.create_client(api_key=api_key, zone=zone)

    encoded_query = BrightDataClient.encode_query(query)

    base_urls = {
        SearchEngine.GOOGLE: f"https://www.google.com/search?q={encoded_query}",
        SearchEngine.BING: f"https://www.bing.com/search?q={encoded_query}",
        SearchEngine.YANDEX: f"https://yandex.com/search/?text={encoded_query}",
    }
    search_url = base_urls[engine]

    # Advanced parameters are only applied for Google; Bing/Yandex receive the
    # bare query URL. NOTE(review): this means return_json currently has no
    # effect on the URL for Bing/Yandex — confirm whether brd_json is
    # supported for those engines.
    if engine == SearchEngine.GOOGLE:
        params = _google_search_params(
            language=language,
            country_code=country_code,
            search_type=search_type,
            start=start,
            num_results=num_results,
            location=location,
            device=device,
            return_json=return_json,
        )
        if params:
            search_url += "&" + "&".join(params)

    payload = {
        "url": search_url,
        "zone": zone,
        "format": "raw",
        # With brd_json the response is already parsed JSON, so it is passed
        # through raw instead of being converted to Markdown.
        "data_format": "markdown" if not return_json else "raw",
    }

    return client.make_request(payload)
168
+
169
+
170
@tool(requires_secrets=["BRIGHTDATA_API_KEY"])
def web_data_feed(
    context: ToolContext,
    source_type: Annotated[SourceType, "Type of data source"],
    url: Annotated[str, "URL of the web resource to extract data from"],
    num_of_reviews: Annotated[
        int | None,
        (
            "Number of reviews to retrieve. Only applicable for "
            "facebook_company_reviews. Default is None"
        ),
    ] = None,
    timeout: Annotated[int, "Maximum time in seconds to wait for data retrieval"] = 600,
    polling_interval: Annotated[int, "Time in seconds between polling attempts"] = 1,
) -> Annotated[str, "Structured data from the requested source as JSON"]:
    """
    Extract structured data from various websites like LinkedIn, Amazon, Instagram, etc.
    NEVER MAKE UP LINKS - IF LINKS ARE NEEDED, EXECUTE search_engine FIRST.
    Supported source types:
    - amazon_product, amazon_product_reviews
    - linkedin_person_profile, linkedin_company_profile
    - zoominfo_company_profile
    - instagram_profiles, instagram_posts, instagram_reels, instagram_comments
    - facebook_posts, facebook_marketplace_listings, facebook_company_reviews
    - x_posts
    - zillow_properties_listing
    - booking_hotel_listings
    - youtube_videos

    Examples:
        web_data_feed("amazon_product", "https://amazon.com/dp/B08N5WRWNW")
            -> "{\"title\": \"Product Name\", ...}"
        web_data_feed("linkedin_person_profile", "https://linkedin.com/in/johndoe")
            -> "{\"name\": \"John Doe\", ...}"
        web_data_feed(
            "facebook_company_reviews", "https://facebook.com/company", num_of_reviews=50
        ) -> "[{\"review\": \"...\", ...}]"
    """
    client = BrightDataClient.create_client(api_key=context.get_secret("BRIGHTDATA_API_KEY"))

    # num_of_reviews only makes sense for the Facebook company-reviews dataset.
    reviews_arg_misused = (
        num_of_reviews is not None and source_type != SourceType.FACEBOOK_COMPANY_REVIEWS
    )
    if reviews_arg_misused:
        msg = (
            f"num_of_reviews parameter is only applicable for facebook_company_reviews, "
            f"not for {source_type.value}"
        )
        prompt = (
            "The num_of_reviews parameter should only be used with "
            "facebook_company_reviews source type."
        )
        raise RetryableToolError(msg, additional_prompt_content=prompt)

    structured = _extract_structured_data(
        client=client,
        source_type=source_type,
        url=url,
        num_of_reviews=num_of_reviews,
        timeout=timeout,
        polling_interval=polling_interval,
    )
    return json.dumps(structured, indent=2)
229
+
230
+
231
def _extract_structured_data(
    client: BrightDataClient,
    source_type: SourceType,
    url: str,
    num_of_reviews: int | None = None,
    timeout: int = 600,
    polling_interval: int = 1,
) -> dict[str, Any]:
    """
    Trigger a Bright Data dataset collection for *url* and poll until the
    snapshot is ready or *timeout* seconds have elapsed.

    Args:
        client: Client whose auth headers are reused for the dataset API.
        source_type: Which dataset to trigger.
        url: Target resource URL.
        num_of_reviews: Forwarded only for facebook_company_reviews.
        timeout: Maximum wall-clock seconds to wait for the snapshot.
        polling_interval: Seconds to sleep between status polls.

    Returns:
        The parsed snapshot payload.

    Raises:
        RetryableToolError: If the trigger request returns no snapshot id.
        TimeoutError: If the snapshot is not ready within *timeout* seconds.
    """
    datasets = {
        SourceType.AMAZON_PRODUCT: "gd_l7q7dkf244hwjntr0",
        SourceType.AMAZON_PRODUCT_REVIEWS: "gd_le8e811kzy4ggddlq",
        SourceType.LINKEDIN_PERSON_PROFILE: "gd_l1viktl72bvl7bjuj0",
        SourceType.LINKEDIN_COMPANY_PROFILE: "gd_l1vikfnt1wgvvqz95w",
        SourceType.ZOOMINFO_COMPANY_PROFILE: "gd_m0ci4a4ivx3j5l6nx",
        SourceType.INSTAGRAM_PROFILES: "gd_l1vikfch901nx3by4",
        SourceType.INSTAGRAM_POSTS: "gd_lk5ns7kz21pck8jpis",
        SourceType.INSTAGRAM_REELS: "gd_lyclm20il4r5helnj",
        SourceType.INSTAGRAM_COMMENTS: "gd_ltppn085pokosxh13",
        SourceType.FACEBOOK_POSTS: "gd_lyclm1571iy3mv57zw",
        SourceType.FACEBOOK_MARKETPLACE_LISTINGS: "gd_lvt9iwuh6fbcwmx1a",
        SourceType.FACEBOOK_COMPANY_REVIEWS: "gd_m0dtqpiu1mbcyc2g86",
        SourceType.X_POSTS: "gd_lwxkxvnf1cynvib9co",
        SourceType.ZILLOW_PROPERTIES_LISTING: "gd_lfqkr8wm13ixtbd8f5",
        SourceType.BOOKING_HOTEL_LISTINGS: "gd_m5mbdl081229ln6t4a",
        # NOTE(review): identical to the booking dataset id above — looks like
        # a copy-paste slip; confirm the real youtube_videos dataset id.
        SourceType.YOUTUBE_VIDEOS: "gd_m5mbdl081229ln6t4a",
    }

    dataset_id = datasets[source_type]

    request_data = {"url": url}
    if source_type == SourceType.FACEBOOK_COMPANY_REVIEWS and num_of_reviews is not None:
        request_data["num_of_reviews"] = str(num_of_reviews)

    trigger_response = requests.post(
        "https://api.brightdata.com/datasets/v3/trigger",
        params={"dataset_id": dataset_id, "include_errors": "true"},
        headers=client.headers,
        json=[request_data],
        timeout=30,
    )

    trigger_data = trigger_response.json()
    if not trigger_data.get("snapshot_id"):
        msg = "No snapshot ID returned from trigger request"
        prompt = "Invalid input provided, use search_engine to get the relevant data first"
        raise RetryableToolError(msg, additional_prompt_content=prompt)

    snapshot_id = trigger_data["snapshot_id"]

    # Poll on wall-clock time rather than attempt count so `timeout` really
    # means seconds. (Previously `timeout` was used as an attempt count, so
    # the actual wait was roughly timeout * polling_interval plus request
    # time whenever polling_interval != 1.)
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        try:
            snapshot_response = requests.get(
                f"https://api.brightdata.com/datasets/v3/snapshot/{snapshot_id}",
                params={"format": "json"},
                headers=client.headers,
                timeout=30,
            )
            snapshot_data = cast(dict[str, Any], snapshot_response.json())
        except (requests.RequestException, ValueError):
            # Transient network failure or a not-yet-JSON body: keep polling
            # until the deadline. Other exceptions now propagate instead of
            # being silently swallowed.
            time.sleep(polling_interval)
            continue

        if isinstance(snapshot_data, dict) and snapshot_data.get("status") in (
            "running",
            "building",
        ):
            time.sleep(polling_interval)
            continue

        return snapshot_data

    msg = f"Timeout after {timeout} seconds waiting for {source_type.value} data"
    raise TimeoutError(msg)
@@ -0,0 +1,60 @@
1
+ [build-system]
2
+ requires = [ "hatchling",]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "arcade_brightdata"
7
+ version = "0.2.0"
8
+ description = "Search, Crawl and Scrape any site, at scale, without getting blocked"
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "arcade-tdk>=3.0.0,<4.0.0",
12
+ "requests>=2.32.5",
13
+ ]
14
+ [[project.authors]]
15
+ name = "meirk-brd"
16
+ email = "meirk@brightdata.com"
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "arcade-mcp[all]>=1.2.0,<2.0.0",
21
+ "arcade-serve>=3.0.0,<4.0.0",
22
+ "pytest>=8.3.0,<8.4.0",
23
+ "pytest-cov>=4.0.0,<4.1.0",
24
+ "pytest-mock>=3.11.1,<3.12.0",
25
+ "pytest-asyncio>=0.24.0,<0.25.0",
26
+ "mypy>=1.5.1,<1.6.0",
27
+ "pre-commit>=3.4.0,<3.5.0",
28
+ "tox>=4.11.1,<4.12.0",
29
+ "ruff>=0.7.4,<0.8.0",
30
+ "types-requests>=2.32.0",
31
+ ]
32
+ # Tell Arcade.dev that this package is a toolkit
33
+ [project.entry-points.arcade_toolkits]
34
+ toolkit_name = "arcade_brightdata"
35
+
36
+ [tool.mypy]
37
+ files = [ "arcade_brightdata/**/*.py",]
38
+ python_version = "3.10"
39
+ disallow_untyped_defs = "True"
40
+ disallow_any_unimported = "True"
41
+ no_implicit_optional = "True"
42
+ check_untyped_defs = "True"
43
+ warn_return_any = "True"
44
+ warn_unused_ignores = "True"
45
+ show_error_codes = "True"
46
+ ignore_missing_imports = "True"
47
+
48
+ [tool.uv.sources]
49
+ arcade-mcp = { path = "../../", editable = true }
50
+ arcade-serve = { path = "../../libs/arcade-serve/", editable = true }
51
+ arcade-tdk = { path = "../../libs/arcade-tdk/", editable = true }
52
+
53
+ [tool.pytest.ini_options]
54
+ testpaths = [ "tests",]
55
+
56
+ [tool.coverage.report]
57
+ skip_empty = true
58
+
59
+ [tool.hatch.build.targets.wheel]
60
+ packages = [ "arcade_brightdata",]
File without changes
@@ -0,0 +1,414 @@
1
+ from os import environ
2
+ from unittest.mock import Mock, patch
3
+
4
+ import pytest
5
+ import requests
6
+ from arcade_tdk import ToolContext, ToolSecretItem
7
+ from arcade_tdk.errors import ToolExecutionError
8
+
9
+ from arcade_brightdata.bright_data_client import BrightDataClient
10
+ from arcade_brightdata.tools.bright_data_tools import (
11
+ DeviceType,
12
+ SourceType,
13
+ scrape_as_markdown,
14
+ search_engine,
15
+ web_data_feed,
16
+ )
17
+
18
# Real credentials may be supplied via the environment for live testing;
# otherwise fall back to placeholder values so the mocked tests still run.
BRIGHTDATA_API_KEY = environ.get("TEST_BRIGHTDATA_API_KEY") or "api-key"
BRIGHTDATA_ZONE = environ.get("TEST_BRIGHTDATA_ZONE") or "unblocker"
20
+
21
+
22
@pytest.fixture
def mock_context():
    """Provide a ToolContext pre-loaded with the Bright Data API key and zone secrets."""
    ctx = ToolContext()
    ctx.secrets = [
        ToolSecretItem(key="BRIGHTDATA_API_KEY", value=BRIGHTDATA_API_KEY),
        ToolSecretItem(key="BRIGHTDATA_ZONE", value=BRIGHTDATA_ZONE),
    ]
    return ctx
29
+
30
+
31
@pytest.fixture(autouse=True)
def cleanup_engines():
    """Automatically reset the BrightDataClient cache after every test.

    Prevents cached clients (and their connections) from leaking between tests.
    """
    yield
    BrightDataClient.clear_cache()
36
+
37
+
38
class TestBrightDataClient:
    """Unit tests covering client caching, query encoding, and the HTTP request helper."""

    def test_get_instance_creates_new_client(self):
        """Distinct credentials must yield distinct client objects."""
        first = BrightDataClient.create_client("test_key_1", "zone1")
        second = BrightDataClient.create_client("test_key_2", "zone2")

        assert first != second
        assert first.api_key == "test_key_1"
        assert first.zone == "zone1"
        assert second.api_key == "test_key_2"
        assert second.zone == "zone2"

    def test_get_instance_returns_cached_client(self):
        """Identical credentials must return the same cached instance."""
        first = BrightDataClient.create_client("test_key", "zone1")
        second = BrightDataClient.create_client("test_key", "zone1")

        assert first is second

    def test_clear_cache(self):
        """Clearing the cache forces a brand-new client on the next lookup."""
        before = BrightDataClient.create_client("test_key", "zone1")
        BrightDataClient.clear_cache()
        after = BrightDataClient.create_client("test_key", "zone1")

        assert before is not after

    def test_encode_query(self):
        """Spaces in queries are percent-encoded."""
        assert BrightDataClient.encode_query("hello world test") == "hello%20world%20test"

    @patch("requests.post")
    def test_make_request_success(self, mock_post):
        """A 200 response returns the raw body text."""
        ok_response = Mock()
        ok_response.status_code = 200
        ok_response.text = "Success response"
        mock_post.return_value = ok_response

        client = BrightDataClient("test_key", "test_zone")
        result = client.make_request({"url": "https://example.com"})

        assert result == "Success response"
        mock_post.assert_called_once()

    @patch("requests.post")
    def test_make_request_failure(self, mock_post):
        """HTTP error responses surface as requests.exceptions.HTTPError."""
        bad_response = Mock()
        bad_response.status_code = 400
        bad_response.text = "Bad Request"
        bad_response.raise_for_status.side_effect = requests.exceptions.HTTPError(
            "400 Client Error"
        )
        mock_post.return_value = bad_response

        client = BrightDataClient("test_key", "test_zone")

        with pytest.raises(requests.exceptions.HTTPError):
            client.make_request({"url": "https://example.com"})
93
+
94
+
95
class TestScrapeAsMarkdown:
    """Tests for the scrape_as_markdown tool with a fully mocked client."""

    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_scrape_as_markdown_success(self, client_cls, mock_context):
        """The tool forwards the URL plus markdown formatting options to the client."""
        stub = Mock()
        stub.make_request.return_value = "# Test Page\n\nContent here"
        client_cls.create_client.return_value = stub

        result = scrape_as_markdown(mock_context, "https://example.com")

        assert result == "# Test Page\n\nContent here"
        client_cls.create_client.assert_called_once_with(
            api_key=BRIGHTDATA_API_KEY, zone=BRIGHTDATA_ZONE
        )
        expected_payload = {
            "url": "https://example.com",
            "zone": BRIGHTDATA_ZONE,
            "format": "raw",
            "data_format": "markdown",
        }
        stub.make_request.assert_called_once_with(expected_payload)
114
+
115
+
116
class TestSearchEngine:
    """Tests for the search_engine tool across engines and option combinations."""

    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_search_engine_google_basic(self, client_cls, mock_context):
        """A bare query defaults to Google and returns the markdown body."""
        stub = Mock()
        stub.make_request.return_value = "# Search Results\n\nResult 1\nResult 2"
        client_cls.create_client.return_value = stub
        client_cls.encode_query.return_value = "test%20query"

        result = search_engine(mock_context, "test query")

        assert result == "# Search Results\n\nResult 1\nResult 2"
        client_cls.create_client.assert_called_once_with(
            api_key=BRIGHTDATA_API_KEY, zone=BRIGHTDATA_ZONE
        )

    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_search_engine_bing(self, client_cls, mock_context):
        """Selecting bing builds a bing.com search URL."""
        stub = Mock()
        stub.make_request.return_value = "# Bing Results"
        client_cls.create_client.return_value = stub
        client_cls.encode_query.return_value = "test%20query"

        result = search_engine(mock_context, "test query", engine="bing")

        assert result == "# Bing Results"
        expected_payload = {
            "url": "https://www.bing.com/search?q=test%20query",
            "zone": BRIGHTDATA_ZONE,
            "format": "raw",
            "data_format": "markdown",
        }
        stub.make_request.assert_called_once_with(expected_payload)

    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_search_engine_google_with_parameters(self, client_cls, mock_context):
        """Every optional knob should show up as a query parameter in the URL."""
        stub = Mock()
        stub.make_request.return_value = "# Google Results with params"
        client_cls.create_client.return_value = stub
        client_cls.encode_query.side_effect = lambda q: q.replace(" ", "%20")

        result = search_engine(
            mock_context,
            "test query",
            language="en",
            country_code="us",
            search_type="images",
            start=10,
            num_results=20,
            location="New York",
            device=DeviceType.MOBILE,
            return_json=True,
        )

        assert result == "# Google Results with params"
        payload = stub.make_request.call_args[0][0]
        url = payload["url"]

        assert "hl=en" in url
        assert "gl=us" in url
        assert "tbm=isch" in url
        assert "start=10" in url
        assert "num=20" in url
        assert "brd_mobile=1" in url
        assert "brd_json=1" in url
        assert payload["data_format"] == "raw"

    def test_search_engine_invalid_engine(self, mock_context):
        """An unknown engine name raises a ToolExecutionError."""
        with pytest.raises(ToolExecutionError):
            search_engine(mock_context, "test query", engine="invalid_engine")

    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_search_engine_google_jobs(self, client_cls, mock_context):
        """The jobs search type switches the URL to Google's jobs vertical."""
        stub = Mock()
        stub.make_request.return_value = "# Job Results"
        client_cls.create_client.return_value = stub
        client_cls.encode_query.return_value = "python%20developer"

        result = search_engine(mock_context, "python developer", search_type="jobs")

        assert result == "# Job Results"
        payload = stub.make_request.call_args[0][0]
        assert "ibp=htl;jobs" in payload["url"]
197
+
198
+
199
class TestWebDataFeed:
    """Tests for web_data_feed JSON serialization and argument forwarding."""

    @patch("arcade_brightdata.tools.bright_data_tools._extract_structured_data")
    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_web_data_feed_success(self, client_cls, extract_mock, mock_context):
        """A dict result is serialized to pretty-printed JSON."""
        stub = Mock()
        client_cls.create_client.return_value = stub
        extract_mock.return_value = {"title": "Test Product", "price": "$19.99"}

        result = web_data_feed(mock_context, "amazon_product", "https://amazon.com/dp/B08N5WRWNW")

        expected_json = '{\n "title": "Test Product",\n "price": "$19.99"\n}'
        assert result == expected_json

        client_cls.create_client.assert_called_once_with(api_key=BRIGHTDATA_API_KEY)
        extract_mock.assert_called_once_with(
            client=stub,
            source_type=SourceType.AMAZON_PRODUCT,
            url="https://amazon.com/dp/B08N5WRWNW",
            num_of_reviews=None,
            timeout=600,
            polling_interval=1,
        )

    @patch("arcade_brightdata.tools.bright_data_tools._extract_structured_data")
    @patch("arcade_brightdata.tools.bright_data_tools.BrightDataClient")
    def test_web_data_feed_with_reviews(self, client_cls, extract_mock, mock_context):
        """Review sources forward num_of_reviews plus custom timing options."""
        stub = Mock()
        client_cls.create_client.return_value = stub
        extract_mock.return_value = [{"review": "Great product!", "rating": 5}]

        result = web_data_feed(
            mock_context,
            "facebook_company_reviews",
            "https://facebook.com/company",
            num_of_reviews=50,
            timeout=300,
            polling_interval=2,
        )

        expected_json = '[\n {\n "review": "Great product!",\n "rating": 5\n }\n]'
        assert result == expected_json

        extract_mock.assert_called_once_with(
            client=stub,
            source_type=SourceType.FACEBOOK_COMPANY_REVIEWS,
            url="https://facebook.com/company",
            num_of_reviews=50,
            timeout=300,
            polling_interval=2,
        )
249
+
250
+
251
class TestExtractStructuredData:
    """Tests for the _extract_structured_data trigger/poll helper."""

    @patch("requests.get")
    @patch("requests.post")
    def test_extract_structured_data_success(self, mock_post, mock_get):
        """A snapshot that is ready on the first poll is returned directly."""
        from arcade_brightdata.tools.bright_data_tools import _extract_structured_data

        client = BrightDataClient("test_key", "test_zone")

        trigger = Mock()
        trigger.json.return_value = {"snapshot_id": "snap_123"}
        mock_post.return_value = trigger

        snapshot = Mock()
        snapshot.json.return_value = {"data": "extracted_data"}
        mock_get.return_value = snapshot

        result = _extract_structured_data(
            client=client,
            source_type=SourceType.AMAZON_PRODUCT,
            url="https://amazon.com/dp/TEST",
            timeout=10,
            polling_interval=0.1,
        )

        assert result == {"data": "extracted_data"}

        mock_post.assert_called_once()
        assert "gd_l7q7dkf244hwjntr0" in str(mock_post.call_args)  # Amazon product dataset ID

        mock_get.assert_called_once()
        assert "snap_123" in str(mock_get.call_args)

    @patch("requests.get")
    @patch("requests.post")
    def test_extract_structured_data_with_polling(self, mock_post, mock_get):
        """A 'running' status is polled again until real data arrives."""
        from arcade_brightdata.tools.bright_data_tools import _extract_structured_data

        client = BrightDataClient("test_key", "test_zone")

        trigger = Mock()
        trigger.json.return_value = {"snapshot_id": "snap_123"}
        mock_post.return_value = trigger

        still_running = Mock()
        still_running.json.return_value = {"status": "running"}
        finished = Mock()
        finished.json.return_value = {"data": "final_data"}
        mock_get.side_effect = [still_running, finished]

        result = _extract_structured_data(
            client=client,
            source_type=SourceType.LINKEDIN_PERSON_PROFILE,
            url="https://linkedin.com/in/test",
            timeout=10,
            polling_interval=0.1,
        )

        assert result == {"data": "final_data"}
        assert mock_get.call_count == 2

    @patch("requests.post")
    def test_extract_structured_data_invalid_source_type(self, mock_post):
        """A source type with no dataset mapping raises KeyError."""
        from arcade_brightdata.tools.bright_data_tools import _extract_structured_data

        client = BrightDataClient("test_key", "test_zone")

        class InvalidSourceType:
            # Mimics the SourceType interface without an entry in the datasets dict.
            value = "invalid_source"

        with pytest.raises(KeyError):
            _extract_structured_data(
                client=client, source_type=InvalidSourceType(), url="https://example.com"
            )

    @patch("requests.get")
    @patch("requests.post")
    def test_extract_structured_data_no_snapshot_id(self, mock_post, mock_get):
        """A trigger response lacking snapshot_id fails fast."""
        from arcade_brightdata.tools.bright_data_tools import _extract_structured_data

        client = BrightDataClient("test_key", "test_zone")

        trigger = Mock()
        trigger.json.return_value = {}
        mock_post.return_value = trigger

        with pytest.raises(Exception) as exc_info:
            _extract_structured_data(
                client=client,
                source_type=SourceType.AMAZON_PRODUCT,
                url="https://amazon.com/dp/TEST",
            )

        assert "No snapshot ID returned from trigger request" in str(exc_info.value)

    @patch("requests.get")
    @patch("requests.post")
    @patch("time.sleep")
    def test_extract_structured_data_timeout(self, mock_sleep, mock_post, mock_get):
        """A snapshot that never leaves 'running' eventually raises TimeoutError."""
        from arcade_brightdata.tools.bright_data_tools import _extract_structured_data

        client = BrightDataClient("test_key", "test_zone")

        trigger = Mock()
        trigger.json.return_value = {"snapshot_id": "snap_123"}
        mock_post.return_value = trigger

        never_done = Mock()
        never_done.json.return_value = {"status": "running"}
        mock_get.return_value = never_done

        with pytest.raises(TimeoutError) as exc_info:
            _extract_structured_data(
                client=client,
                source_type=SourceType.AMAZON_PRODUCT,
                url="https://amazon.com/dp/TEST",
                timeout=2,
                polling_interval=0.1,
            )

        assert "Timeout after 2 seconds waiting for amazon_product data" in str(exc_info.value)
379
+
380
+
381
class TestIntegration:
    """Integration tests that test the full flow without mocking internal components."""

    @patch("requests.post")
    def test_scrape_as_markdown_integration(self, mock_post, mock_context):
        """scrape_as_markdown end-to-end with only the HTTP layer mocked."""
        response = Mock()
        response.status_code = 200
        response.text = "# Integration Test\n\nThis is a test page"
        mock_post.return_value = response

        result = scrape_as_markdown(mock_context, "https://example.com")

        assert result == "# Integration Test\n\nThis is a test page"

        # The outgoing request must carry the bearer token and hit the API host.
        call = mock_post.call_args
        assert call[1]["headers"]["Authorization"] == f"Bearer {BRIGHTDATA_API_KEY}"
        assert "https://api.brightdata.com/request" in str(call)

    @patch("requests.post")
    def test_search_engine_integration(self, mock_post, mock_context):
        """search_engine end-to-end with only the HTTP layer mocked."""
        response = Mock()
        response.status_code = 200
        response.text = "# Search Results\n\n1. First result\n2. Second result"
        mock_post.return_value = response

        result = search_engine(mock_context, "test query", engine="google")

        assert result == "# Search Results\n\n1. First result\n2. Second result"

        body = mock_post.call_args[1]["data"]
        assert '"url": "https://www.google.com/search?q=test%20query' in body
        assert '"data_format": "markdown"' in body