firecrawl 2.4.2__tar.gz → 2.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of firecrawl might be problematic. Click here for more details.
- {firecrawl-2.4.2 → firecrawl-2.5.0}/PKG-INFO +7 -7
- {firecrawl-2.4.2 → firecrawl-2.5.0}/README.md +6 -6
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl/__init__.py +2 -2
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl/firecrawl.py +13 -1
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl.egg-info/PKG-INFO +7 -7
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl.egg-info/top_level.txt +2 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/LICENSE +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl/__tests__/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl/__tests__/e2e_withAuth/test.py +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl/__tests__/v1/e2e_withAuth/__init__.py +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl/__tests__/v1/e2e_withAuth/test.py +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl.egg-info/SOURCES.txt +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl.egg-info/dependency_links.txt +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/firecrawl.egg-info/requires.txt +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/pyproject.toml +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/setup.cfg +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/setup.py +0 -0
- {firecrawl-2.4.2 → firecrawl-2.5.0}/tests/test_change_tracking.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 2.4.2
|
|
3
|
+
Version: 2.5.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -40,13 +40,13 @@ Requires-Dist: nest-asyncio
|
|
|
40
40
|
Requires-Dist: pydantic
|
|
41
41
|
Requires-Dist: aiohttp
|
|
42
42
|
|
|
43
|
-
# Firecrawl
|
|
43
|
+
# Firecrawl Python SDK
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
The Firecrawl Python SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
|
|
46
46
|
|
|
47
47
|
## Installation
|
|
48
48
|
|
|
49
|
-
To install the Firecrawl
|
|
49
|
+
To install the Firecrawl Python SDK, you can use pip:
|
|
50
50
|
|
|
51
51
|
```bash
|
|
52
52
|
pip install firecrawl-py
|
|
@@ -60,16 +60,16 @@ pip install firecrawl-py
|
|
|
60
60
|
Here's an example of how to use the SDK:
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from firecrawl import FirecrawlApp
|
|
63
|
+
from firecrawl import FirecrawlApp, ScrapeOptions
|
|
64
64
|
|
|
65
65
|
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
|
66
66
|
|
|
67
67
|
# Scrape a website:
|
|
68
|
-
|
|
68
|
+
data = app.scrape_url(
|
|
69
69
|
'https://firecrawl.dev',
|
|
70
70
|
formats=['markdown', 'html']
|
|
71
71
|
)
|
|
72
|
-
print(
|
|
72
|
+
print(data)
|
|
73
73
|
|
|
74
74
|
# Crawl a website:
|
|
75
75
|
crawl_status = app.crawl_url(
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
# Firecrawl
|
|
1
|
+
# Firecrawl Python SDK
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
The Firecrawl Python SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
|
-
To install the Firecrawl
|
|
7
|
+
To install the Firecrawl Python SDK, you can use pip:
|
|
8
8
|
|
|
9
9
|
```bash
|
|
10
10
|
pip install firecrawl-py
|
|
@@ -18,16 +18,16 @@ pip install firecrawl-py
|
|
|
18
18
|
Here's an example of how to use the SDK:
|
|
19
19
|
|
|
20
20
|
```python
|
|
21
|
-
from firecrawl import FirecrawlApp
|
|
21
|
+
from firecrawl import FirecrawlApp, ScrapeOptions
|
|
22
22
|
|
|
23
23
|
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
|
24
24
|
|
|
25
25
|
# Scrape a website:
|
|
26
|
-
|
|
26
|
+
data = app.scrape_url(
|
|
27
27
|
'https://firecrawl.dev',
|
|
28
28
|
formats=['markdown', 'html']
|
|
29
29
|
)
|
|
30
|
-
print(
|
|
30
|
+
print(data)
|
|
31
31
|
|
|
32
32
|
# Crawl a website:
|
|
33
33
|
crawl_status = app.crawl_url(
|
|
@@ -11,9 +11,9 @@ For more information visit https://github.com/firecrawl/
|
|
|
11
11
|
import logging
|
|
12
12
|
import os
|
|
13
13
|
|
|
14
|
-
from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions # noqa
|
|
14
|
+
from .firecrawl import FirecrawlApp, JsonConfig, ScrapeOptions, ChangeTrackingOptions # noqa
|
|
15
15
|
|
|
16
|
-
__version__ = "2.4.2"
|
|
16
|
+
__version__ = "2.5.0"
|
|
17
17
|
|
|
18
18
|
# Define the logger for the Firecrawl project
|
|
19
19
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
|
@@ -135,6 +135,12 @@ class WebhookConfig(pydantic.BaseModel):
|
|
|
135
135
|
metadata: Optional[Dict[str, str]] = None
|
|
136
136
|
events: Optional[List[Literal["completed", "failed", "page", "started"]]] = None
|
|
137
137
|
|
|
138
|
+
class ChangeTrackingOptions(pydantic.BaseModel):
|
|
139
|
+
"""Configuration for change tracking."""
|
|
140
|
+
modes: Optional[List[Literal["git-diff", "json"]]] = None
|
|
141
|
+
schema: Optional[Any] = None
|
|
142
|
+
prompt: Optional[str] = None
|
|
143
|
+
|
|
138
144
|
class ScrapeOptions(pydantic.BaseModel):
|
|
139
145
|
"""Parameters for scraping operations."""
|
|
140
146
|
formats: Optional[List[Literal["markdown", "html", "rawHtml", "content", "links", "screenshot", "screenshot@fullPage", "extract", "json", "changeTracking"]]] = None
|
|
@@ -150,6 +156,7 @@ class ScrapeOptions(pydantic.BaseModel):
|
|
|
150
156
|
removeBase64Images: Optional[bool] = None
|
|
151
157
|
blockAds: Optional[bool] = None
|
|
152
158
|
proxy: Optional[Literal["basic", "stealth"]] = None
|
|
159
|
+
changeTrackingOptions: Optional[ChangeTrackingOptions] = None
|
|
153
160
|
|
|
154
161
|
class WaitAction(pydantic.BaseModel):
|
|
155
162
|
"""Wait action to perform during scraping."""
|
|
@@ -454,6 +461,7 @@ class FirecrawlApp:
|
|
|
454
461
|
extract: Optional[JsonConfig] = None,
|
|
455
462
|
json_options: Optional[JsonConfig] = None,
|
|
456
463
|
actions: Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]] = None,
|
|
464
|
+
change_tracking_options: Optional[ChangeTrackingOptions] = None,
|
|
457
465
|
**kwargs) -> ScrapeResponse[Any]:
|
|
458
466
|
"""
|
|
459
467
|
Scrape and extract content from a URL.
|
|
@@ -475,6 +483,7 @@ class FirecrawlApp:
|
|
|
475
483
|
extract (Optional[JsonConfig]): Content extraction settings
|
|
476
484
|
json_options (Optional[JsonConfig]): JSON extraction settings
|
|
477
485
|
actions (Optional[List[Union[WaitAction, ScreenshotAction, ClickAction, WriteAction, PressAction, ScrollAction, ScrapeAction, ExecuteJavascriptAction]]]): Actions to perform
|
|
486
|
+
change_tracking_options (Optional[ChangeTrackingOptions]): Change tracking settings
|
|
478
487
|
|
|
479
488
|
|
|
480
489
|
Returns:
|
|
@@ -530,6 +539,9 @@ class FirecrawlApp:
|
|
|
530
539
|
scrape_params['jsonOptions'] = json_options.dict(exclude_none=True)
|
|
531
540
|
if actions:
|
|
532
541
|
scrape_params['actions'] = [action.dict(exclude_none=True) for action in actions]
|
|
542
|
+
if change_tracking_options:
|
|
543
|
+
scrape_params['changeTrackingOptions'] = change_tracking_options.dict(exclude_none=True)
|
|
544
|
+
|
|
533
545
|
scrape_params.update(kwargs)
|
|
534
546
|
|
|
535
547
|
# Make request
|
|
@@ -2424,7 +2436,7 @@ class FirecrawlApp:
|
|
|
2424
2436
|
method_params = {
|
|
2425
2437
|
"scrape_url": {"formats", "include_tags", "exclude_tags", "only_main_content", "wait_for",
|
|
2426
2438
|
"timeout", "location", "mobile", "skip_tls_verification", "remove_base64_images",
|
|
2427
|
-
"block_ads", "proxy", "extract", "json_options", "actions"},
|
|
2439
|
+
"block_ads", "proxy", "extract", "json_options", "actions", "change_tracking_options"},
|
|
2428
2440
|
"search": {"limit", "tbs", "filter", "lang", "country", "location", "timeout", "scrape_options"},
|
|
2429
2441
|
"crawl_url": {"include_paths", "exclude_paths", "max_depth", "max_discovery_depth", "limit",
|
|
2430
2442
|
"allow_backward_links", "allow_external_links", "ignore_sitemap", "scrape_options",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: firecrawl
|
|
3
|
-
Version: 2.4.2
|
|
3
|
+
Version: 2.5.0
|
|
4
4
|
Summary: Python SDK for Firecrawl API
|
|
5
5
|
Home-page: https://github.com/mendableai/firecrawl
|
|
6
6
|
Author: Mendable.ai
|
|
@@ -40,13 +40,13 @@ Requires-Dist: nest-asyncio
|
|
|
40
40
|
Requires-Dist: pydantic
|
|
41
41
|
Requires-Dist: aiohttp
|
|
42
42
|
|
|
43
|
-
# Firecrawl
|
|
43
|
+
# Firecrawl Python SDK
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
The Firecrawl Python SDK is a library that allows you to easily scrape and crawl websites, and output the data in a format ready for use with language models (LLMs). It provides a simple and intuitive interface for interacting with the Firecrawl API.
|
|
46
46
|
|
|
47
47
|
## Installation
|
|
48
48
|
|
|
49
|
-
To install the Firecrawl
|
|
49
|
+
To install the Firecrawl Python SDK, you can use pip:
|
|
50
50
|
|
|
51
51
|
```bash
|
|
52
52
|
pip install firecrawl-py
|
|
@@ -60,16 +60,16 @@ pip install firecrawl-py
|
|
|
60
60
|
Here's an example of how to use the SDK:
|
|
61
61
|
|
|
62
62
|
```python
|
|
63
|
-
from firecrawl import FirecrawlApp
|
|
63
|
+
from firecrawl import FirecrawlApp, ScrapeOptions
|
|
64
64
|
|
|
65
65
|
app = FirecrawlApp(api_key="fc-YOUR_API_KEY")
|
|
66
66
|
|
|
67
67
|
# Scrape a website:
|
|
68
|
-
|
|
68
|
+
data = app.scrape_url(
|
|
69
69
|
'https://firecrawl.dev',
|
|
70
70
|
formats=['markdown', 'html']
|
|
71
71
|
)
|
|
72
|
-
print(
|
|
72
|
+
print(data)
|
|
73
73
|
|
|
74
74
|
# Crawl a website:
|
|
75
75
|
crawl_status = app.crawl_url(
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|