lfx-firecrawl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ """lfx-firecrawl: Firecrawl bundle.
2
+
3
+ Distribution unit ``lfx-firecrawl``. At runtime Langflow's loader
4
+ discovers ``extension.json`` shipped alongside this ``__init__.py`` and
5
+ registers the bundle's components under the namespaced IDs
6
+ ``ext:firecrawl:<Class>@official``.
7
+ """
8
+
9
+ from lfx_firecrawl.components.firecrawl.firecrawl_crawl_api import FirecrawlCrawlApi
10
+ from lfx_firecrawl.components.firecrawl.firecrawl_map_api import FirecrawlMapApi
11
+ from lfx_firecrawl.components.firecrawl.firecrawl_scrape_api import FirecrawlScrapeApi
12
+ from lfx_firecrawl.components.firecrawl.firecrawl_search_api import FirecrawlSearchApi
13
+
14
+ __all__ = ["FirecrawlCrawlApi", "FirecrawlMapApi", "FirecrawlScrapeApi", "FirecrawlSearchApi"]
@@ -0,0 +1,13 @@
1
+ """Component re-exports for the ``firecrawl`` bundle.
2
+
3
+ Saved-flow migration entries that target ``lfx.components.firecrawl.<Class>``
4
+ resolve through this package, so the moved Component class(es) must be
5
+ importable from here by name.
6
+ """
7
+
8
+ from .firecrawl_crawl_api import FirecrawlCrawlApi
9
+ from .firecrawl_map_api import FirecrawlMapApi
10
+ from .firecrawl_scrape_api import FirecrawlScrapeApi
11
+ from .firecrawl_search_api import FirecrawlSearchApi
12
+
13
+ __all__ = ["FirecrawlCrawlApi", "FirecrawlMapApi", "FirecrawlScrapeApi", "FirecrawlSearchApi"]
@@ -0,0 +1,109 @@
1
+ import re
2
+
3
+ from lfx.custom.custom_component.component import Component
4
+ from lfx.io import DataInput, IntInput, MultilineInput, Output, SecretStrInput, StrInput
5
+ from lfx.schema.data import Data
6
+
7
+ _CAMEL_TO_SNAKE_RE = re.compile(r"(?<!^)(?=[A-Z])")
8
+
9
+
10
+ def _to_snake_case_kwargs(params: dict) -> dict:
11
+ """Convert camelCase option keys to snake_case keyword arguments.
12
+
13
+ The firecrawl-py v1 convention uses camelCase, while the v2 SDK expects
14
+ snake_case keyword arguments. Keys that are already snake_case are passed
15
+ through unchanged.
16
+ """
17
+ return {_CAMEL_TO_SNAKE_RE.sub("_", key).lower(): value for key, value in params.items()}
18
+
19
+
20
class FirecrawlCrawlApi(Component):
    """Langflow component that crawls a site through the firecrawl-py v2 SDK.

    Options supplied through the ``crawlerOptions`` and ``scrapeOptions`` inputs
    may use either the legacy v1 camelCase names or the v2 snake_case names;
    both are normalized to v2 keyword arguments before the crawl is started.
    """

    display_name: str = "Firecrawl Crawl API"
    description: str = "Crawls a URL and returns the results."
    name = "FirecrawlCrawlApi"

    # NOTE(review): this still points at the v1 docs although the implementation
    # targets the v2 SDK - confirm the intended URL.
    documentation: str = "https://docs.firecrawl.dev/v1/api-reference/endpoint/crawl-post"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Firecrawl API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        MultilineInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to crawl.",
            tool_mode=True,
        ),
        # NOTE(review): `timeout` and `idempotency_key` are declared as inputs but
        # crawl() never forwards them to the SDK - confirm whether that is intended.
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout in milliseconds for the request.",
        ),
        StrInput(
            name="idempotency_key",
            display_name="Idempotency Key",
            info="Optional idempotency key to ensure unique requests.",
        ),
        DataInput(
            name="crawlerOptions",
            display_name="Crawler Options",
            info="The crawler options to send with the request.",
        ),
        DataInput(
            name="scrapeOptions",
            display_name="Scrape Options",
            info="The page options to send with the request.",
        ),
    ]

    outputs = [
        Output(display_name="JSON", name="data", method="crawl"),
    ]
    idempotency_key: str | None = None

    def crawl(self) -> Data:
        """Crawl ``self.url`` and return the serialized crawl job.

        Returns:
            Data: ``{"results": <crawl job dumped to a dict>}``.

        Raises:
            ImportError: If the ``firecrawl`` package cannot be imported.
        """
        try:
            from firecrawl import Firecrawl
            from firecrawl.v2.types import ScrapeOptions
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        # Copy the user-supplied option payloads so the input Data objects are never mutated.
        crawler_options = dict(self.crawlerOptions.__dict__.get("data") or {}) if self.crawlerOptions else {}
        scrape_options = dict(self.scrapeOptions.__dict__.get("data") or {}) if self.scrapeOptions else {}

        # Normalize the user's options FIRST. Defaults are applied afterwards under the
        # v2 names, so a value the user supplied in any spelling (camelCase, snake_case,
        # or the v2 name itself) is never overwritten by a default.
        kwargs = _to_snake_case_kwargs(crawler_options)

        # Translate option names that firecrawl-py v2 renamed:
        # "maxDepth" -> "max_discovery_depth", "allowBackwardLinks" -> "crawl_entire_domain".
        if "max_depth" in kwargs:
            kwargs["max_discovery_depth"] = kwargs.pop("max_depth")
        if "allow_backward_links" in kwargs:
            kwargs["crawl_entire_domain"] = kwargs.pop("allow_backward_links")
        # v2 removed "ignore_sitemap"; it is now the "sitemap" mode enum.
        if kwargs.pop("ignore_sitemap", False):
            kwargs["sitemap"] = "skip"

        # Defaults for anything the user left unset.
        kwargs.setdefault("max_discovery_depth", 2)
        kwargs.setdefault("limit", 10000)
        kwargs.setdefault("allow_external_links", False)
        kwargs.setdefault("crawl_entire_domain", False)
        kwargs.setdefault("ignore_query_parameters", False)

        # Build the typed ScrapeOptions object expected by v2; only-main-content is
        # switched on unless the user explicitly chose otherwise.
        scrape_kwargs = _to_snake_case_kwargs(scrape_options)
        scrape_kwargs.setdefault("only_main_content", True)
        kwargs["scrape_options"] = ScrapeOptions(**scrape_kwargs)

        app = Firecrawl(api_key=self.api_key)
        # v2 polls to completion and returns a typed CrawlJob object.
        crawl_job = app.crawl(self.url, **kwargs)
        return Data(data={"results": crawl_job.model_dump()})
@@ -0,0 +1,94 @@
1
+ from lfx.custom.custom_component.component import Component
2
+ from lfx.io import (
3
+ BoolInput,
4
+ MultilineInput,
5
+ Output,
6
+ SecretStrInput,
7
+ )
8
+ from lfx.schema.data import Data
9
+
10
+
11
class FirecrawlMapApi(Component):
    """Langflow component that maps one or more URLs via the firecrawl-py v2 SDK."""

    display_name: str = "Firecrawl Map API"
    description: str = "Maps a URL and returns the results."
    name = "FirecrawlMapApi"

    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/map"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Firecrawl API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        MultilineInput(
            name="urls",
            display_name="URLs",
            required=True,
            info="List of URLs to create maps from (separated by commas or new lines).",
            tool_mode=True,
        ),
        BoolInput(
            name="ignore_sitemap",
            display_name="Ignore Sitemap",
            info="When true, the sitemap.xml file will be ignored during crawling.",
        ),
        BoolInput(
            name="sitemap_only",
            display_name="Sitemap Only",
            info="When true, only links found in the sitemap will be returned.",
        ),
        BoolInput(
            name="include_subdomains",
            display_name="Include Subdomains",
            info="When true, subdomains of the provided URL will also be scanned.",
        ),
    ]

    outputs = [
        Output(display_name="JSON", name="data", method="map"),
    ]

    def map(self) -> Data:
        """Map every URL in ``self.urls`` and return the combined links.

        Returns:
            Data: ``{"success": True, "links": [...]}`` with the links of all
            URLs concatenated in input order.

        Raises:
            ImportError: If the ``firecrawl`` package cannot be imported.
            ValueError: If ``self.urls`` is empty or contains no usable URL.
        """
        try:
            from firecrawl import Firecrawl
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        if not self.urls:
            msg = "URLs are required"
            raise ValueError(msg)

        # Newlines are treated as commas so both separators are accepted; blank
        # entries left over after trimming are discarded.
        trimmed = (piece.strip() for piece in self.urls.replace("\n", ",").split(","))
        targets = [piece for piece in trimmed if piece]
        if not targets:
            msg = "No valid URLs provided"
            raise ValueError(msg)

        # firecrawl-py v2 expresses the old "ignoreSitemap"/"sitemapOnly" flags as one
        # "sitemap" mode: "only", "skip", or (when omitted) the default mixed behavior.
        # "Sitemap only" takes precedence when both flags are set.
        map_options: dict = {"include_subdomains": self.include_subdomains}
        sitemap_mode = "only" if self.sitemap_only else ("skip" if self.ignore_sitemap else None)
        if sitemap_mode is not None:
            map_options["sitemap"] = sitemap_mode

        client = Firecrawl(api_key=self.api_key)

        collected = []
        for target in targets:
            response = client.map(target, **map_options)
            # The v2 result exposes typed link objects on ``.links``; dump each to a
            # plain dict when possible so downstream consumers get serializable data.
            for link in getattr(response, "links", None) or []:
                collected.append(link.model_dump() if hasattr(link, "model_dump") else link)

        return Data(data={"success": True, "links": collected})
@@ -0,0 +1,97 @@
1
+ import re
2
+
3
+ from lfx.custom.custom_component.component import Component
4
+ from lfx.io import (
5
+ DataInput,
6
+ IntInput,
7
+ MultilineInput,
8
+ Output,
9
+ SecretStrInput,
10
+ )
11
+ from lfx.schema.data import Data
12
+
13
+ _CAMEL_TO_SNAKE_RE = re.compile(r"(?<!^)(?=[A-Z])")
14
+
15
+
16
+ def _to_snake_case_kwargs(params: dict) -> dict:
17
+ """Convert camelCase option keys to snake_case keyword arguments.
18
+
19
+ The firecrawl-py v1 convention uses camelCase, while the v2 SDK expects
20
+ snake_case keyword arguments. Keys that are already snake_case are passed
21
+ through unchanged.
22
+ """
23
+ return {_CAMEL_TO_SNAKE_RE.sub("_", key).lower(): value for key, value in params.items()}
24
+
25
+
26
class FirecrawlScrapeApi(Component):
    """Langflow component that scrapes a single URL via the firecrawl-py v2 SDK.

    Options supplied through the ``scrapeOptions`` and ``extractorOptions``
    inputs may use either legacy v1 camelCase names or v2 snake_case names;
    both are normalized to snake_case keyword arguments.
    """

    display_name: str = "Firecrawl Scrape API"
    description: str = "Scrapes a URL and returns the results."
    name = "FirecrawlScrapeApi"

    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Firecrawl API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        MultilineInput(
            name="url",
            display_name="URL",
            required=True,
            info="The URL to scrape.",
            tool_mode=True,
        ),
        IntInput(
            name="timeout",
            display_name="Timeout",
            info="Timeout in milliseconds for the request.",
        ),
        DataInput(
            name="scrapeOptions",
            display_name="Scrape Options",
            info="The page options to send with the request.",
        ),
        DataInput(
            name="extractorOptions",
            display_name="Extractor Options",
            info="The extractor options to send with the request.",
        ),
    ]

    outputs = [
        Output(display_name="JSON", name="data", method="scrape"),
    ]

    def scrape(self) -> Data:
        """Scrape ``self.url`` and return the resulting document as a dict.

        Returns:
            Data: The scraped document dumped to a plain dict.

        Raises:
            ImportError: If the ``firecrawl`` package cannot be imported.
        """
        try:
            from firecrawl import Firecrawl
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        # Copy the user-supplied payload so the input Data object is never mutated.
        scrape_options = dict(self.scrapeOptions.__dict__.get("data") or {}) if self.scrapeOptions else {}

        # Normalize the user's options FIRST and apply defaults afterwards under the
        # snake_case names, so a value supplied as either "onlyMainContent" or
        # "only_main_content" is never overwritten by a default.
        kwargs = _to_snake_case_kwargs(scrape_options)
        kwargs.setdefault("formats", ["markdown"])  # Default output format
        kwargs.setdefault("only_main_content", True)  # Default to only main content
        if self.timeout:
            # The component-level timeout only applies when the options did not set one.
            kwargs.setdefault("timeout", self.timeout)

        # In firecrawl-py v2, structured extraction is requested via a "json" format entry
        # on the scrape call rather than a separate extractor option.
        extractor_options = (self.extractorOptions.__dict__.get("data") or {}) if self.extractorOptions else {}
        if extractor_options:
            # Build a new list so a user-provided "formats" list is not mutated in place.
            formats = list(kwargs.get("formats", []))
            formats.append({"type": "json", **_to_snake_case_kwargs(extractor_options)})
            kwargs["formats"] = formats

        app = Firecrawl(api_key=self.api_key)
        document = app.scrape(self.url, **kwargs)
        # v2 returns a typed Document object; serialize to a dict for downstream consumers.
        return Data(data=document.model_dump())
@@ -0,0 +1,76 @@
1
+ from lfx.custom.custom_component.component import Component
2
+ from lfx.io import (
3
+ IntInput,
4
+ MultilineInput,
5
+ Output,
6
+ SecretStrInput,
7
+ StrInput,
8
+ )
9
+ from lfx.schema.data import Data
10
+
11
+
12
class FirecrawlSearchApi(Component):
    """Langflow component that runs a web search via the firecrawl-py v2 SDK."""

    display_name: str = "Firecrawl Search API"
    description: str = "Searches the web and returns the results."
    name = "FirecrawlSearchApi"

    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/search"

    inputs = [
        SecretStrInput(
            name="api_key",
            display_name="Firecrawl API Key",
            required=True,
            password=True,
            info="The API key to use Firecrawl API.",
        ),
        MultilineInput(
            name="query",
            display_name="Query",
            required=True,
            info="The search query to run.",
            tool_mode=True,
        ),
        IntInput(
            name="limit",
            display_name="Limit",
            info="Maximum number of results to return.",
            value=5,
        ),
        StrInput(
            name="location",
            display_name="Location",
            info="Location to bias the search results (e.g. a country or region).",
            advanced=True,
        ),
    ]

    outputs = [
        Output(display_name="JSON", name="data", method="search"),
    ]

    def search(self) -> Data:
        """Run ``self.query`` through Firecrawl search and return the result.

        Returns:
            Data: The search result, dumped to a dict when the SDK object
            provides ``model_dump``; otherwise the raw result is passed through.

        Raises:
            ImportError: If the ``firecrawl`` package cannot be imported.
            ValueError: If ``self.query`` is empty.
        """
        try:
            from firecrawl import Firecrawl
        except ImportError as e:
            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
            raise ImportError(msg) from e

        if not self.query:
            msg = "Query is required"
            raise ValueError(msg)

        # Only forward the optional settings that carry a truthy value; anything
        # unset (or zero/empty) is left to the SDK's own defaults.
        optional_settings = {"limit": self.limit, "location": self.location}
        search_options: dict = {key: value for key, value in optional_settings.items() if value}

        client = Firecrawl(api_key=self.api_key)
        response = client.search(self.query, **search_options)

        # v2 returns a typed SearchData object (results grouped by source);
        # serialize to a dict for downstream consumers.
        if hasattr(response, "model_dump"):
            payload = response.model_dump()
        else:
            payload = response

        return Data(data=payload)
@@ -0,0 +1,16 @@
1
+ {
2
+ "$schema": "https://schemas.langflow.org/extension/v1.json",
3
+ "id": "lfx-firecrawl",
4
+ "version": "0.1.0",
5
+ "name": "Firecrawl",
6
+ "description": "Firecrawl components (Scrape, Crawl, Map, and Search APIs) as a standalone Langflow Extension Bundle.",
7
+ "lfx": {
8
+ "compat": ["1"]
9
+ },
10
+ "bundles": [
11
+ {
12
+ "name": "firecrawl",
13
+ "path": "components/firecrawl"
14
+ }
15
+ ]
16
+ }
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: lfx-firecrawl
3
+ Version: 0.1.0
4
+ Summary: Firecrawl components (Scrape, Crawl, Map, and Search APIs) as a standalone Langflow Extension Bundle.
5
+ Project-URL: Homepage, https://github.com/langflow-ai/langflow
6
+ Project-URL: Documentation, https://docs.langflow.org/extensions
7
+ Project-URL: Repository, https://github.com/langflow-ai/langflow
8
+ Author-email: Langflow <contact@langflow.org>
9
+ License: MIT
10
+ Keywords: bundle,extension,firecrawl,langflow,lfx
11
+ Requires-Python: <3.15,>=3.10
12
+ Requires-Dist: firecrawl-py<5.0.0,>=4.0.0
13
+ Requires-Dist: lfx<2.0.0,>=1.11.0.dev0
14
+ Description-Content-Type: text/markdown
15
+
16
+ # lfx-firecrawl
17
+
18
+ Firecrawl components (Scrape, Crawl, Map, and Search APIs) as a standalone
19
+ Langflow Extension Bundle, built against the firecrawl-py v2 SDK.
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install lfx-firecrawl
25
+ ```
26
+
27
+ The bundle is registered automatically via the `langflow.extensions`
28
+ entry-point. After install, restart your Langflow server; the bundle's
29
+ components will appear in the palette under the `firecrawl` group.
30
+
31
+ ## Develop
32
+
33
+ ```bash
34
+ cd src/bundles/firecrawl
35
+ pip install -e .
36
+ lfx extension validate src/lfx_firecrawl
37
+ ```
38
+
39
+ ## Migration
40
+
41
+ Saved flows referencing the legacy class name(s) or the old import paths
42
+ under `lfx.components.firecrawl.*` are rewritten to the new namespaced
43
+ IDs by the migration table in
44
+ `src/lfx/src/lfx/extension/migration/migration_table.json`.
@@ -0,0 +1,11 @@
1
+ lfx_firecrawl/__init__.py,sha256=LSgReguH1y01WCNbQ-moqIknCDnT1ShNoV2WQrAHZNM,712
2
+ lfx_firecrawl/extension.json,sha256=SrOl7qR09aEmJ-MiXC_QjhDjpIhzKQ3v9033f9fsKGs,390
3
+ lfx_firecrawl/components/firecrawl/__init__.py,sha256=r22GFArvunu6fi1Ed-AUxmITGEMLy9UU7OXU6L-AGAo,539
4
+ lfx_firecrawl/components/firecrawl/firecrawl_crawl_api.py,sha256=EwGDi68PU7ePuJhAgXhXZJQ4xAn2e5ph95-2uTDHDA0,4266
5
+ lfx_firecrawl/components/firecrawl/firecrawl_map_api.py,sha256=VqI_MTVJLbYaS_t2mS8ELOKfcBpxfYXYNzXqmM7iqQE,3304
6
+ lfx_firecrawl/components/firecrawl/firecrawl_scrape_api.py,sha256=ICSPl8JIXs4vKpaLE2AMlVvnpjiLuds9ciCPJlQB09M,3395
7
+ lfx_firecrawl/components/firecrawl/firecrawl_search_api.py,sha256=9syAHHJnAi21gE81zgwxewHc49Ik2mui4GTpEZ2Qwkk,2269
8
+ lfx_firecrawl-0.1.0.dist-info/METADATA,sha256=70UFD_Rl7xKltzfwWtLd8Sg09GUIrqFqnp9TpsXVPgQ,1397
9
+ lfx_firecrawl-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
10
+ lfx_firecrawl-0.1.0.dist-info/entry_points.txt,sha256=AVgggm-Bt6-8xHeIx_gzT305TJ1kcgMe7PX-TXkKZj0,52
11
+ lfx_firecrawl-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [langflow.extensions]
2
+ lfx-firecrawl = lfx_firecrawl