PyPI - lfx-firecrawl - Versions diffs - 0.1.0__py3-none-any.whl - Mend

lfx-firecrawl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

lfx_firecrawl/__init__.py +14 -0
lfx_firecrawl/components/firecrawl/__init__.py +13 -0
lfx_firecrawl/components/firecrawl/firecrawl_crawl_api.py +109 -0
lfx_firecrawl/components/firecrawl/firecrawl_map_api.py +94 -0
lfx_firecrawl/components/firecrawl/firecrawl_scrape_api.py +97 -0
lfx_firecrawl/components/firecrawl/firecrawl_search_api.py +76 -0
lfx_firecrawl/extension.json +16 -0
lfx_firecrawl-0.1.0.dist-info/METADATA +44 -0
lfx_firecrawl-0.1.0.dist-info/RECORD +11 -0
lfx_firecrawl-0.1.0.dist-info/WHEEL +4 -0
lfx_firecrawl-0.1.0.dist-info/entry_points.txt +2 -0

lfx_firecrawl/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""lfx-firecrawl: Firecrawl bundle.
+Distribution unit ``lfx-firecrawl``.  At runtime Langflow's loader
+discovers ``extension.json`` shipped alongside this ``__init__.py`` and
+registers the bundle's components under the namespaced IDs
+``ext:firecrawl:<Class>@official``.
+"""
+from lfx_firecrawl.components.firecrawl.firecrawl_crawl_api import FirecrawlCrawlApi
+from lfx_firecrawl.components.firecrawl.firecrawl_map_api import FirecrawlMapApi
+from lfx_firecrawl.components.firecrawl.firecrawl_scrape_api import FirecrawlScrapeApi
+from lfx_firecrawl.components.firecrawl.firecrawl_search_api import FirecrawlSearchApi
+__all__ = ["FirecrawlCrawlApi", "FirecrawlMapApi", "FirecrawlScrapeApi", "FirecrawlSearchApi"]

lfx_firecrawl/components/firecrawl/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Component re-exports for the ``firecrawl`` bundle.
+Saved-flow migration entries that target ``lfx.components.firecrawl.<Class>``
+resolve through this package, so the moved Component class(es) must be
+importable from here by name.
+"""
+from .firecrawl_crawl_api import FirecrawlCrawlApi
+from .firecrawl_map_api import FirecrawlMapApi
+from .firecrawl_scrape_api import FirecrawlScrapeApi
+from .firecrawl_search_api import FirecrawlSearchApi
+__all__ = ["FirecrawlCrawlApi", "FirecrawlMapApi", "FirecrawlScrapeApi", "FirecrawlSearchApi"]

lfx_firecrawl/components/firecrawl/firecrawl_crawl_api.py ADDED Viewed

@@ -0,0 +1,109 @@
+import re
+from lfx.custom.custom_component.component import Component
+from lfx.io import DataInput, IntInput, MultilineInput, Output, SecretStrInput, StrInput
+from lfx.schema.data import Data
+_CAMEL_TO_SNAKE_RE = re.compile(r"(?<!^)(?=[A-Z])")
+def _to_snake_case_kwargs(params: dict) -> dict:
+    """Convert camelCase option keys to snake_case keyword arguments.
+    The firecrawl-py v1 convention uses camelCase, while the v2 SDK expects
+    snake_case keyword arguments. Keys that are already snake_case are passed
+    through unchanged.
+    """
+    return {_CAMEL_TO_SNAKE_RE.sub("_", key).lower(): value for key, value in params.items()}
+class FirecrawlCrawlApi(Component):
+    display_name: str = "Firecrawl Crawl API"
+    description: str = "Crawls a URL and returns the results."
+    name = "FirecrawlCrawlApi"
+    documentation: str = "https://docs.firecrawl.dev/v1/api-reference/endpoint/crawl-post"
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="Firecrawl API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        MultilineInput(
+            name="url",
+            display_name="URL",
+            required=True,
+            info="The URL to scrape.",
+            tool_mode=True,
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout in milliseconds for the request.",
+        ),
+        StrInput(
+            name="idempotency_key",
+            display_name="Idempotency Key",
+            info="Optional idempotency key to ensure unique requests.",
+        ),
+        DataInput(
+            name="crawlerOptions",
+            display_name="Crawler Options",
+            info="The crawler options to send with the request.",
+        ),
+        DataInput(
+            name="scrapeOptions",
+            display_name="Scrape Options",
+            info="The page options to send with the request.",
+        ),
+    ]
+    outputs = [
+        Output(display_name="JSON", name="data", method="crawl"),
+    ]
+    idempotency_key: str | None = None
+    def crawl(self) -> Data:
+        try:
+            from firecrawl import Firecrawl
+            from firecrawl.v2.types import ScrapeOptions
+        except ImportError as e:
+            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
+            raise ImportError(msg) from e
+        params = dict(self.crawlerOptions.__dict__.get("data", {})) if self.crawlerOptions else {}
+        scrape_options_dict = dict(self.scrapeOptions.__dict__.get("data", {})) if self.scrapeOptions else {}
+        # Set default values for crawl parameters.
+        # Note: firecrawl-py v2 renamed several options. "maxDepth" -> "max_discovery_depth"
+        # and "allowBackwardLinks" -> "crawl_entire_domain".
+        params.setdefault("maxDepth", 2)
+        params.setdefault("limit", 10000)
+        params.setdefault("allowExternalLinks", False)
+        params.setdefault("allowBackwardLinks", False)
+        params.setdefault("ignoreQueryParameters", False)
+        # Ensure onlyMainContent is explicitly set if not provided.
+        scrape_options_dict.setdefault("onlyMainContent", True)
+        # Translate legacy v1 camelCase option names to the firecrawl-py v2 keyword args.
+        kwargs = _to_snake_case_kwargs(params)
+        if "max_depth" in kwargs:
+            kwargs["max_discovery_depth"] = kwargs.pop("max_depth")
+        if "allow_backward_links" in kwargs:
+            kwargs["crawl_entire_domain"] = kwargs.pop("allow_backward_links")
+        # v2 removed "ignore_sitemap"; it is now the "sitemap" mode enum.
+        if kwargs.pop("ignore_sitemap", False):
+            kwargs["sitemap"] = "skip"
+        # Build the typed ScrapeOptions object expected by v2 from the (snake_cased) dict.
+        scrape_kwargs = _to_snake_case_kwargs(scrape_options_dict)
+        kwargs["scrape_options"] = ScrapeOptions(**scrape_kwargs)
+        app = Firecrawl(api_key=self.api_key)
+        # v2 polls to completion and returns a typed CrawlJob object.
+        crawl_job = app.crawl(self.url, **kwargs)
+        return Data(data={"results": crawl_job.model_dump()})

lfx_firecrawl/components/firecrawl/firecrawl_map_api.py ADDED Viewed

@@ -0,0 +1,94 @@
+from lfx.custom.custom_component.component import Component
+from lfx.io import (
+    BoolInput,
+    MultilineInput,
+    Output,
+    SecretStrInput,
+)
+from lfx.schema.data import Data
+class FirecrawlMapApi(Component):
+    display_name: str = "Firecrawl Map API"
+    description: str = "Maps a URL and returns the results."
+    name = "FirecrawlMapApi"
+    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/map"
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="Firecrawl API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        MultilineInput(
+            name="urls",
+            display_name="URLs",
+            required=True,
+            info="List of URLs to create maps from (separated by commas or new lines).",
+            tool_mode=True,
+        ),
+        BoolInput(
+            name="ignore_sitemap",
+            display_name="Ignore Sitemap",
+            info="When true, the sitemap.xml file will be ignored during crawling.",
+        ),
+        BoolInput(
+            name="sitemap_only",
+            display_name="Sitemap Only",
+            info="When true, only links found in the sitemap will be returned.",
+        ),
+        BoolInput(
+            name="include_subdomains",
+            display_name="Include Subdomains",
+            info="When true, subdomains of the provided URL will also be scanned.",
+        ),
+    ]
+    outputs = [
+        Output(display_name="JSON", name="data", method="map"),
+    ]
+    def map(self) -> Data:
+        try:
+            from firecrawl import Firecrawl
+        except ImportError as e:
+            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
+            raise ImportError(msg) from e
+        # Validate URLs
+        if not self.urls:
+            msg = "URLs are required"
+            raise ValueError(msg)
+        # Split and validate URLs (handle both commas and newlines)
+        urls = [url.strip() for url in self.urls.replace("\n", ",").split(",") if url.strip()]
+        if not urls:
+            msg = "No valid URLs provided"
+            raise ValueError(msg)
+        # firecrawl-py v2 replaced the separate "ignoreSitemap"/"sitemapOnly" flags with a
+        # single "sitemap" mode: "only" (sitemap only), "skip" (ignore sitemap), or the
+        # default mixed behavior.
+        kwargs: dict = {"include_subdomains": self.include_subdomains}
+        if self.sitemap_only:
+            kwargs["sitemap"] = "only"
+        elif self.ignore_sitemap:
+            kwargs["sitemap"] = "skip"
+        app = Firecrawl(api_key=self.api_key)
+        # Map all provided URLs and combine results.
+        combined_links = []
+        for url in urls:
+            result = app.map(url, **kwargs)
+            # v2 returns a typed MapData object whose .links is a list of typed
+            # SearchResult objects; serialize each to a dict for downstream consumers.
+            links = getattr(result, "links", None) or []
+            combined_links.extend(link.model_dump() if hasattr(link, "model_dump") else link for link in links)
+        map_result = {"success": True, "links": combined_links}
+        return Data(data=map_result)

lfx_firecrawl/components/firecrawl/firecrawl_scrape_api.py ADDED Viewed

@@ -0,0 +1,97 @@
+import re
+from lfx.custom.custom_component.component import Component
+from lfx.io import (
+    DataInput,
+    IntInput,
+    MultilineInput,
+    Output,
+    SecretStrInput,
+)
+from lfx.schema.data import Data
+_CAMEL_TO_SNAKE_RE = re.compile(r"(?<!^)(?=[A-Z])")
+def _to_snake_case_kwargs(params: dict) -> dict:
+    """Convert camelCase option keys to snake_case keyword arguments.
+    The firecrawl-py v1 convention uses camelCase, while the v2 SDK expects
+    snake_case keyword arguments. Keys that are already snake_case are passed
+    through unchanged.
+    """
+    return {_CAMEL_TO_SNAKE_RE.sub("_", key).lower(): value for key, value in params.items()}
+class FirecrawlScrapeApi(Component):
+    display_name: str = "Firecrawl Scrape API"
+    description: str = "Scrapes a URL and returns the results."
+    name = "FirecrawlScrapeApi"
+    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/scrape"
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="Firecrawl API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        MultilineInput(
+            name="url",
+            display_name="URL",
+            required=True,
+            info="The URL to scrape.",
+            tool_mode=True,
+        ),
+        IntInput(
+            name="timeout",
+            display_name="Timeout",
+            info="Timeout in milliseconds for the request.",
+        ),
+        DataInput(
+            name="scrapeOptions",
+            display_name="Scrape Options",
+            info="The page options to send with the request.",
+        ),
+        DataInput(
+            name="extractorOptions",
+            display_name="Extractor Options",
+            info="The extractor options to send with the request.",
+        ),
+    ]
+    outputs = [
+        Output(display_name="JSON", name="data", method="scrape"),
+    ]
+    def scrape(self) -> Data:
+        try:
+            from firecrawl import Firecrawl
+        except ImportError as e:
+            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
+            raise ImportError(msg) from e
+        params = dict(self.scrapeOptions.__dict__.get("data", {})) if self.scrapeOptions else {}
+        # Set default values for parameters
+        params.setdefault("formats", ["markdown"])  # Default output format
+        params.setdefault("onlyMainContent", True)  # Default to only main content
+        if self.timeout:
+            params.setdefault("timeout", self.timeout)
+        kwargs = _to_snake_case_kwargs(params)
+        # In firecrawl-py v2, structured extraction is requested via a "json" format entry
+        # on the scrape call rather than a separate extractor option.
+        extractor_options_dict = self.extractorOptions.__dict__.get("data", {}) if self.extractorOptions else {}
+        if extractor_options_dict:
+            formats = list(kwargs.get("formats", []))
+            formats.append({"type": "json", **_to_snake_case_kwargs(extractor_options_dict)})
+            kwargs["formats"] = formats
+        app = Firecrawl(api_key=self.api_key)
+        document = app.scrape(self.url, **kwargs)
+        # v2 returns a typed Document object; serialize to a dict for downstream consumers.
+        return Data(data=document.model_dump())

lfx_firecrawl/components/firecrawl/firecrawl_search_api.py ADDED Viewed

@@ -0,0 +1,76 @@
+from lfx.custom.custom_component.component import Component
+from lfx.io import (
+    IntInput,
+    MultilineInput,
+    Output,
+    SecretStrInput,
+    StrInput,
+)
+from lfx.schema.data import Data
+class FirecrawlSearchApi(Component):
+    display_name: str = "Firecrawl Search API"
+    description: str = "Searches the web and returns the results."
+    name = "FirecrawlSearchApi"
+    documentation: str = "https://docs.firecrawl.dev/api-reference/endpoint/search"
+    inputs = [
+        SecretStrInput(
+            name="api_key",
+            display_name="Firecrawl API Key",
+            required=True,
+            password=True,
+            info="The API key to use Firecrawl API.",
+        ),
+        MultilineInput(
+            name="query",
+            display_name="Query",
+            required=True,
+            info="The search query to run.",
+            tool_mode=True,
+        ),
+        IntInput(
+            name="limit",
+            display_name="Limit",
+            info="Maximum number of results to return.",
+            value=5,
+        ),
+        StrInput(
+            name="location",
+            display_name="Location",
+            info="Location to bias the search results (e.g. a country or region).",
+            advanced=True,
+        ),
+    ]
+    outputs = [
+        Output(display_name="JSON", name="data", method="search"),
+    ]
+    def search(self) -> Data:
+        try:
+            from firecrawl import Firecrawl
+        except ImportError as e:
+            msg = "Could not import firecrawl integration package. Please install it with `pip install firecrawl-py`."
+            raise ImportError(msg) from e
+        if not self.query:
+            msg = "Query is required"
+            raise ValueError(msg)
+        kwargs: dict = {}
+        if self.limit:
+            kwargs["limit"] = self.limit
+        if self.location:
+            kwargs["location"] = self.location
+        app = Firecrawl(api_key=self.api_key)
+        result = app.search(self.query, **kwargs)
+        # v2 returns a typed SearchData object (results grouped by source);
+        # serialize to a dict for downstream consumers.
+        search_result = result.model_dump() if hasattr(result, "model_dump") else result
+        return Data(data=search_result)

lfx_firecrawl/extension.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "$schema": "https://schemas.langflow.org/extension/v1.json",
+  "id": "lfx-firecrawl",
+  "version": "0.1.0",
+  "name": "Firecrawl",
+  "description": "Firecrawl components (Scrape, Crawl, Map, and Search APIs) as a standalone Langflow Extension Bundle.",
+  "lfx": {
+    "compat": ["1"]
+  },
+  "bundles": [
+    {
+      "name": "firecrawl",
+      "path": "components/firecrawl"
+    }
+  ]
+}

lfx_firecrawl-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,44 @@
+Metadata-Version: 2.4
+Name: lfx-firecrawl
+Version: 0.1.0
+Summary: Firecrawl components (Scrape, Crawl, Map, and Search APIs) as a standalone Langflow Extension Bundle.
+Project-URL: Homepage, https://github.com/langflow-ai/langflow
+Project-URL: Documentation, https://docs.langflow.org/extensions
+Project-URL: Repository, https://github.com/langflow-ai/langflow
+Author-email: Langflow <contact@langflow.org>
+License: MIT
+Keywords: bundle,extension,firecrawl,langflow,lfx
+Requires-Python: <3.15,>=3.10
+Requires-Dist: firecrawl-py<5.0.0,>=4.0.0
+Requires-Dist: lfx<2.0.0,>=1.11.0.dev0
+Description-Content-Type: text/markdown
+# lfx-firecrawl
+Firecrawl components (Scrape, Crawl, Map, and Search APIs) as a standalone
+Langflow Extension Bundle, built against the firecrawl-py v2 SDK.
+## Install
+```bash
+pip install lfx-firecrawl
+```
+The bundle is registered automatically via the `langflow.extensions`
+entry-point.  After install, restart your Langflow server; the bundle's
+components will appear in the palette under the `firecrawl` group.
+## Develop
+```bash
+cd src/bundles/firecrawl
+pip install -e .
+lfx extension validate src/lfx_firecrawl
+```
+## Migration
+Saved flows referencing the legacy class name(s) or the old import paths
+under `lfx.components.firecrawl.*` are rewritten to the new namespaced
+IDs by the migration table in
+`src/lfx/src/lfx/extension/migration/migration_table.json`.

lfx_firecrawl-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,11 @@
+lfx_firecrawl/__init__.py,sha256=LSgReguH1y01WCNbQ-moqIknCDnT1ShNoV2WQrAHZNM,712
+lfx_firecrawl/extension.json,sha256=SrOl7qR09aEmJ-MiXC_QjhDjpIhzKQ3v9033f9fsKGs,390
+lfx_firecrawl/components/firecrawl/__init__.py,sha256=r22GFArvunu6fi1Ed-AUxmITGEMLy9UU7OXU6L-AGAo,539
+lfx_firecrawl/components/firecrawl/firecrawl_crawl_api.py,sha256=EwGDi68PU7ePuJhAgXhXZJQ4xAn2e5ph95-2uTDHDA0,4266
+lfx_firecrawl/components/firecrawl/firecrawl_map_api.py,sha256=VqI_MTVJLbYaS_t2mS8ELOKfcBpxfYXYNzXqmM7iqQE,3304
+lfx_firecrawl/components/firecrawl/firecrawl_scrape_api.py,sha256=ICSPl8JIXs4vKpaLE2AMlVvnpjiLuds9ciCPJlQB09M,3395
+lfx_firecrawl/components/firecrawl/firecrawl_search_api.py,sha256=9syAHHJnAi21gE81zgwxewHc49Ik2mui4GTpEZ2Qwkk,2269
+lfx_firecrawl-0.1.0.dist-info/METADATA,sha256=70UFD_Rl7xKltzfwWtLd8Sg09GUIrqFqnp9TpsXVPgQ,1397
+lfx_firecrawl-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+lfx_firecrawl-0.1.0.dist-info/entry_points.txt,sha256=AVgggm-Bt6-8xHeIx_gzT305TJ1kcgMe7PX-TXkKZj0,52
+lfx_firecrawl-0.1.0.dist-info/RECORD,,

lfx_firecrawl-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

lfx_firecrawl-0.1.0.dist-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,2 @@
+[langflow.extensions]
+lfx-firecrawl = lfx_firecrawl