intentkit 0.6.0.dev11__py3-none-any.whl → 0.6.0.dev12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of intentkit might be problematic; see the registry's advisory page for more details.

intentkit/__init__.py CHANGED
@@ -3,7 +3,7 @@
3
3
  A powerful platform for building AI agents with blockchain and cryptocurrency capabilities.
4
4
  """
5
5
 
6
- __version__ = "0.6.0-dev.11"
6
+ __version__ = "0.6.0-dev.12"
7
7
  __author__ = "hyacinthus"
8
8
  __email__ = "hyacinthus@gmail.com"
9
9
 
@@ -121,6 +121,7 @@ class Config:
121
121
  self.moralis_api_key = self.load("MORALIS_API_KEY")
122
122
  self.tavily_api_key = self.load("TAVILY_API_KEY")
123
123
  self.cookiefun_api_key = self.load("COOKIEFUN_API_KEY")
124
+ self.firecrawl_api_key = self.load("FIRECRAWL_API_KEY")
124
125
  # Sentry
125
126
  self.sentry_dsn = self.load("SENTRY_DSN")
126
127
  self.sentry_sample_rate = float(self.load("SENTRY_SAMPLE_RATE", "0.1"))
@@ -539,6 +539,10 @@
539
539
  "title": "Web Scraper & Content Indexing",
540
540
  "$ref": "../skills/web_scraper/schema.json"
541
541
  },
542
+ "firecrawl": {
543
+ "title": "Firecrawl Web Scraping",
544
+ "$ref": "../skills/firecrawl/schema.json"
545
+ },
542
546
  "aixbt": {
543
547
  "title": "AIXBT",
544
548
  "$ref": "../skills/aixbt/schema.json"
@@ -0,0 +1,178 @@
1
+ # Firecrawl Skills
2
+
3
+ The Firecrawl skills provide advanced web scraping and content indexing capabilities using the Firecrawl API. These skills can handle JavaScript-heavy websites, PDFs, and provide automatic content indexing for intelligent querying.
4
+
5
+ ## Skills Overview
6
+
7
+ ### 1. firecrawl_scrape
8
+ Scrapes a single webpage and optionally indexes the content for future querying.
9
+
10
+ **Parameters:**
11
+ - `url` (required): The URL to scrape
12
+ - `formats` (optional): Output formats - markdown, html, rawHtml, screenshot, links, extract (default: ["markdown"])
13
+ - `include_tags` (optional): HTML tags to include (e.g., ["h1", "h2", "p"])
14
+ - `exclude_tags` (optional): HTML tags to exclude
15
+ - `only_main_content` (optional): Extract only main content (default: true)
16
+ - `index_content` (optional): Whether to index content for querying (default: true)
17
+ - `chunk_size` (optional): Size of text chunks for indexing (default: 1000)
18
+ - `chunk_overlap` (optional): Overlap between chunks (default: 200)
19
+
20
+ ### 2. firecrawl_crawl
21
+ Crawls multiple pages from a website and indexes all content.
22
+
23
+ **Parameters:**
24
+ - `url` (required): The base URL to start crawling
25
+ - `include_paths` (optional): URL patterns to include (e.g., ["/docs/*"])
26
+ - `exclude_paths` (optional): URL patterns to exclude
27
+ - `max_depth` (optional): Maximum crawl depth (default: 2)
28
+ - `limit` (optional): Maximum number of pages to crawl (default: 5)
29
+ - `index_content` (optional): Whether to index content for querying (default: true)
30
+ - `chunk_size` (optional): Size of text chunks for indexing (default: 1000)
31
+ - `chunk_overlap` (optional): Overlap between chunks (default: 200)
32
+
33
+ ### 3. firecrawl_query_indexed_content
34
+ Queries previously indexed Firecrawl content using semantic search.
35
+
36
+ **Parameters:**
37
+ - `query` (required): The search query
38
+ - `limit` (optional): Maximum number of results to return (1-10, default: 4)
39
+
40
+ ## API Key Configuration
41
+ Set your Firecrawl API key as an environment variable:
42
+ ```bash
43
+ export FIRECRAWL_API_KEY=fc-your-api-key-here
44
+ ```
45
+
46
+ ## Testing Instructions
47
+
48
+ ### Step 1: Create an Agent with Firecrawl Skills
49
+
50
+ 1. **Create a new agent** via the API or UI with the following skills:
51
+ ```json
52
+ {
53
+ "skills": [
54
+ "firecrawl_scrape",
55
+ "firecrawl_crawl",
56
+ "firecrawl_query_indexed_content"
57
+ ]
58
+ }
59
+ ```
60
+
61
+ 2. **Note the agent ID** for testing
62
+
63
+ ### Step 2: Test Single Page Scraping
64
+
65
+ **Test scraping a documentation homepage:**
66
+ ```
67
+ Prompt: "Use firecrawl_scrape to scrape https://docs.joincommonwealth.xyz/ and index the content for future querying"
68
+ ```
69
+
70
+ **Expected Result:**
71
+ - Content successfully scraped
72
+ - Content automatically indexed with metadata
73
+ - Confirmation of chunk creation and indexing
74
+
75
+ ### Step 3: Test Content Crawling
76
+
77
+ **Test crawling multiple pages:**
78
+ ```
79
+ Prompt: "Use firecrawl_crawl to crawl https://docs.joincommonwealth.xyz/ with max_depth=2 and limit=3 to index multiple documentation pages"
80
+ ```
81
+
82
+ **Expected Result:**
83
+ - Multiple pages crawled and scraped
84
+ - Each page indexed separately
85
+ - Batch processing confirmation
86
+
87
+ ### Step 4: Test Content Querying
88
+
89
+ **Test querying indexed content:**
90
+ ```
91
+ Prompt: "Use firecrawl_query_indexed_content to search for 'What is All Street and what is its purpose?' in the indexed content"
92
+ ```
93
+
94
+ **Expected Result:**
95
+ - Relevant content retrieved from indexed documents
96
+ - Results tagged with [Firecrawl Scrape] or [Firecrawl Crawl]
97
+ - Source URLs and metadata included
98
+
99
+ ### Step 5: Test Advanced Scraping Options
100
+
101
+ **Test with specific formatting:**
102
+ ```
103
+ Prompt: "Use firecrawl_scrape to scrape https://docs.joincommonwealth.xyz/all-street-manifesto with formats=['markdown', 'html'] and include_tags=['h1', 'h2', 'p'] and index_content=true"
104
+ ```
105
+
106
+ **Expected Result:**
107
+ - Content in both markdown and HTML formats
108
+ - Only specified HTML tags included
109
+ - Content indexed for querying
110
+
111
+ ### Step 6: Test Multiple Queries
112
+
113
+ **Test different query types:**
114
+ ```
115
+ Prompt: "Use firecrawl_query_indexed_content to search for 'democratize finance' in the indexed content"
116
+ ```
117
+
118
+ **Expected Result:**
119
+ - Relevant content retrieved from Firecrawl's independent vector store
120
+ - Results tagged with [Firecrawl Scrape] or [Firecrawl Crawl]
121
+ - Source URLs and metadata included
122
+
123
+ ## Common Use Cases
124
+
125
+ ### Documentation Indexing
126
+ ```
127
+ 1. Scrape main documentation page
128
+ 2. Crawl related documentation sections
129
+ 3. Query for specific technical information
130
+ ```
131
+
132
+ ### Competitive Analysis
133
+ ```
134
+ 1. Scrape competitor websites
135
+ 2. Index product information and features
136
+ 3. Query for specific comparisons
137
+ ```
138
+
139
+ ### Research and Knowledge Base
140
+ ```
141
+ 1. Crawl research papers or articles
142
+ 2. Index academic or technical content
143
+ 3. Query for specific concepts or methodologies
144
+ ```
145
+
146
+ ## Troubleshooting
147
+
148
+ ### Common Issues
149
+
150
+ 1. **API Key Not Found**
151
+ - Ensure `FIRECRAWL_API_KEY` environment variable is set
152
+ - Restart the IntentKit server after setting the key
153
+
154
+ 2. **Scraping Failures**
155
+ - Check if the URL is accessible
156
+ - Verify Firecrawl API quota and limits
157
+ - Some websites may block scraping
158
+
159
+ 3. **Indexing Errors**
160
+ - Ensure OpenAI API key is configured for embeddings
161
+ - Check if content is too large for processing
162
+ - Verify vector store permissions
163
+
164
+ 4. **Query Returns No Results**
165
+ - Ensure content was successfully indexed
166
+ - Try broader or different search terms
167
+ - Check if vector store contains data
168
+
169
+ ## Features and Benefits
170
+
171
+ - **JavaScript Rendering**: Handles SPAs and dynamic content
172
+ - **PDF Support**: Can scrape and index PDF documents
173
+ - **Intelligent Chunking**: Optimized text splitting for better search
174
+ - **Independent Storage**: Uses its own dedicated vector store for Firecrawl content
175
+ - **Metadata Rich**: Includes source URLs, timestamps, and content types
176
+ - **Semantic Search**: Uses OpenAI embeddings for intelligent querying
177
+ - **Batch Processing**: Efficient handling of multiple pages
178
+ - **Content Filtering**: Flexible include/exclude options for targeted scraping
@@ -0,0 +1,99 @@
1
+ """Firecrawl skills for web scraping and crawling."""
2
+
3
+ import logging
4
+ from typing import TypedDict
5
+
6
+ from intentkit.abstracts.skill import SkillStoreABC
7
+ from intentkit.skills.base import SkillConfig, SkillState
8
+ from intentkit.skills.firecrawl.base import FirecrawlBaseTool
9
+ from intentkit.skills.firecrawl.crawl import FirecrawlCrawl
10
+ from intentkit.skills.firecrawl.query import FirecrawlQueryIndexedContent
11
+ from intentkit.skills.firecrawl.scrape import FirecrawlScrape
12
+
13
# Cache skill instances at the module level: the skills are stateless,
# so a single instance per skill name can be shared across all agents.
_cache: dict[str, FirecrawlBaseTool] = {}

# Module-level logger, standard `logging` convention.
logger = logging.getLogger(__name__)
17
+
18
+
19
class SkillStates(TypedDict):
    """Visibility state for each Firecrawl skill.

    Each value is a ``SkillState``; ``get_skills()`` skips ``"disabled"``
    entries and includes ``"private"`` ones only for private callers.
    """

    firecrawl_scrape: SkillState
    firecrawl_crawl: SkillState
    firecrawl_query_indexed_content: SkillState
23
+
24
+
25
class Config(SkillConfig):
    """Configuration for Firecrawl skills."""

    # Per-skill enable/visibility states consumed by get_skills().
    states: SkillStates
    # Agent-owner supplied Firecrawl API key; read by
    # FirecrawlBaseTool.get_api_key when api_key_provider is "agent_owner".
    api_key: str = ""
    # Key source selector: "agent_owner" uses `api_key` above, any other
    # value falls back to the system config key "firecrawl_api_key".
    api_key_provider: str = "agent_owner"
    # Rate limit: rate_limit_number requests per rate_limit_minutes window.
    # NOTE(review): enforcement is not visible in this file — presumably
    # handled by shared skill infrastructure; confirm.
    rate_limit_number: int = 100
    rate_limit_minutes: int = 60
33
+
34
+
35
async def get_skills(
    config: "Config",
    is_private: bool,
    store: SkillStoreABC,
    **_,
) -> list[FirecrawlBaseTool]:
    """Resolve the enabled Firecrawl skills for an agent.

    Args:
        config: The configuration for Firecrawl skills.
        is_private: Whether to include private skills.
        store: The skill store for persisting data.

    Returns:
        A list of Firecrawl skills.
    """
    # A skill is exposed when it is "public", or "private" and the caller
    # may see private skills; "disabled" (or any other state) is dropped.
    enabled_names = [
        skill_name
        for skill_name, state in config["states"].items()
        if state == "public" or (state == "private" and is_private)
    ]
    return [get_firecrawl_skill(skill_name, store) for skill_name in enabled_names]
62
+
63
+
64
def get_firecrawl_skill(
    name: str,
    store: SkillStoreABC,
) -> FirecrawlBaseTool:
    """Get a Firecrawl skill by name, constructing it on first use.

    Instances are memoized in the module-level ``_cache`` because the
    skills are stateless.

    Args:
        name: The name of the skill to get
        store: The skill store for persisting data

    Returns:
        The requested Firecrawl skill

    Raises:
        ValueError: If the skill name is unknown
    """
    # Validate up front so the error path never touches the cache or the
    # skill classes; this also replaces three copy-pasted if/elif branches
    # that each duplicated the cache-check boilerplate.
    if name not in (
        "firecrawl_scrape",
        "firecrawl_crawl",
        "firecrawl_query_indexed_content",
    ):
        raise ValueError(f"Unknown Firecrawl skill: {name}")
    if name not in _cache:
        # Dispatch table: skill name -> concrete tool class.
        skill_classes = {
            "firecrawl_scrape": FirecrawlScrape,
            "firecrawl_crawl": FirecrawlCrawl,
            "firecrawl_query_indexed_content": FirecrawlQueryIndexedContent,
        }
        _cache[name] = skill_classes[name](skill_store=store)
    return _cache[name]
@@ -0,0 +1,28 @@
1
+ from typing import Type
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from intentkit.abstracts.skill import SkillStoreABC
6
+ from intentkit.skills.base import IntentKitSkill, SkillContext
7
+
8
+
9
class FirecrawlBaseTool(IntentKitSkill):
    """Base class for Firecrawl tools."""

    # Tool identity surfaced to the agent runtime.
    name: str = Field(description="The name of the tool")
    description: str = Field(description="A description of what the tool does")
    # Pydantic model describing the tool's input arguments.
    args_schema: Type[BaseModel]
    skill_store: SkillStoreABC = Field(
        description="The skill store for persisting data"
    )

    def get_api_key(self, context: SkillContext) -> str:
        """Get the Firecrawl API key from configuration.

        If the skill config selects the "agent_owner" provider, the key is
        read from the per-agent config; otherwise the platform-wide
        "firecrawl_api_key" system config is used.

        NOTE(review): when api_key_provider == "agent_owner" but the agent
        left `api_key` unset, this returns ""/None instead of falling back
        to the system config — confirm that is intended.
        """
        skill_config = context.config
        if skill_config.get("api_key_provider") == "agent_owner":
            return skill_config.get("api_key")
        return self.skill_store.get_system_config("firecrawl_api_key")

    @property
    def category(self) -> str:
        """Category identifier shared by all Firecrawl skills."""
        return "firecrawl"