intentkit 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94)
  1. intentkit/__init__.py +1 -1
  2. intentkit/abstracts/skill.py +12 -0
  3. intentkit/clients/cdp.py +114 -16
  4. intentkit/config/config.py +12 -4
  5. intentkit/core/engine.py +39 -31
  6. intentkit/core/node.py +8 -4
  7. intentkit/core/prompt.py +5 -6
  8. intentkit/core/skill.py +11 -0
  9. intentkit/models/agent.py +2 -9
  10. intentkit/models/agent_data.py +18 -0
  11. intentkit/models/agent_schema.json +12 -0
  12. intentkit/models/chat.py +50 -0
  13. intentkit/models/skill.py +19 -0
  14. intentkit/skills/base.py +37 -17
  15. intentkit/skills/cdp/__init__.py +6 -14
  16. intentkit/skills/cdp/get_balance.py +77 -25
  17. intentkit/skills/cdp/schema.json +0 -64
  18. intentkit/skills/cryptocompare/fetch_news.py +2 -2
  19. intentkit/skills/cryptocompare/fetch_price.py +2 -2
  20. intentkit/skills/cryptocompare/fetch_top_exchanges.py +2 -2
  21. intentkit/skills/cryptocompare/fetch_top_market_cap.py +2 -2
  22. intentkit/skills/cryptocompare/fetch_top_volume.py +2 -2
  23. intentkit/skills/cryptocompare/fetch_trading_signals.py +2 -2
  24. intentkit/skills/defillama/base.py +3 -3
  25. intentkit/skills/enso/base.py +27 -4
  26. intentkit/skills/enso/networks.py +1 -1
  27. intentkit/skills/enso/route.py +24 -23
  28. intentkit/skills/enso/tokens.py +1 -1
  29. intentkit/skills/enso/wallet.py +27 -23
  30. intentkit/skills/firecrawl/README.md +211 -0
  31. intentkit/skills/firecrawl/__init__.py +107 -0
  32. intentkit/skills/firecrawl/base.py +28 -0
  33. intentkit/skills/firecrawl/clear.py +87 -0
  34. intentkit/skills/firecrawl/crawl.py +399 -0
  35. intentkit/skills/firecrawl/firecrawl.png +0 -0
  36. intentkit/skills/firecrawl/query.py +123 -0
  37. intentkit/skills/firecrawl/schema.json +153 -0
  38. intentkit/skills/firecrawl/scrape.py +318 -0
  39. intentkit/skills/firecrawl/utils.py +306 -0
  40. intentkit/skills/heurist/image_generation_animagine_xl.py +1 -1
  41. intentkit/skills/heurist/image_generation_arthemy_comics.py +1 -1
  42. intentkit/skills/heurist/image_generation_arthemy_real.py +1 -1
  43. intentkit/skills/heurist/image_generation_braindance.py +1 -1
  44. intentkit/skills/heurist/image_generation_cyber_realistic_xl.py +1 -1
  45. intentkit/skills/heurist/image_generation_flux_1_dev.py +1 -1
  46. intentkit/skills/heurist/image_generation_sdxl.py +1 -1
  47. intentkit/skills/http/README.md +78 -0
  48. intentkit/skills/http/__init__.py +100 -0
  49. intentkit/skills/http/base.py +21 -0
  50. intentkit/skills/http/get.py +96 -0
  51. intentkit/skills/http/http.svg +15 -0
  52. intentkit/skills/http/post.py +113 -0
  53. intentkit/skills/http/put.py +113 -0
  54. intentkit/skills/http/schema.json +80 -0
  55. intentkit/skills/lifi/token_execute.py +1 -1
  56. intentkit/skills/openai/dalle_image_generation.py +1 -1
  57. intentkit/skills/openai/gpt_image_generation.py +1 -1
  58. intentkit/skills/openai/gpt_image_to_image.py +1 -1
  59. intentkit/skills/supabase/__init__.py +116 -0
  60. intentkit/skills/supabase/base.py +72 -0
  61. intentkit/skills/supabase/delete_data.py +102 -0
  62. intentkit/skills/supabase/fetch_data.py +120 -0
  63. intentkit/skills/supabase/insert_data.py +70 -0
  64. intentkit/skills/supabase/invoke_function.py +74 -0
  65. intentkit/skills/supabase/schema.json +170 -0
  66. intentkit/skills/supabase/supabase.svg +15 -0
  67. intentkit/skills/supabase/update_data.py +105 -0
  68. intentkit/skills/supabase/upsert_data.py +77 -0
  69. intentkit/skills/system/read_agent_api_key.py +1 -1
  70. intentkit/skills/system/regenerate_agent_api_key.py +1 -1
  71. intentkit/skills/token/base.py +1 -39
  72. intentkit/skills/twitter/follow_user.py +3 -3
  73. intentkit/skills/twitter/get_mentions.py +6 -6
  74. intentkit/skills/twitter/get_timeline.py +5 -5
  75. intentkit/skills/twitter/get_user_by_username.py +3 -3
  76. intentkit/skills/twitter/get_user_tweets.py +5 -5
  77. intentkit/skills/twitter/like_tweet.py +3 -3
  78. intentkit/skills/twitter/post_tweet.py +4 -4
  79. intentkit/skills/twitter/reply_tweet.py +4 -4
  80. intentkit/skills/twitter/retweet.py +3 -3
  81. intentkit/skills/twitter/search_tweets.py +5 -5
  82. intentkit/skills/unrealspeech/text_to_speech.py +1 -1
  83. intentkit/skills/web_scraper/README.md +35 -4
  84. intentkit/skills/web_scraper/__init__.py +16 -0
  85. intentkit/skills/web_scraper/document_indexer.py +143 -0
  86. intentkit/skills/web_scraper/schema.json +28 -0
  87. intentkit/skills/web_scraper/scrape_and_index.py +135 -200
  88. intentkit/skills/web_scraper/utils.py +684 -0
  89. intentkit/skills/web_scraper/website_indexer.py +456 -0
  90. intentkit/utils/logging.py +1 -1
  91. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/METADATA +1 -1
  92. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/RECORD +94 -63
  93. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/WHEEL +0 -0
  94. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/licenses/LICENSE +0 -0
intentkit/skills/web_scraper/scrape_and_index.py
@@ -1,19 +1,18 @@
-import asyncio
-import base64
 import logging
-import os
-import tempfile
 from typing import List, Type
-from urllib.parse import urlparse

-from langchain_community.document_loaders import WebBaseLoader
-from langchain_community.vectorstores import FAISS
 from langchain_core.runnables import RunnableConfig
-from langchain_openai import OpenAIEmbeddings
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from pydantic import BaseModel, Field

 from intentkit.skills.web_scraper.base import WebScraperBaseTool
+from intentkit.skills.web_scraper.utils import (
+    DEFAULT_CHUNK_OVERLAP,
+    DEFAULT_CHUNK_SIZE,
+    MetadataManager,
+    ResponseFormatter,
+    VectorStoreManager,
+    scrape_and_index_urls,
+)

 logger = logging.getLogger(__name__)

@@ -24,17 +23,17 @@ class ScrapeAndIndexInput(BaseModel):
     urls: List[str] = Field(
         description="List of URLs to scrape and index. Each URL should be a valid web address starting with http:// or https://",
         min_items=1,
-        max_items=10,
+        max_items=25,
     )
     chunk_size: int = Field(
         description="Size of text chunks for indexing (default: 1000)",
-        default=1000,
+        default=DEFAULT_CHUNK_SIZE,
         ge=100,
         le=4000,
     )
     chunk_overlap: int = Field(
         description="Overlap between chunks (default: 200)",
-        default=200,
+        default=DEFAULT_CHUNK_OVERLAP,
         ge=0,
         le=1000,
     )
@@ -71,151 +70,92 @@ class ScrapeAndIndex(WebScraperBaseTool):
     )
     args_schema: Type[BaseModel] = ScrapeAndIndexInput

-    def _validate_urls(self, urls: List[str]) -> List[str]:
-        """Validate and filter URLs."""
-        valid_urls = []
-        for url in urls:
-            try:
-                parsed = urlparse(url)
-                if parsed.scheme in ["http", "https"] and parsed.netloc:
-                    valid_urls.append(url)
-                else:
-                    logger.warning(f"Invalid URL format: {url}")
-            except Exception as e:
-                logger.warning(f"Error parsing URL {url}: {e}")
-        return valid_urls
-
     async def _arun(
         self,
         urls: List[str],
-        chunk_size: int = 1000,
-        chunk_overlap: int = 200,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
         config: RunnableConfig = None,
         **kwargs,
     ) -> str:
         """Scrape URLs and index content into vector store."""
         try:
-            # Validate URLs
-            valid_urls = self._validate_urls(urls)
-            if not valid_urls:
-                return "Error: No valid URLs provided. URLs must start with http:// or https://"
+            # Get agent context - throw error if not available
+            if not config:
+                raise ValueError("Configuration is required but not provided")

-            # Get agent context for storage
-            context = self.context_from_config(config) if config else None
-            agent_id = context.agent.id if context else "default"
+            context = self.context_from_config(config)
+            if not context or not context.agent_id:
+                raise ValueError("Agent ID is required but not found in configuration")

-            # Load documents from URLs
-            logger.info(f"Scraping {len(valid_urls)} URLs...")
-            loader = WebBaseLoader(
-                web_paths=valid_urls,
-                requests_per_second=2,  # Be respectful to servers
-                show_progress=True,
-            )
+            agent_id = context.agent_id

-            # Configure loader for better content extraction
-            loader.requests_kwargs = {
-                "verify": True,
-                "timeout": 30,
-            }
-
-            documents = await asyncio.to_thread(loader.load)
+            logger.info(
+                f"[{agent_id}] Starting scrape and index operation with {len(urls)} URLs"
+            )

-            if not documents:
-                return "Error: No content could be extracted from the provided URLs."
+            # Use the utility function to scrape and index URLs
+            total_chunks, was_merged, valid_urls = await scrape_and_index_urls(
+                urls, agent_id, self.skill_store, chunk_size, chunk_overlap
+            )

-            # Split documents into chunks
-            text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=chunk_size,
-                chunk_overlap=chunk_overlap,
-                length_function=len,
+            logger.info(
+                f"[{agent_id}] Scraping completed: {total_chunks} chunks indexed, merged: {was_merged}"
             )
-            split_docs = text_splitter.split_documents(documents)

-            if not split_docs:
-                return "Error: No content could be processed into chunks."
+            if not valid_urls:
+                logger.error(f"[{agent_id}] No valid URLs provided")
+                return "Error: No valid URLs provided. URLs must start with http:// or https://"

-            # Create embeddings and vector store
-            api_key = self.skill_store.get_system_config("openai_api_key")
-            embeddings = OpenAIEmbeddings(api_key=api_key)
+            if total_chunks == 0:
+                logger.error(f"[{agent_id}] No content extracted from URLs")
+                return "Error: No content could be extracted from the provided URLs."

-            # Create vector store
-            vector_store = FAISS.from_documents(split_docs, embeddings)
+            # Get current storage size for response
+            vs_manager = VectorStoreManager(self.skill_store)
+            current_size = await vs_manager.get_content_size(agent_id)
+            size_limit_reached = len(valid_urls) < len(urls)

-            # Store the vector store for this agent using a temporary directory
-            vector_store_key = f"vector_store_{agent_id}"
-            metadata_key = f"indexed_urls_{agent_id}"
-
-            # Save vector store to temporary directory and encode to base64
-            with tempfile.TemporaryDirectory() as temp_dir:
-                vector_store.save_local(temp_dir)
-
-                # Read and encode all files in the temporary directory
-                encoded_files = {}
-                for filename in os.listdir(temp_dir):
-                    file_path = os.path.join(temp_dir, filename)
-                    if os.path.isfile(file_path):
-                        with open(file_path, "rb") as f:
-                            encoded_files[filename] = base64.b64encode(f.read()).decode(
-                                "utf-8"
-                            )
-
-                # Store vector store data
-                await self.skill_store.save_agent_skill_data(
-                    agent_id=agent_id,
-                    skill="web_scraper",
-                    key=vector_store_key,
-                    data={
-                        "faiss_files": encoded_files,
-                        "chunk_size": chunk_size,
-                        "chunk_overlap": chunk_overlap,
-                    },
+            # Update metadata
+            metadata_manager = MetadataManager(self.skill_store)
+            new_metadata = metadata_manager.create_url_metadata(
+                valid_urls, [], "scrape_and_index"
             )
+            await metadata_manager.update_metadata(agent_id, new_metadata)

-            # Store metadata about indexed URLs
-            existing_metadata = (
-                await self.skill_store.get_agent_skill_data(
-                    agent_id, "web_scraper", metadata_key
-                )
-                or {}
-            )
-            existing_metadata.update(
-                {
-                    url: {
-                        "indexed_at": str(asyncio.get_event_loop().time()),
-                        "chunks": len(
-                            [
-                                doc
-                                for doc in split_docs
-                                if doc.metadata.get("source") == url
-                            ]
-                        ),
-                    }
-                    for url in valid_urls
-                }
-            )
+            logger.info(f"[{agent_id}] Metadata updated successfully")

-            await self.skill_store.save_agent_skill_data(
-                agent_id=agent_id,
-                skill="web_scraper",
-                key=metadata_key,
-                data=existing_metadata,
+            # Format response
+            response = ResponseFormatter.format_indexing_response(
+                "scraped and indexed",
+                valid_urls,
+                total_chunks,
+                chunk_size,
+                chunk_overlap,
+                was_merged,
+                current_size_bytes=current_size,
+                size_limit_reached=size_limit_reached,
+                total_requested_urls=len(urls),
             )

-            total_chunks = len(split_docs)
-            successful_urls = len(valid_urls)
-
-            return (
-                f"Successfully scraped and indexed {successful_urls} URLs:\n"
-                f"{'• ' + chr(10) + '• '.join(valid_urls)}\n\n"
-                f"Total chunks created: {total_chunks}\n"
-                f"Chunk size: {chunk_size} characters\n"
-                f"Chunk overlap: {chunk_overlap} characters\n\n"
-                f"The content is now indexed and can be queried using the query_indexed_content tool."
+            logger.info(
+                f"[{agent_id}] Scrape and index operation completed successfully"
             )
+            return response

         except Exception as e:
-            logger.error(f"Error in scrape_and_index: {e}")
-            return f"Error scraping and indexing URLs: {str(e)}"
+            # Extract agent_id for error logging if possible
+            agent_id = "UNKNOWN"
+            try:
+                if config:
+                    context = self.context_from_config(config)
+                    if context and context.agent_id:
+                        agent_id = context.agent_id
+            except Exception:
+                pass
+
+            logger.error(f"[{agent_id}] Error in ScrapeAndIndex: {e}", exc_info=True)
+            raise type(e)(f"[agent:{agent_id}]: {e}") from e


 class QueryIndexedContent(WebScraperBaseTool):
@@ -242,86 +182,81 @@ class QueryIndexedContent(WebScraperBaseTool):
     ) -> str:
         """Query the indexed content."""
         try:
-            # Get agent context for storage
-            context = self.context_from_config(config) if config else None
-            agent_id = context.agent.id if context else "default"
+            # Get agent context - throw error if not available
+            if not config:
+                raise ValueError("Configuration is required but not provided")
+
+            context = self.context_from_config(config)
+            if not context or not context.agent_id:
+                raise ValueError("Agent ID is required but not found in configuration")
+
+            agent_id = context.agent_id
+
+            logger.info(f"[{agent_id}] Starting query operation: '{query}'")

             # Retrieve vector store
             vector_store_key = f"vector_store_{agent_id}"
-            metadata_key = f"indexed_urls_{agent_id}"
+
+            logger.info(f"[{agent_id}] Looking for vector store: {vector_store_key}")

             stored_data = await self.skill_store.get_agent_skill_data(
                 agent_id, "web_scraper", vector_store_key
             )
+
+            if not stored_data:
+                logger.warning(f"[{agent_id}] No vector store found")
+                return "No indexed content found. Please use the scrape_and_index tool first to scrape and index some web content before querying."
+
             if not stored_data or "faiss_files" not in stored_data:
-                return (
-                    "No indexed content found. Please use the scrape_and_index tool first "
-                    "to scrape and index some web content before querying."
-                )
-
-            # Restore vector store from base64 encoded files
-            api_key = self.skill_store.get_system_config("openai_api_key")
-            embeddings = OpenAIEmbeddings(api_key=api_key)
-
-            with tempfile.TemporaryDirectory() as temp_dir:
-                # Decode and write files to temporary directory
-                for filename, encoded_content in stored_data["faiss_files"].items():
-                    file_path = os.path.join(temp_dir, filename)
-                    with open(file_path, "wb") as f:
-                        f.write(base64.b64decode(encoded_content))
-
-                # Load the vector store from the temporary directory
-                vector_store = FAISS.load_local(
-                    temp_dir,
-                    embeddings,
-                    allow_dangerous_deserialization=True,  # Safe since we control the serialization
-                )
-
-                # Perform similarity search
-                relevant_docs = vector_store.similarity_search(query, k=max_results)
-
-                if not relevant_docs:
-                    return f"No relevant content found for query: '{query}'"
-
-                # Get metadata about indexed URLs
-                metadata = (
-                    await self.skill_store.get_agent_skill_data(
-                        agent_id, "web_scraper", metadata_key
-                    )
-                    or {}
+                logger.warning(f"[{agent_id}] Invalid stored data structure")
+                return "No indexed content found. Please use the scrape_and_index tool first to scrape and index some web content before querying."
+
+            # Create embeddings and decode vector store
+            logger.info(f"[{agent_id}] Decoding vector store")
+            vs_manager = VectorStoreManager(self.skill_store)
+            embeddings = vs_manager.create_embeddings()
+            vector_store = vs_manager.decode_vector_store(
+                stored_data["faiss_files"], embeddings
             )

-                # Format response
-                response_parts = [
-                    f"Found {len(relevant_docs)} relevant pieces of content for: '{query}'\n",
-                    "=" * 50,
-                ]
-
-                for i, doc in enumerate(relevant_docs, 1):
-                    source_url = doc.metadata.get("source", "Unknown source")
-                    title = doc.metadata.get("title", "No title")
-
-                    response_parts.extend(
-                        [
-                            f"\n{i}. Source: {source_url}",
-                            f" Title: {title}",
-                            f" Content:\n {doc.page_content[:500]}{'...' if len(doc.page_content) > 500 else ''}",
-                            "",
-                        ]
-                    )
-
-                # Add summary of indexed content
-                response_parts.extend(
-                    [
-                        "\n" + "=" * 50,
-                        f"Total indexed URLs: {len(metadata)}",
-                        "Indexed sources:",
-                        *[f"• {url}" for url in metadata.keys()],
-                    ]
+            logger.info(
+                f"[{agent_id}] Vector store loaded, index count: {vector_store.index.ntotal}"
+            )
+
+            # Perform similarity search
+            docs = vector_store.similarity_search(query, k=max_results)
+            logger.info(f"[{agent_id}] Found {len(docs)} similar documents")
+
+            if not docs:
+                logger.info(f"[{agent_id}] No relevant documents found for query")
+                return f"No relevant information found for your query: '{query}'. The indexed content may not contain information related to your search."
+
+            # Format results
+            results = []
+            for i, doc in enumerate(docs, 1):
+                content = doc.page_content.strip()
+                source = doc.metadata.get("source", "Unknown")
+                results.append(f"**Source {i}:** {source}\n{content}")
+
+            response = "\n\n".join(results)
+            logger.info(
+                f"[{agent_id}] Query completed successfully, returning {len(response)} chars"
             )

-                return "\n".join(response_parts)
+            return response

         except Exception as e:
-            logger.error(f"Error in query_indexed_content: {e}")
-            return f"Error querying indexed content: {str(e)}"
+            # Extract agent_id for error logging if possible
+            agent_id = "UNKNOWN"
+            try:
+                if config:
+                    context = self.context_from_config(config)
+                    if context and context.agent_id:
+                        agent_id = context.agent_id
+            except Exception:
+                pass
+
+            logger.error(
+                f"[{agent_id}] Error in QueryIndexedContent: {e}", exc_info=True
+            )
+            raise type(e)(f"[agent:{agent_id}]: {e}") from e
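
For reference, the scraping, chunking, FAISS serialization, and metadata bookkeeping that previously lived inline in scrape_and_index.py now sit behind the new intentkit.skills.web_scraper.utils module. The minimal sketch below strings those helpers together outside the skill classes, using only the call signatures visible in this diff; the function name index_then_query and the standalone usage are illustrative, not part of the release.

# Illustrative sketch only; helper signatures are taken from the diff above.
from intentkit.skills.web_scraper.utils import (
    DEFAULT_CHUNK_OVERLAP,
    DEFAULT_CHUNK_SIZE,
    VectorStoreManager,
    scrape_and_index_urls,
)


async def index_then_query(skill_store, agent_id: str, urls: list, query: str) -> str:
    # Scrape, chunk, and persist the URLs for this agent (what ScrapeAndIndex._arun now delegates).
    total_chunks, was_merged, valid_urls = await scrape_and_index_urls(
        urls, agent_id, skill_store, DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP
    )
    if total_chunks == 0:
        return "No content could be extracted from the provided URLs."

    # Reload the persisted FAISS index and search it (what QueryIndexedContent._arun now does).
    stored = await skill_store.get_agent_skill_data(
        agent_id, "web_scraper", f"vector_store_{agent_id}"
    )
    vs_manager = VectorStoreManager(skill_store)
    vector_store = vs_manager.decode_vector_store(
        stored["faiss_files"], vs_manager.create_embeddings()
    )
    docs = vector_store.similarity_search(query, k=4)
    return "\n\n".join(doc.page_content for doc in docs)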