intentkit 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of intentkit might be problematic.

Files changed (94)
  1. intentkit/__init__.py +1 -1
  2. intentkit/abstracts/skill.py +12 -0
  3. intentkit/clients/cdp.py +114 -16
  4. intentkit/config/config.py +12 -4
  5. intentkit/core/engine.py +39 -31
  6. intentkit/core/node.py +8 -4
  7. intentkit/core/prompt.py +5 -6
  8. intentkit/core/skill.py +11 -0
  9. intentkit/models/agent.py +2 -9
  10. intentkit/models/agent_data.py +18 -0
  11. intentkit/models/agent_schema.json +12 -0
  12. intentkit/models/chat.py +50 -0
  13. intentkit/models/skill.py +19 -0
  14. intentkit/skills/base.py +37 -17
  15. intentkit/skills/cdp/__init__.py +6 -14
  16. intentkit/skills/cdp/get_balance.py +77 -25
  17. intentkit/skills/cdp/schema.json +0 -64
  18. intentkit/skills/cryptocompare/fetch_news.py +2 -2
  19. intentkit/skills/cryptocompare/fetch_price.py +2 -2
  20. intentkit/skills/cryptocompare/fetch_top_exchanges.py +2 -2
  21. intentkit/skills/cryptocompare/fetch_top_market_cap.py +2 -2
  22. intentkit/skills/cryptocompare/fetch_top_volume.py +2 -2
  23. intentkit/skills/cryptocompare/fetch_trading_signals.py +2 -2
  24. intentkit/skills/defillama/base.py +3 -3
  25. intentkit/skills/enso/base.py +27 -4
  26. intentkit/skills/enso/networks.py +1 -1
  27. intentkit/skills/enso/route.py +24 -23
  28. intentkit/skills/enso/tokens.py +1 -1
  29. intentkit/skills/enso/wallet.py +27 -23
  30. intentkit/skills/firecrawl/README.md +211 -0
  31. intentkit/skills/firecrawl/__init__.py +107 -0
  32. intentkit/skills/firecrawl/base.py +28 -0
  33. intentkit/skills/firecrawl/clear.py +87 -0
  34. intentkit/skills/firecrawl/crawl.py +399 -0
  35. intentkit/skills/firecrawl/firecrawl.png +0 -0
  36. intentkit/skills/firecrawl/query.py +123 -0
  37. intentkit/skills/firecrawl/schema.json +153 -0
  38. intentkit/skills/firecrawl/scrape.py +318 -0
  39. intentkit/skills/firecrawl/utils.py +306 -0
  40. intentkit/skills/heurist/image_generation_animagine_xl.py +1 -1
  41. intentkit/skills/heurist/image_generation_arthemy_comics.py +1 -1
  42. intentkit/skills/heurist/image_generation_arthemy_real.py +1 -1
  43. intentkit/skills/heurist/image_generation_braindance.py +1 -1
  44. intentkit/skills/heurist/image_generation_cyber_realistic_xl.py +1 -1
  45. intentkit/skills/heurist/image_generation_flux_1_dev.py +1 -1
  46. intentkit/skills/heurist/image_generation_sdxl.py +1 -1
  47. intentkit/skills/http/README.md +78 -0
  48. intentkit/skills/http/__init__.py +100 -0
  49. intentkit/skills/http/base.py +21 -0
  50. intentkit/skills/http/get.py +96 -0
  51. intentkit/skills/http/http.svg +15 -0
  52. intentkit/skills/http/post.py +113 -0
  53. intentkit/skills/http/put.py +113 -0
  54. intentkit/skills/http/schema.json +80 -0
  55. intentkit/skills/lifi/token_execute.py +1 -1
  56. intentkit/skills/openai/dalle_image_generation.py +1 -1
  57. intentkit/skills/openai/gpt_image_generation.py +1 -1
  58. intentkit/skills/openai/gpt_image_to_image.py +1 -1
  59. intentkit/skills/supabase/__init__.py +116 -0
  60. intentkit/skills/supabase/base.py +72 -0
  61. intentkit/skills/supabase/delete_data.py +102 -0
  62. intentkit/skills/supabase/fetch_data.py +120 -0
  63. intentkit/skills/supabase/insert_data.py +70 -0
  64. intentkit/skills/supabase/invoke_function.py +74 -0
  65. intentkit/skills/supabase/schema.json +170 -0
  66. intentkit/skills/supabase/supabase.svg +15 -0
  67. intentkit/skills/supabase/update_data.py +105 -0
  68. intentkit/skills/supabase/upsert_data.py +77 -0
  69. intentkit/skills/system/read_agent_api_key.py +1 -1
  70. intentkit/skills/system/regenerate_agent_api_key.py +1 -1
  71. intentkit/skills/token/base.py +1 -39
  72. intentkit/skills/twitter/follow_user.py +3 -3
  73. intentkit/skills/twitter/get_mentions.py +6 -6
  74. intentkit/skills/twitter/get_timeline.py +5 -5
  75. intentkit/skills/twitter/get_user_by_username.py +3 -3
  76. intentkit/skills/twitter/get_user_tweets.py +5 -5
  77. intentkit/skills/twitter/like_tweet.py +3 -3
  78. intentkit/skills/twitter/post_tweet.py +4 -4
  79. intentkit/skills/twitter/reply_tweet.py +4 -4
  80. intentkit/skills/twitter/retweet.py +3 -3
  81. intentkit/skills/twitter/search_tweets.py +5 -5
  82. intentkit/skills/unrealspeech/text_to_speech.py +1 -1
  83. intentkit/skills/web_scraper/README.md +35 -4
  84. intentkit/skills/web_scraper/__init__.py +16 -0
  85. intentkit/skills/web_scraper/document_indexer.py +143 -0
  86. intentkit/skills/web_scraper/schema.json +28 -0
  87. intentkit/skills/web_scraper/scrape_and_index.py +135 -200
  88. intentkit/skills/web_scraper/utils.py +684 -0
  89. intentkit/skills/web_scraper/website_indexer.py +456 -0
  90. intentkit/utils/logging.py +1 -1
  91. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/METADATA +1 -1
  92. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/RECORD +94 -63
  93. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/WHEEL +0 -0
  94. {intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/licenses/LICENSE +0 -0
intentkit/skills/web_scraper/website_indexer.py (new file)
@@ -0,0 +1,456 @@
+import logging
+from typing import List, Type
+from urllib.parse import urljoin, urlparse
+
+import httpx
+import openai
+from langchain_core.runnables import RunnableConfig
+from pydantic import BaseModel, Field
+
+from intentkit.skills.web_scraper.base import WebScraperBaseTool
+from intentkit.skills.web_scraper.utils import (
+    DEFAULT_CHUNK_OVERLAP,
+    DEFAULT_CHUNK_SIZE,
+    MetadataManager,
+    ResponseFormatter,
+    VectorStoreManager,
+    scrape_and_index_urls,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class WebsiteIndexerInput(BaseModel):
+    """Input for WebsiteIndexer tool."""
+
+    base_url: str = Field(
+        description="Base URL of the website to index (e.g., https://example.com). The tool will discover sitemaps and extract all URLs",
+        min_length=1,
+    )
+    max_urls: int = Field(
+        description="Maximum number of URLs to scrape from the sitemap (default: 50)",
+        default=50,
+        ge=1,
+        le=200,
+    )
+    chunk_size: int = Field(
+        description="Size of text chunks for indexing (default: 1000)",
+        default=DEFAULT_CHUNK_SIZE,
+        ge=100,
+        le=4000,
+    )
+    chunk_overlap: int = Field(
+        description="Overlap between chunks (default: 200)",
+        default=DEFAULT_CHUNK_OVERLAP,
+        ge=0,
+        le=1000,
+    )
+    include_patterns: List[str] = Field(
+        description="URL patterns to include (e.g., ['/blog/', '/docs/']). If empty, all URLs are included",
+        default=[],
+    )
+    exclude_patterns: List[str] = Field(
+        description="URL patterns to exclude (e.g., ['/admin/', '/private/'])",
+        default=[],
+    )
+
+
+class WebsiteIndexer(WebScraperBaseTool):
+    """Tool for discovering and indexing entire websites using AI-powered sitemap analysis.
+
+    This tool discovers sitemaps from robots.txt, extracts URLs from sitemap XML using GPT-4o-mini for
+    robust parsing of various sitemap formats, and then delegates to the proven scrape_and_index tool
+    for reliable content indexing.
+    """
+
+    name: str = "web_scraper_website_indexer"
+    description: str = (
+        "Index an entire website by discovering sitemaps and extracting URLs efficiently. "
+        "This tool finds sitemaps from robots.txt, parses the XML content to extract URLs, "
+        "and then uses the reliable scrape_and_index functionality for content indexing."
+    )
+    args_schema: Type[BaseModel] = WebsiteIndexerInput
+
+    def _normalize_url(self, url: str) -> str:
+        """Normalize URL by ensuring it has a proper scheme."""
+        if not url.startswith(("http://", "https://")):
+            return f"https://{url}"
+        return url
+
+    async def _get_robots_txt(self, base_url: str) -> str:
+        """Fetch robots.txt content."""
+        robots_url = urljoin(base_url, "/robots.txt")
+
+        # Import headers from utils
+        from intentkit.skills.web_scraper.utils import DEFAULT_HEADERS, FALLBACK_HEADERS
+
+        # Try with primary headers first
+        async with httpx.AsyncClient(timeout=30, headers=DEFAULT_HEADERS) as client:
+            try:
+                response = await client.get(robots_url)
+                if response.status_code == 200:
+                    return response.text
+            except Exception as e:
+                logger.warning(
+                    f"Primary headers failed for robots.txt from {robots_url}: {e}"
+                )
+
+        # Try with fallback headers
+        async with httpx.AsyncClient(timeout=30, headers=FALLBACK_HEADERS) as client:
+            try:
+                response = await client.get(robots_url)
+                if response.status_code == 200:
+                    return response.text
+            except Exception as e:
+                logger.warning(f"Could not fetch robots.txt from {robots_url}: {e}")
+        return ""
+
+    def _extract_sitemaps_from_robots(
+        self, robots_content: str, base_url: str
+    ) -> List[str]:
+        """Extract sitemap URLs from robots.txt content."""
+        sitemaps = []
+
+        for line in robots_content.split("\n"):
+            line = line.strip()
+            if line.lower().startswith("sitemap:"):
+                sitemap_url = line.split(":", 1)[1].strip()
+                # Make relative URLs absolute
+                if sitemap_url.startswith("/"):
+                    sitemap_url = urljoin(base_url, sitemap_url)
+                sitemaps.append(sitemap_url)
+
+        return sitemaps
+
+    def _get_common_sitemap_patterns(self, base_url: str) -> List[str]:
+        """Generate common sitemap URL patterns."""
+        return [
+            urljoin(base_url, "/sitemap.xml"),
+            urljoin(base_url, "/sitemap_index.xml"),
+            urljoin(base_url, "/sitemaps/sitemap.xml"),
+            urljoin(base_url, "/sitemap/sitemap.xml"),
+            urljoin(base_url, "/wp-sitemap.xml"),  # WordPress
+        ]
+
+    async def _fetch_sitemap_content(self, sitemap_url: str) -> str:
+        """Fetch sitemap XML content."""
+        # Import headers from utils
+        from intentkit.skills.web_scraper.utils import DEFAULT_HEADERS, FALLBACK_HEADERS
+
+        # Try with primary headers first
+        async with httpx.AsyncClient(timeout=30, headers=DEFAULT_HEADERS) as client:
+            try:
+                response = await client.get(sitemap_url)
+                if response.status_code == 200:
+                    return response.text
+            except Exception as e:
+                logger.warning(
+                    f"Primary headers failed for sitemap from {sitemap_url}: {e}"
+                )
+
+        # Try with fallback headers
+        async with httpx.AsyncClient(timeout=30, headers=FALLBACK_HEADERS) as client:
+            try:
+                response = await client.get(sitemap_url)
+                if response.status_code == 200:
+                    return response.text
+            except Exception as e:
+                logger.warning(f"Could not fetch sitemap from {sitemap_url}: {e}")
+        return ""
+
+    async def _get_all_sitemap_content(self, base_url: str) -> tuple[str, List[str]]:
+        """Get all sitemap content for AI analysis."""
+        all_content = []
+        found_sitemaps = []
+        processed_sitemaps = set()
+
+        # First, try to get sitemaps from robots.txt
+        robots_content = await self._get_robots_txt(base_url)
+        sitemap_urls = self._extract_sitemaps_from_robots(robots_content, base_url)
+
+        # If no sitemaps found in robots.txt, try common patterns
+        if not sitemap_urls:
+            sitemap_urls = self._get_common_sitemap_patterns(base_url)
+
+        logger.info(f"Checking {len(sitemap_urls)} potential sitemap URLs...")
+
+        # Process each sitemap URL
+        sitemaps_to_process = sitemap_urls[:]
+
+        while sitemaps_to_process:
+            sitemap_url = sitemaps_to_process.pop(0)
+
+            if sitemap_url in processed_sitemaps:
+                continue
+
+            processed_sitemaps.add(sitemap_url)
+
+            xml_content = await self._fetch_sitemap_content(sitemap_url)
+            if not xml_content:
+                continue
+
+            found_sitemaps.append(sitemap_url)
+            all_content.append(f"<!-- Sitemap: {sitemap_url} -->\n{xml_content}\n")
+
+            # Check if this contains references to other sitemaps (sitemap index)
+            if "<sitemap>" in xml_content.lower() and "<loc>" in xml_content.lower():
+                # This might be a sitemap index - we'll let AI handle parsing it
+                pass
+
+        combined_xml = "\n".join(all_content) if all_content else ""
+        return combined_xml, found_sitemaps
+
+    def _create_ai_extraction_prompt(
+        self, sitemap_xml: str, include_patterns: List[str], exclude_patterns: List[str]
+    ) -> str:
+        """Create a prompt for AI to extract URLs from sitemap XML."""
+        filter_instructions = ""
+        if include_patterns:
+            filter_instructions += f"\n- INCLUDE only URLs containing these patterns: {', '.join(include_patterns)}"
+        if exclude_patterns:
+            filter_instructions += f"\n- EXCLUDE URLs containing these patterns: {', '.join(exclude_patterns)}"
+
+        return f"""Analyze this sitemap XML and extract all valid webpage URLs.
+
+SITEMAP XML CONTENT:
+{sitemap_xml}
+
+INSTRUCTIONS:
+- Extract only URLs from <loc> tags that point to actual web pages
+- Handle both standard sitemap format and sitemap index format
+- Ignore any URLs ending in .xml, .rss, .atom (these are feeds/sitemaps, not pages)
+- Skip any sitemap index entries that point to other sitemaps
+- Handle text-based sitemaps (simple URL lists)
+- Return only unique, valid HTTP/HTTPS URLs
+- Format as a simple list, one URL per line{filter_instructions}
+
+Extract the URLs now:"""
+
+    def _parse_ai_response(self, ai_response: str) -> List[str]:
+        """Parse AI response to extract clean URLs."""
+        urls = []
+
+        for line in ai_response.strip().split("\n"):
+            line = line.strip()
+            # Remove any markdown formatting, bullets, numbering
+            line = line.lstrip("- •*123456789. ")
+
+            # Check if it looks like a URL
+            if line.startswith(("http://", "https://")):
+                # Basic validation
+                try:
+                    parsed = urlparse(line)
+                    if parsed.netloc and not line.endswith((".xml", ".rss", ".atom")):
+                        urls.append(line)
+                except Exception:
+                    continue
+
+        return list(set(urls))  # Remove duplicates
+
+    async def _call_ai_model(self, prompt: str, context) -> str:
+        """Call OpenAI GPT-4o-mini to extract URLs from sitemap content."""
+        try:
+            # Get OpenAI API key using the standard pattern
+            from intentkit.skills.openai.base import OpenAIBaseTool
+
+            temp_tool = OpenAIBaseTool(skill_store=self.skill_store)
+            api_key = temp_tool.get_api_key(context)
+
+            # Initialize OpenAI client
+            client = openai.AsyncOpenAI(api_key=api_key)
+
+            # Call the API
+            response = await client.chat.completions.create(
+                model="gpt-4o-mini",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are an expert at parsing XML sitemaps and extracting webpage URLs. Always return only clean, valid URLs, one per line.",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                max_tokens=2000,
+                temperature=0.1,
+            )
+
+            return response.choices[0].message.content.strip()
+
+        except Exception as e:
+            logger.error(f"Error calling OpenAI API: {e}")
+            raise
+
+    async def _arun(
+        self,
+        base_url: str,
+        max_urls: int = 50,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+        chunk_overlap: int = DEFAULT_CHUNK_OVERLAP,
+        include_patterns: List[str] = None,
+        exclude_patterns: List[str] = None,
+        config: RunnableConfig = None,
+        **kwargs,
+    ) -> str:
+        """Discover website sitemaps, extract URLs with AI, and delegate to scrape_and_index."""
+        try:
+            # Normalize inputs
+            base_url = self._normalize_url(base_url)
+            include_patterns = include_patterns or []
+            exclude_patterns = exclude_patterns or []
+
+            # Validate base URL
+            parsed_url = urlparse(base_url)
+            if not parsed_url.netloc:
+                return "Error: Invalid base URL provided. Please provide a valid URL (e.g., https://example.com)"
+
+            # Get agent context - throw error if not available
+            if not config:
+                raise ValueError("Configuration is required but not provided")
+
+            context = self.context_from_config(config)
+            if not context or not context.agent_id:
+                raise ValueError("Agent ID is required but not found in configuration")
+
+            agent_id = context.agent_id
+
+            logger.info(f"[{agent_id}] Discovering sitemaps for {base_url}...")
+
+            # Get all sitemap content
+            sitemap_xml, found_sitemaps = await self._get_all_sitemap_content(base_url)
+
+            if not sitemap_xml:
+                logger.error(
+                    f"[{agent_id}] No accessible sitemaps found for {base_url}"
+                )
+                return f"Error: No accessible sitemaps found for {base_url}. The website might not have sitemaps or they might be inaccessible."
+
+            logger.info(
+                f"[{agent_id}] Found {len(found_sitemaps)} sitemap(s). Extracting URLs with AI..."
+            )
+
+            try:
+                # Use AI to extract URLs from sitemap
+                prompt = self._create_ai_extraction_prompt(
+                    sitemap_xml, include_patterns, exclude_patterns
+                )
+                ai_response = await self._call_ai_model(prompt, context)
+                all_urls = self._parse_ai_response(ai_response)
+
+                logger.info(
+                    f"[{agent_id}] AI extracted {len(all_urls)} URLs from sitemap"
+                )
+
+            except Exception as e:
+                logger.error(
+                    f"[{agent_id}] AI extraction failed: {e}, falling back to regex"
+                )
+                # Fallback to simple regex if AI fails
+                import re
+
+                url_pattern = r"<loc>(https?://[^<]+)</loc>"
+                all_urls = re.findall(url_pattern, sitemap_xml)
+
+                # Basic filtering for fallback
+                filtered_urls = []
+                for url in all_urls:
+                    # Skip XML files (sitemaps)
+                    if url.endswith((".xml", ".rss", ".atom")):
+                        continue
+
+                    # Apply exclude patterns
+                    if exclude_patterns and any(
+                        pattern in url for pattern in exclude_patterns
+                    ):
+                        continue
+
+                    # Apply include patterns
+                    if include_patterns:
+                        if any(pattern in url for pattern in include_patterns):
+                            filtered_urls.append(url)
+                    else:
+                        filtered_urls.append(url)
+
+                all_urls = filtered_urls
+                logger.info(
+                    f"[{agent_id}] Regex fallback extracted {len(all_urls)} URLs from sitemap"
+                )
+
+            # Remove duplicates and limit
+            unique_urls = list(set(all_urls))[:max_urls]
+
+            if not unique_urls:
+                logger.error(
+                    f"[{agent_id}] No valid URLs found in sitemaps after filtering"
+                )
+                return f"Error: No valid URLs found in sitemaps after filtering. Found sitemaps: {', '.join(found_sitemaps)}"
+
+            logger.info(
+                f"[{agent_id}] Extracted {len(unique_urls)} URLs from sitemaps. Scraping and indexing..."
+            )
+
+            # Use the utility function to scrape and index URLs directly
+            total_chunks, was_merged, valid_urls = await scrape_and_index_urls(
+                unique_urls, agent_id, self.skill_store, chunk_size, chunk_overlap
+            )
+
+            if total_chunks == 0:
+                logger.error(
+                    f"[{agent_id}] No content could be extracted from discovered URLs"
+                )
+                return f"Error: No content could be extracted from the discovered URLs. Found sitemaps: {', '.join(found_sitemaps)}"
+
+            # Get current storage size for response
+            vs_manager = VectorStoreManager(self.skill_store)
+            current_size = await vs_manager.get_content_size(agent_id)
+            size_limit_reached = len(valid_urls) < len(unique_urls)
+
+            # Update metadata
+            metadata_manager = MetadataManager(self.skill_store)
+            new_metadata = metadata_manager.create_url_metadata(
+                valid_urls, [], "website_indexer"
+            )
+            await metadata_manager.update_metadata(agent_id, new_metadata)
+
+            logger.info(f"[{agent_id}] Website indexing completed successfully")
+
+            # Format the indexing result
+            result = ResponseFormatter.format_indexing_response(
+                "scraped and indexed",
+                valid_urls,
+                total_chunks,
+                chunk_size,
+                chunk_overlap,
+                was_merged,
+                current_size_bytes=current_size,
+                size_limit_reached=size_limit_reached,
+                total_requested_urls=len(unique_urls),
+            )
+
+            # Enhance the response with sitemap discovery info
+            enhanced_result = (
+                f"WEBSITE INDEXING COMPLETE\n"
+                f"Base URL: {base_url}\n"
+                f"Sitemaps discovered: {len(found_sitemaps)}\n"
+                f"URLs extracted: {len(unique_urls)}\n"
+                f"URLs successfully indexed: {len(valid_urls)}\n"
+                f"Include patterns: {', '.join(include_patterns) if include_patterns else 'None (all URLs)'}\n"
+                f"Exclude patterns: {', '.join(exclude_patterns) if exclude_patterns else 'None'}\n\n"
+                f"DISCOVERED SITEMAPS:\n"
+                f"{chr(10).join(['- ' + sitemap for sitemap in found_sitemaps])}\n\n"
+                f"INDEXING RESULTS:\n{result}"
+            )
+
+            return enhanced_result
+
+        except Exception as e:
+            # Extract agent_id for error logging if possible
+            agent_id = "UNKNOWN"
+            try:
+                if config:
+                    context = self.context_from_config(config)
+                    if context and context.agent_id:
+                        agent_id = context.agent_id
+            except Exception:
+                pass
+
+            logger.error(f"[{agent_id}] Error in WebsiteIndexer: {e}", exc_info=True)
+            raise type(e)(f"[agent:{agent_id}]: {e}") from e
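The new web_scraper_website_indexer skill above chains three steps: sitemap discovery via robots.txt and common sitemap paths, URL extraction from the sitemap XML with gpt-4o-mini (with a regex fallback), and delegation to the existing scrape_and_index_urls pipeline. A minimal driver sketch follows. It is not IntentKit's documented API: it assumes skills are constructed with a skill_store keyword (as the OpenAIBaseTool call in the diff suggests), that the agent id travels inside the RunnableConfig in whatever shape context_from_config expects, and the "demo-agent" id and skill_store argument are purely illustrative.

# Hypothetical driver for the WebsiteIndexer skill -- a sketch under the assumptions above.
from langchain_core.runnables import RunnableConfig

from intentkit.skills.web_scraper.website_indexer import WebsiteIndexer


async def index_docs(skill_store) -> str:
    # Assumption: skills accept the runtime skill store as a keyword argument.
    tool = WebsiteIndexer(skill_store=skill_store)
    # Assumption: the agent id is carried in the config's "configurable" mapping.
    config: RunnableConfig = {"configurable": {"agent_id": "demo-agent"}}
    # BaseTool is a Runnable, so ainvoke() validates the input against
    # WebsiteIndexerInput and forwards the RunnableConfig to _arun(config=...).
    return await tool.ainvoke(
        {
            "base_url": "https://example.com",
            "max_urls": 25,
            "include_patterns": ["/docs/"],
        },
        config=config,
    )

# Run with asyncio.run(index_docs(skill_store)) inside a real IntentKit runtime.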
intentkit/utils/logging.py
@@ -43,7 +43,7 @@ def setup_logging(env: str, debug: bool = False):
         debug: Debug mode flag
     """
 
-    if env == "local" or debug:
+    if debug:
         # Set up logging configuration for local/debug
         logging.basicConfig(
             level=logging.DEBUG,
{intentkit-0.5.2.dist-info → intentkit-0.6.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: intentkit
-Version: 0.5.2
+Version: 0.6.0
 Summary: Intent-based AI Agent Platform - Core Package
 Project-URL: Homepage, https://github.com/crestal-network/intentkit
 Project-URL: Repository, https://github.com/crestal-network/intentkit