skydeckai-code 0.1.27__py3-none-any.whl → 0.1.28__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aidd/tools/__init__.py CHANGED
@@ -77,7 +77,7 @@ from .screenshot_tool import (
     handle_capture_screenshot,
 )
 from .system_tools import get_system_info_tool, handle_get_system_info
-from .web_tools import web_fetch_tool, handle_web_fetch
+from .web_tools import web_fetch_tool, handle_web_fetch, web_search_tool, handle_web_search
 
 # Export all tools definitions
 TOOL_DEFINITIONS = [
@@ -125,6 +125,7 @@ TOOL_DEFINITIONS = [
     read_image_file_tool(),
     # Web tools
     web_fetch_tool(),
+    web_search_tool(),
 ]
 
 # Export all handlers
@@ -173,4 +174,5 @@ TOOL_HANDLERS = {
    "read_image_file": handle_read_image_file,
    # Web handlers
    "web_fetch": handle_web_fetch,
+   "web_search": handle_web_search,
 }
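
The registration pattern above pairs each tool definition with a handler keyed by the tool's name. A minimal sketch (not part of the package) that cross-checks the two exports after this change:

```python
# Sketch only: verify that every tool definition in TOOL_DEFINITIONS has a
# matching handler in TOOL_HANDLERS, including the new "web_search" pair.
from aidd.tools import TOOL_DEFINITIONS, TOOL_HANDLERS

definition_names = {tool["name"] for tool in TOOL_DEFINITIONS}
missing = definition_names - set(TOOL_HANDLERS)
assert not missing, f"Definitions without handlers: {missing}"
assert "web_search" in TOOL_HANDLERS
```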
aidd/tools/web_tools.py CHANGED
@@ -1,9 +1,12 @@
 import os
-import requests
-from typing import List, Optional
+import random
+import time
+from typing import List
 from urllib.parse import urlparse
 
+import requests
 from mcp.types import TextContent
+
 from .state import state
 
 
@@ -17,6 +20,7 @@ def web_fetch_tool():
         "WHEN NOT TO USE: When you need to interact with complex websites requiring authentication "
         "or session management, when the data needs to be processed in a specific format not supported, "
         "or when you need to make authenticated API calls with OAuth. "
+        "TIP: Use 'web_search' first to find relevant URLs, then use this tool to fetch detailed content. "
         "RETURNS: The content of the URL as text. For HTML pages, returns the raw HTML content. "
         "For JSON endpoints, returns the JSON content as a string. Successful response includes HTTP "
         "status code. Failed requests include error details. Maximum request size enforced for safety.",
@@ -183,3 +187,589 @@ async def handle_web_fetch(arguments: dict) -> List[TextContent]:
     except Exception as e:
         # Handle other errors
         raise ValueError(f"Error processing content from {url}: {str(e)}")
+
+
+def web_search_tool():
+    return {
+        "name": "web_search",
+        "description": "Performs a web search and returns the search results. "
+        "WHEN TO USE: When you need to find information on the web, get up-to-date data, "
+        "or research a topic. This provides more current information than your training data. "
+        "WHEN NOT TO USE: For queries requiring complex authentication, accessing private data, "
+        "or when you want to browse interactive websites. "
+        "TIP: For best results, use this tool to find relevant URLs, then use 'web_fetch' to get the full content of specific pages. "
+        "RETURNS: A list of search results including titles, URLs, and snippets for each result.",
+        "inputSchema": {
+            "type": "object",
+            "properties": {
+                "query": {
+                    "type": "string",
+                    "description": "The search query to send to the search engine. Be specific to get better results. "
+                    "Example: 'latest python release features' or 'climate change statistics 2023'."
+                },
+                "num_results": {
+                    "type": "integer",
+                    "description": "Number of search results to return. Maximum is 20 to prevent abuse.",
+                    "default": 10
+                },
+                "convert_html_to_markdown": {
+                    "type": "boolean",
+                    "description": "If true, search result snippets will be converted from HTML to markdown "
+                    "for better readability.",
+                    "default": True
+                },
+                "search_engine": {
+                    "type": "string",
+                    "description": "Specifies which search engine to use. Options: 'auto' (tries all in sequence), "
+                    "'bing', or 'duckduckgo'. Some engines may block automated requests.",
+                    "enum": ["auto", "bing", "duckduckgo"],
+                    "default": "auto"
+                }
+            },
+            "required": ["query"]
+        }
+    }
+
+
+def _process_ddg_url(url):
+    """Process DuckDuckGo URLs to get the actual target URL."""
+    try:
+        import urllib.parse
+        url_parts = urllib.parse.urlparse(url)
+
+        # Case 1: Traditional uddg parameter format
+        if 'uddg' in url_parts.query:
+            query_parts = urllib.parse.parse_qs(url_parts.query)
+            extracted_url = query_parts.get('uddg', [''])[0]
+            if extracted_url:
+                return extracted_url
+
+        # Case 2: Advertising/redirect y.js format
+        elif 'y.js' in url_parts.path:
+            query_parts = urllib.parse.parse_qs(url_parts.query)
+            # Try ad_domain first
+            if 'ad_domain' in query_parts and query_parts['ad_domain'][0]:
+                return f"https://{query_parts['ad_domain'][0]}"
+            # Then try du parameter
+            elif 'du' in query_parts and query_parts['du'][0]:
+                return query_parts['du'][0]
+            # Try other known parameters
+            for param in ['u', 'l']:
+                if param in query_parts and query_parts[param][0]:
+                    return query_parts[param][0]
+
+        # Case 3: Direct URL
+        elif url.startswith('http'):
+            return url
+
+    except Exception as e:
+        print(f"Error processing DuckDuckGo URL: {str(e)}")
+
+    # Default to original URL if all else fails
+    return url
+
+
+def _process_bing_url(url):
+    """Process Bing URLs to get the actual target URL."""
+    try:
+        import urllib.parse
+        parsed_url = urllib.parse.urlparse(url)
+
+        # Check if it's a Bing redirect URL
+        if parsed_url.netloc == 'www.bing.com' and parsed_url.path == '/ck/a':
+            # Try to extract the actual URL from Bing's redirect
+            query_dict = urllib.parse.parse_qs(parsed_url.query)
+            if 'u' in query_dict:
+                # Bing stores the actual URL in the 'u' parameter, often base64 encoded
+                import base64
+                try:
+                    # Try to decode if it's base64
+                    real_url = base64.b64decode(query_dict['u'][0]).decode('utf-8')
+                    return real_url
+                except Exception:
+                    # If not base64, just use it directly
+                    return query_dict['u'][0]
+
+            # Try other known redirect parameters
+            for param in ['purl', 'r']:
+                if param in query_dict:
+                    return query_dict[param][0]
+
+    except Exception as e:
+        print(f"Error processing Bing URL: {str(e)}")
+
+    # Default to original URL if all else fails
+    return url
+
+
+async def handle_web_search(arguments: dict) -> List[TextContent]:
+    """Handle performing a web search using direct HTML scraping with anti-detection measures."""
+    query = arguments.get("query")
+    num_results = min(arguments.get("num_results", 10), 20)  # Cap at 20 results max
+    convert_html_to_markdown = arguments.get("convert_html_to_markdown", True)
+    search_engine = arguments.get("search_engine", "auto").lower()
+    engine_warning = None
+
+    if not query:
+        raise ValueError("Search query must be provided")
+
+    # Validate the search engine parameter
+    valid_engines = ["auto", "bing", "duckduckgo"]
+    if search_engine not in valid_engines:
+        if search_engine == "google":
+            engine_warning = "Warning: Google search engine is no longer supported because it blocks automated requests. Falling back to 'auto' mode."
+        else:
+            engine_warning = f"Warning: Unsupported search engine '{search_engine}'. Valid options are: {', '.join(valid_engines)}. Falling back to 'auto' mode."
+        print(engine_warning)
+        search_engine = "auto"  # Default to auto if invalid
+
+    # Create a list of common user agents to rotate through
+    user_agents = [
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15",
+        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0",
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
+    ]
+
+    # Use a random user agent
+    user_agent = random.choice(user_agents)
+
+    # Set up params for the request (note: unused below; each engine config defines its own params)
+    params = {
+        "q": query,
+        "num": num_results + 5,  # Request a few more results than needed
+        "hl": "en",  # Language hint
+        "gl": "us",  # Geolocation hint (helps avoid redirect to country-specific sites)
+    }
+
+    # Set up headers to more closely mimic a real browser (superseded by the per-engine headers below)
+    headers = {
+        "User-Agent": user_agent,
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Accept-Encoding": "gzip, deflate",
+        "Referer": "https://www.skydeck.ai/",
+        "Connection": "keep-alive",
+        "Cache-Control": "max-age=0",
+        "Upgrade-Insecure-Requests": "1",
+        "Sec-Fetch-Dest": "document",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-Site": "same-origin",
+        "Sec-Fetch-User": "?1",
+    }
+
+    # Define search engine configurations
+    search_engines = [
+        {
+            "name": "DuckDuckGo HTML",
+            "id": "duckduckgo",
+            "url": "https://html.duckduckgo.com/html/",
+            "params": {"q": query},
+            "headers": {
+                "User-Agent": user_agent,
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9",
+                "Accept-Encoding": "gzip, deflate",
+                "Referer": "https://duckduckgo.com/",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1"
+            },
+            "result_selector": [
+                ".web-result",
+                ".result:not(.result--ad)",
+                ".results_links:not(.result--ad)",
+                ".result"
+            ],
+            "title_selector": [
+                ".result__title",
+                ".result__a",
+                "h2",
+                ".result__title a"
+            ],
+            "link_selector": [
+                "a.result__a",
+                "a.result__url",
+                ".result__title a",
+                "a[href^='http']"
+            ],
+            "snippet_selector": [
+                ".result__snippet",
+                ".result__snippet p",
+                ".result__desc",
+                ".result__body",
+                ".snippet"
+            ]
+        },
+        {
+            "name": "Bing",
+            "id": "bing",
+            "url": "https://www.bing.com/search",
+            "params": {"q": query, "count": num_results},
+            "headers": {
+                "User-Agent": user_agent,
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+                "Accept-Language": "en-US,en;q=0.9",
+                "Accept-Encoding": "gzip, deflate",
+                "Referer": "https://www.bing.com/",
+                "Connection": "keep-alive",
+                "Upgrade-Insecure-Requests": "1"
+            },
+            "result_selector": [
+                ".b_algo",
+                "li.b_algo",
+                ".b_results > li:not(.b_ad)",
+                "ol#b_results > li"
+            ],
+            "title_selector": [
+                "h2",
+                ".b_title",
+                "h2 a",
+                "a"
+            ],
+            "link_selector": [
+                "h2 a",
+                "a.tilk",
+                "cite",
+                ".b_attribution > cite",
+                "a[href^='http']"
+            ],
+            "snippet_selector": [
+                ".b_caption p",
+                ".b_snippet",
+                ".b_richcard",
+                ".b_caption",
+                ".b_algoSlug"
+            ]
+        }
+    ]
+
+    # Filter engines based on user preference
+    if search_engine != "auto":
+        filtered_engines = [engine for engine in search_engines if engine["id"] == search_engine]
+        if filtered_engines:
+            search_engines = filtered_engines
+        # If no matching engine is found, keep the original list (fall back to auto)
+
+    # Track URLs we've already seen to prevent duplicates
+    seen_urls = set()
+
+    # Try each search engine until one works
+    for engine in search_engines:
+        try:
+            print(f"Trying search with {engine['name']}...")
+
+            # Add a small delay to avoid rate limiting
+            time.sleep(random.uniform(0.5, 1.5))
+
+            # Make the request
+            response = requests.get(
+                engine["url"],
+                params=engine["params"],
+                headers=engine["headers"],
+                timeout=15
+            )
+
+            # Check if the response was successful
+            if response.status_code == 200:
+                # Parse the HTML response
+                try:
+                    from bs4 import BeautifulSoup
+                    soup = BeautifulSoup(response.text, 'html.parser')
+                    search_results = []
+
+                    # Special handling for DuckDuckGo, which uses a different URL structure
+                    is_ddg = engine["name"] == "DuckDuckGo HTML"
+
+                    # Convert a single selector to a list for consistent handling
+                    result_selectors = engine["result_selector"]
+                    if isinstance(result_selectors, str):
+                        result_selectors = [result_selectors]
+
+                    # Try each result selector until we find results
+                    result_elements = []
+                    for selector in result_selectors:
+                        result_elements = soup.select(selector)
+                        if result_elements:
+                            print(f"Found {len(result_elements)} results with selector '{selector}'")
+                            break
+
+                    print(f"Found {len(result_elements)} potential results with {engine['name']}")
+
+                    for result in result_elements:
+                        if len(search_results) >= num_results:
+                            break
+
+                        # Try all title selectors
+                        title_selectors = engine["title_selector"]
+                        if isinstance(title_selectors, str):
+                            title_selectors = [title_selectors]
+
+                        title_element = None
+                        for selector in title_selectors:
+                            title_element = result.select_one(selector)
+                            if title_element:
+                                break
+
+                        # Try all link selectors
+                        link_selectors = engine["link_selector"]
+                        if isinstance(link_selectors, str):
+                            link_selectors = [link_selectors]
+
+                        link_element = None
+                        for selector in link_selectors:
+                            link_element = result.select_one(selector)
+                            if link_element and 'href' in link_element.attrs:
+                                break
+
+                        # Try all snippet selectors
+                        snippet_selectors = engine["snippet_selector"]
+                        if isinstance(snippet_selectors, str):
+                            snippet_selectors = [snippet_selectors]
+
+                        snippet_element = None
+                        for selector in snippet_selectors:
+                            snippet_element = result.select_one(selector)
+                            if snippet_element:
+                                break
+
+                        # If we couldn't find a link or title, try looking for any anchor tag with text
+                        if not link_element and not title_element:
+                            for anchor in result.find_all('a', href=True):
+                                if anchor.text.strip() and len(anchor.text.strip()) > 3:
+                                    link_element = anchor
+                                    title_element = anchor
+                                    break
+
+                        if title_element and link_element and 'href' in link_element.attrs:
+                            # Process URL
+                            url = link_element['href']
+
+                            # Process URL based on search engine
+                            if is_ddg:
+                                url = _process_ddg_url(url)
+                            elif engine["id"] == "bing":
+                                url = _process_bing_url(url)
+
+                            # Skip duplicate URLs
+                            canonical_url = url.split('?')[0].rstrip('/')  # Remove query params and trailing slash for comparison
+                            if canonical_url in seen_urls:
+                                continue
+                            seen_urls.add(canonical_url)
+
+                            # Ensure URL is valid
+                            if not url or not url.startswith('http'):
+                                continue
+
+                            # Get title and snippet
+                            title = title_element.text.strip()
+                            snippet = snippet_element.text.strip() if snippet_element else "No description available"
+
+                            # Add to results if we have valid data
+                            if title:
+                                search_results.append({
+                                    "title": title,
+                                    "link": url,
+                                    "snippet": snippet
+                                })
+
+                    # If we found results, format and return them
+                    if search_results:
+                        print(f"Success! Found {len(search_results)} results with {engine['name']}")
+                        return _format_search_results(query, search_results, convert_html_to_markdown, engine["name"], engine_warning)
+
+                except Exception as parse_error:
+                    print(f"Error parsing {engine['name']} results: {str(parse_error)}")
+                    # Continue to the next engine
+            else:
+                print(f"{engine['name']} returned status code: {response.status_code}")
+
+        except Exception as e:
+            print(f"Error with {engine['name']}: {str(e)}")
+            # Continue to the next engine
+
+    # If all engines fail, try a last-resort approach: extract any links from the last response
+    try:
+        if 'response' in locals() and response.status_code == 200:
+            from bs4 import BeautifulSoup
+            soup = BeautifulSoup(response.text, 'html.parser')
+
+            print("Attempting emergency link extraction...")
+            emergency_results = []
+
+            # Look for common result containers first
+            potential_containers = [
+                # Common search result containers
+                soup.select("div.g, div.b_algo, .result, .web-result, .results_links, li[data-bm], div[data-hveid]"),
+                # Any div with title-like content
+                soup.select("div:has(h1), div:has(h2), div:has(h3), div:has(h4)"),
+                # Main content areas
+                soup.select("main, #main, #content, .content, #results, .results"),
+                # Fallback to any link with reasonable text
+                soup.select("a[href^='http']")
+            ]
+
+            # Process each container type in order until we find enough results
+            for container_set in potential_containers:
+                if container_set and len(emergency_results) < num_results:
+                    for container in container_set:
+                        # For containers, look for links inside
+                        if container.name != 'a':
+                            links = container.select("a[href^='http']") or []
+                            # Process each link in the container
+                            for link in links:
+                                url = link.get('href', '')
+                                title = link.text.strip()
+
+                                # Skip navigation links or empty links
+                                if not url or not title or len(title) < 5:
+                                    continue
+
+                                # Skip search engine internal links
+                                if any(s in url for s in ['google.com/search', 'bing.com/search', 'duckduckgo.com']):
+                                    continue
+
+                                # Skip duplicate URLs
+                                canonical_url = url.split('?')[0].rstrip('/')
+                                if canonical_url in seen_urls:
+                                    continue
+                                seen_urls.add(canonical_url)
+
+                                # Process URL based on domain
+                                if 'bing.com' in url:
+                                    url = _process_bing_url(url)
+                                elif 'duckduckgo.com' in url:
+                                    url = _process_ddg_url(url)
+
+                                # Find snippet text near the link if possible
+                                snippet = "No description available"
+                                # Try to get a snippet from a surrounding paragraph or div
+                                parent = link.parent
+                                if parent:
+                                    # Look for sibling paragraphs or divs
+                                    sibling = parent.find_next_sibling(['p', 'div', 'span'])
+                                    if sibling and sibling.text.strip():
+                                        snippet = sibling.text.strip()
+                                    # Or try the parent's text, excluding the link text
+                                    elif parent.name in ['p', 'div', 'span'] and len(parent.text) > len(title):
+                                        snippet_text = parent.text.replace(title, '').strip()
+                                        if snippet_text:
+                                            snippet = snippet_text
+
+                                emergency_results.append({
+                                    "title": title,
+                                    "link": url,
+                                    "snippet": snippet
+                                })
+
+                                if len(emergency_results) >= num_results:
+                                    break
+                        else:
+                            # Process a direct link
+                            url = container.get('href', '')
+                            title = container.text.strip()
+
+                            # Skip invalid links
+                            if not url or not title or len(title) < 5:
+                                continue
+
+                            # Skip search engine internal links
+                            if any(s in url for s in ['google.com/search', 'bing.com/search', 'duckduckgo.com']):
+                                continue
+
+                            # Skip duplicate URLs
+                            canonical_url = url.split('?')[0].rstrip('/')
+                            if canonical_url in seen_urls:
+                                continue
+                            seen_urls.add(canonical_url)
+
+                            emergency_results.append({
+                                "title": title,
+                                "link": url,
+                                "snippet": "No description available"
+                            })
+
+                            if len(emergency_results) >= num_results:
+                                break
+
+                    if len(emergency_results) >= num_results:
+                        break
+
+            if emergency_results:
+                print(f"Found {len(emergency_results)} emergency results by extracting links")
+                return _format_search_results(query, emergency_results, convert_html_to_markdown, "Emergency Links", engine_warning)
+    except Exception as e:
+        print(f"Error in emergency link extraction: {str(e)}")
+
+    # If all search methods fail, provide helpful fallback information
+    print("All search methods failed, providing search fallback")
+    return _provide_search_fallback(query, engine_warning)
+
+
+def _format_search_results(query: str, search_results: list, convert_html_to_markdown: bool, engine_name: str = None, engine_warning: str = None) -> List[TextContent]:
+    """Format search results into markdown."""
+    formatted_results = ["# Web Search Results\n\n"]
+    formatted_results.append(f"**Query:** {query}\n\n")
+
+    if engine_warning:
+        formatted_results.append(f"**{engine_warning}**\n\n")
+
+    if engine_name:
+        formatted_results.append(f"**Source:** {engine_name}\n\n")
+
+    for i, item in enumerate(search_results, 1):
+        title = item.get("title", "No title")
+        link = item.get("link", "")
+        snippet = item.get("snippet", "No description available")
+
+        # Convert HTML in snippet to markdown if requested
+        if convert_html_to_markdown:
+            try:
+                import html2text
+                h = html2text.HTML2Text()
+                h.ignore_links = False
+                h.ignore_images = True
+                h.body_width = 0  # Don't wrap text
+
+                # Remove HTML tags from title and snippet
+                title = h.handle(title) if '<' in title else title
+                snippet = h.handle(snippet) if '<' in snippet else snippet
+            except ImportError:
+                # Continue without conversion if html2text is not available;
+                # just strip basic HTML tags as a fallback
+                import re
+                title = re.sub(r'<[^>]*>', '', title)
+                snippet = re.sub(r'<[^>]*>', '', snippet)
+
+        formatted_results.append(f"## {i}. {title}\n")
+        formatted_results.append(f"**URL:** {link}\n\n")
+        formatted_results.append(f"{snippet}\n\n---\n\n")
+
+    return [TextContent(
+        type="text",
+        text="".join(formatted_results)
+    )]
+
+
+def _provide_search_fallback(query: str, engine_warning: str = None) -> List[TextContent]:
+    """Provide a useful fallback when search fails."""
+    # Create a helpful response with suggestions for alternative approaches
+    formatted_results = ["# Web Search Results\n\n"]
+    formatted_results.append(f"**Query:** {query}\n\n")
+
+    if engine_warning:
+        formatted_results.append(f"**{engine_warning}**\n\n")
+
+    formatted_results.append("I couldn't retrieve search results at this time.\n\n")
+
+    # Add an explanation about limitations
+    formatted_results.append("## Why search might be unavailable\n\n")
+    formatted_results.append("Web search APIs often have restrictions on automated access, which can cause searches to fail. When this happens, it's better to:\n\n")
+    formatted_results.append("1. Try a different search engine (Bing or DuckDuckGo, which are more reliable for automated access)\n")
+    formatted_results.append("2. Visit specific authoritative sites directly\n")
+    formatted_results.append("3. Try the search again later, or with different terms\n")
+
+    return [TextContent(
+        type="text",
+        text="".join(formatted_results)
+    )]
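
The two `_process_*_url` helpers above unwrap search-engine redirect links. A quick illustration with hypothetical redirect URLs that follow the formats the parsers target (DuckDuckGo's `uddg` parameter and Bing's base64-encoded `u` parameter on `/ck/a`):

```python
# Hypothetical redirect URLs; only the parameter formats come from the code above.
import base64
from urllib.parse import quote

from aidd.tools.web_tools import _process_bing_url, _process_ddg_url

ddg_redirect = "https://duckduckgo.com/l/?uddg=" + quote("https://example.com/article", safe="")
print(_process_ddg_url(ddg_redirect))    # -> https://example.com/article

encoded = base64.b64encode(b"https://example.com/article").decode()
bing_redirect = f"https://www.bing.com/ck/a?u={encoded}"
print(_process_bing_url(bing_redirect))  # -> https://example.com/article
```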
skydeckai_code-0.1.28.dist-info/METADATA CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: skydeckai-code
-Version: 0.1.27
-Summary: This MCP server provides a comprehensive set of tools for AI-driven Development workflows including file operations, code analysis, multi-language execution, Git operations, web content fetching, code content searching, and system information retrieval.
+Version: 0.1.28
+Summary: This MCP server provides a comprehensive set of tools for AI-driven Development workflows including file operations, code analysis, multi-language execution, Git operations, web content fetching with HTML-to-markdown conversion, multi-engine web search, code content searching, and system information retrieval.
 Project-URL: Homepage, https://github.com/skydeckai/skydeckai-code
 Project-URL: Repository, https://github.com/skydeckai/skydeckai-code
 Project-URL: Documentation, https://github.com/skydeckai/skydeckai-code/blob/main/README.md
@@ -10,6 +10,7 @@ License: Apache 2.0
 License-File: LICENSE
 Keywords: ai,aidd,code,code-analysis,development,mcp
 Requires-Python: >=3.11
+Requires-Dist: beautifulsoup4>=4.13.3
 Requires-Dist: gitpython>=3.1.44
 Requires-Dist: html2text>=2025.4.15
 Requires-Dist: mcp>=1.6.0
@@ -35,7 +36,7 @@ Description-Content-Type: text/markdown
 
 # SkyDeckAI Code
 
-An MCP server that provides a comprehensive set of tools for AI-driven development workflows. Features include file system operations, code analysis using tree-sitter for multiple programming languages, Git operations, code execution, web content fetching, code content searching, and system information retrieval. Designed to enhance AI's capability to assist in software development tasks by providing direct access to both local and remote resources.
+An MCP server that provides a comprehensive set of tools for AI-driven development workflows. Features include file system operations, code analysis using tree-sitter for multiple programming languages, Git operations, code execution, web content fetching with HTML-to-markdown conversion, multi-engine web search, code content searching, and system information retrieval. Designed to enhance AI's capability to assist in software development tasks by providing direct access to both local and remote resources.
 
 # Formerly Known As MCP-Server-AIDD
 
@@ -98,7 +99,8 @@ If you're using SkyDeck AI Helper app, you can search for "SkyDeckAI Code" and i
 - Code content searching with regex pattern matching
 - Multi-language code execution with safety measures
 - Git operations (status, diff, commit, branch management, cloning)
-- Web content fetching from APIs and websites
+- Web content fetching from APIs and websites with HTML-to-markdown conversion
+- Multi-engine web search with reliable fallback mechanisms
 - Batch operations for parallel and serial tool execution
 - Security controls with configurable workspace boundaries
 - Screenshot and screen context tools
@@ -571,6 +573,53 @@ skydeckai-code-cli --tool web_fetch --args '{
 }'
 ```
 
+#### web_search
+
+Performs a robust web search using multiple search engines and returns concise, relevant results.
+
+```json
+{
+    "query": "latest python release features",
+    "num_results": 8,
+    "convert_html_to_markdown": true,
+    "search_engine": "bing"
+}
+```
+
+**Parameters:**
+| Parameter | Type | Required | Description |
+|-----------|---------|----------|---------------------------------------|
+| query | string | Yes | The search query to process. Be specific for better results. |
+| num_results | integer | No | Maximum number of search results to return (default: 10, max: 20) |
+| convert_html_to_markdown | boolean | No | When true, content will be converted from HTML to markdown for better readability (default: true) |
+| search_engine | string | No | Specifies which search engine to use: "auto" (default), "bing", or "duckduckgo" |
+
+**Returns:**
+A list of search results formatted in markdown, including titles, URLs, and snippets for each result. Results are deduplicated and organized hierarchically for easy reading.
+
+This tool uses a multi-engine approach that tries different search engines with various parsing strategies to ensure reliable results. You can specify a preferred engine, but some engines may block automated access, in which case the tool will fall back to alternative engines when "auto" is selected.
+
+**Example Usage:**
+
+```bash
+# Search with default settings (auto engine selection)
+skydeckai-code-cli --tool web_search --args '{
+    "query": "latest python release features"
+}'
+
+# Try DuckDuckGo if you want alternative results
+skydeckai-code-cli --tool web_search --args '{
+    "query": "machine learning frameworks comparison",
+    "search_engine": "duckduckgo"
+}'
+
+# Use Bing for reliable results
+skydeckai-code-cli --tool web_search --args '{
+    "query": "best programming practices 2023",
+    "search_engine": "bing"
+}'
+```
+
 ### Utility Tools
 
 #### batch_tools
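
Based on `_format_search_results` in this release, the markdown returned by web_search looks roughly like the sample below (the title, URL, and snippet are placeholders, not real output):

```markdown
# Web Search Results

**Query:** latest python release features

**Source:** Bing

## 1. What's New In Python

**URL:** https://example.com/whats-new

Release highlights, deprecations, and upgrade notes...

---
```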
skydeckai_code-0.1.28.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
 aidd/__init__.py,sha256=c9HBWxWruCxoAqLCJqltylAwz_7xmaK3g8DKViJZs0Q,222
 aidd/cli.py,sha256=cLtaQJmMBfr7fHkd0dyJqpDrVTIwybL48PotniWGrFM,5031
 aidd/server.py,sha256=kPRyWeWkMCZjabelC65XTmzZG7yw8htMJKSfnUcKnb0,1575
-aidd/tools/__init__.py,sha256=Ang69KUss3OzXQdJ8Wyabo-5WNMRB0gmlPlPbrKprg8,5214
+aidd/tools/__init__.py,sha256=Oyl9YzMB1SRT_skxMcWHPP7ScTjfBSgT3N4ctGxHMAI,5310
 aidd/tools/base.py,sha256=wHSAaGGYWM8ECmoYd7KEcmjsZRWesNQFf3zMjCKGMcc,380
 aidd/tools/code_analysis.py,sha256=fDpm2o_If5PsngXzHN2-ezSkPVT0ZxivLuzmHrOAmVU,33188
 aidd/tools/code_execution.py,sha256=dIPxHBtclsetDZY4jGlSBrw_t-7VlIVrK8mflnZ6c4w,13176
@@ -17,9 +17,9 @@ aidd/tools/path_tools.py,sha256=RGoOhqP69eHJzM8tEgn_5-GRaR0gp25fd0XZIJ_RnQE,4045
 aidd/tools/screenshot_tool.py,sha256=NMO5B4UG8qfMEOMRd2YoOjtwz_oQ2y1UAGU22jV1yGU,46337
 aidd/tools/state.py,sha256=RWSw0Jfsui8FqC0xsI7Ik07tAg35hRwLHa5xGBVbiI4,1493
 aidd/tools/system_tools.py,sha256=H4_qveKC2HA7SIbi-j4vxA0W4jYh2wfu9A6ni5wkZyA,7249
-aidd/tools/web_tools.py,sha256=DEovG7ZK2ER0S8YY0He5AsAfYMDpwLeqENEbuOhuEpY,8562
-skydeckai_code-0.1.27.dist-info/METADATA,sha256=-_y9BgFYwFNJmIgUUmoLAJkaOiBQvC8paZiGhhwV3ZA,29177
-skydeckai_code-0.1.27.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-skydeckai_code-0.1.27.dist-info/entry_points.txt,sha256=cT-IHh3_ioGLk3kwIeqj1X6Li1dnJinX9qKWUl7nOLg,80
-skydeckai_code-0.1.27.dist-info/licenses/LICENSE,sha256=uHse04vmI6ZjW7TblegFl30X-sDyyF0-QvH8ItPca3c,10865
-skydeckai_code-0.1.27.dist-info/RECORD,,
+aidd/tools/web_tools.py,sha256=gdsj2DEVYb_oYChItK5I1ugt2w25U7IAa5kEw9q6MVg,35534
+skydeckai_code-0.1.28.dist-info/METADATA,sha256=TxHt3pJB_Cjo9u49Y-tBgS7-oii2v_rMx-6bT5VohyU,31300
+skydeckai_code-0.1.28.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+skydeckai_code-0.1.28.dist-info/entry_points.txt,sha256=cT-IHh3_ioGLk3kwIeqj1X6Li1dnJinX9qKWUl7nOLg,80
+skydeckai_code-0.1.28.dist-info/licenses/LICENSE,sha256=uHse04vmI6ZjW7TblegFl30X-sDyyF0-QvH8ItPca3c,10865
+skydeckai_code-0.1.28.dist-info/RECORD,,