signalwire-agents 0.1.23__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. signalwire_agents/__init__.py +1 -1
  2. signalwire_agents/agent_server.py +2 -1
  3. signalwire_agents/cli/config.py +61 -0
  4. signalwire_agents/cli/core/__init__.py +1 -0
  5. signalwire_agents/cli/core/agent_loader.py +254 -0
  6. signalwire_agents/cli/core/argparse_helpers.py +164 -0
  7. signalwire_agents/cli/core/dynamic_config.py +62 -0
  8. signalwire_agents/cli/execution/__init__.py +1 -0
  9. signalwire_agents/cli/execution/datamap_exec.py +437 -0
  10. signalwire_agents/cli/execution/webhook_exec.py +125 -0
  11. signalwire_agents/cli/output/__init__.py +1 -0
  12. signalwire_agents/cli/output/output_formatter.py +132 -0
  13. signalwire_agents/cli/output/swml_dump.py +177 -0
  14. signalwire_agents/cli/simulation/__init__.py +1 -0
  15. signalwire_agents/cli/simulation/data_generation.py +365 -0
  16. signalwire_agents/cli/simulation/data_overrides.py +187 -0
  17. signalwire_agents/cli/simulation/mock_env.py +271 -0
  18. signalwire_agents/cli/test_swaig.py +522 -2539
  19. signalwire_agents/cli/types.py +72 -0
  20. signalwire_agents/core/agent/__init__.py +1 -3
  21. signalwire_agents/core/agent/config/__init__.py +1 -3
  22. signalwire_agents/core/agent/prompt/manager.py +25 -7
  23. signalwire_agents/core/agent/tools/decorator.py +2 -0
  24. signalwire_agents/core/agent/tools/registry.py +8 -0
  25. signalwire_agents/core/agent_base.py +492 -3053
  26. signalwire_agents/core/function_result.py +31 -42
  27. signalwire_agents/core/mixins/__init__.py +28 -0
  28. signalwire_agents/core/mixins/ai_config_mixin.py +373 -0
  29. signalwire_agents/core/mixins/auth_mixin.py +287 -0
  30. signalwire_agents/core/mixins/prompt_mixin.py +345 -0
  31. signalwire_agents/core/mixins/serverless_mixin.py +368 -0
  32. signalwire_agents/core/mixins/skill_mixin.py +55 -0
  33. signalwire_agents/core/mixins/state_mixin.py +219 -0
  34. signalwire_agents/core/mixins/tool_mixin.py +295 -0
  35. signalwire_agents/core/mixins/web_mixin.py +1130 -0
  36. signalwire_agents/core/skill_manager.py +3 -1
  37. signalwire_agents/core/swaig_function.py +10 -1
  38. signalwire_agents/core/swml_service.py +140 -58
  39. signalwire_agents/skills/README.md +452 -0
  40. signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
  41. signalwire_agents/skills/datasphere/README.md +210 -0
  42. signalwire_agents/skills/datasphere_serverless/README.md +258 -0
  43. signalwire_agents/skills/datetime/README.md +132 -0
  44. signalwire_agents/skills/joke/README.md +149 -0
  45. signalwire_agents/skills/math/README.md +161 -0
  46. signalwire_agents/skills/native_vector_search/skill.py +33 -13
  47. signalwire_agents/skills/play_background_file/README.md +218 -0
  48. signalwire_agents/skills/spider/README.md +236 -0
  49. signalwire_agents/skills/spider/__init__.py +4 -0
  50. signalwire_agents/skills/spider/skill.py +479 -0
  51. signalwire_agents/skills/swml_transfer/README.md +395 -0
  52. signalwire_agents/skills/swml_transfer/__init__.py +1 -0
  53. signalwire_agents/skills/swml_transfer/skill.py +257 -0
  54. signalwire_agents/skills/weather_api/README.md +178 -0
  55. signalwire_agents/skills/web_search/README.md +163 -0
  56. signalwire_agents/skills/wikipedia_search/README.md +228 -0
  57. {signalwire_agents-0.1.23.dist-info → signalwire_agents-0.1.24.dist-info}/METADATA +47 -2
  58. {signalwire_agents-0.1.23.dist-info → signalwire_agents-0.1.24.dist-info}/RECORD +62 -22
  59. {signalwire_agents-0.1.23.dist-info → signalwire_agents-0.1.24.dist-info}/entry_points.txt +1 -1
  60. signalwire_agents/core/agent/config/ephemeral.py +0 -176
  61. signalwire_agents-0.1.23.data/data/schema.json +0 -5611
  62. {signalwire_agents-0.1.23.dist-info → signalwire_agents-0.1.24.dist-info}/WHEEL +0 -0
  63. {signalwire_agents-0.1.23.dist-info → signalwire_agents-0.1.24.dist-info}/licenses/LICENSE +0 -0
  64. {signalwire_agents-0.1.23.dist-info → signalwire_agents-0.1.24.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,236 @@
1
+ # Spider Skill
2
+
3
+ Fast web scraping and crawling capabilities for SignalWire AI Agents. Optimized for speed and token efficiency with sub-second response times.
4
+
5
+ ## Features
6
+
7
+ - **Single page scraping** - Extract content from any web page in under 500ms
8
+ - **Multi-page crawling** - Follow links and crawl entire sections of websites
9
+ - **Structured data extraction** - Extract specific data using CSS/XPath selectors
10
+ - **Multiple output formats** - Plain text, markdown, or structured JSON
11
+ - **Smart text truncation** - Intelligently truncate long content while preserving key information
12
+ - **Response caching** - Cache pages to avoid redundant requests
13
+ - **Configurable crawling** - Control depth, page limits, and URL patterns
14
+
15
+ ## Installation
16
+
17
+ ```python
18
+ # Basic usage with defaults (single page scraping)
19
+ agent.add_skill("spider")
20
+
21
+ # Custom configuration
22
+ agent.add_skill("spider", {
23
+ "delay": 0.5,
24
+ "max_pages": 10,
25
+ "max_depth": 2
26
+ })
27
+ ```
28
+
29
+ ## Configuration Parameters
30
+
31
+ | Parameter | Type | Default | Description |
32
+ |-----------|------|---------|-------------|
33
+ | `delay` | float | 0.1 | Seconds between requests |
34
+ | `concurrent_requests` | int | 5 | Number of parallel requests |
35
+ | `timeout` | int | 5 | Request timeout in seconds |
36
+ | `max_pages` | int | 1 | Maximum pages to crawl |
37
+ | `max_depth` | int | 0 | How many links deep to crawl |
38
+ | `extract_type` | string | "fast_text" | Default extraction method |
39
+ | `max_text_length` | int | 3000 | Maximum characters per page |
40
+ | `clean_text` | bool | True | Remove extra whitespace |
41
+ | `cache_enabled` | bool | True | Enable response caching |
42
+ | `follow_robots_txt` | bool | False | Respect robots.txt |
43
+ | `user_agent` | string | "Spider/1.0" | User agent string |
44
+ | `headers` | dict | {} | Additional HTTP headers |
45
+
46
+ ## Available Tools
47
+
48
+ ### scrape_url
49
+
50
+ Extract text content from a single web page.
51
+
52
+ **Parameters:**
53
+ - `url` (required): The URL to scrape
54
+ - `extract_type` (optional): "fast_text", "markdown", or "structured"
55
+ - `selectors` (optional): CSS/XPath selectors for specific elements
56
+
57
+ **Examples:**
58
+ ```
59
+ "Please get the content from https://example.com/article"
60
+ "Scrape the main text from https://docs.example.com in markdown format"
61
+ "Extract the product price from this page using the .price selector"
62
+ ```
63
+
64
+ ### crawl_site
65
+
66
+ Crawl multiple pages starting from a URL.
67
+
68
+ **Parameters:**
69
+ - `start_url` (required): Starting URL for the crawl
70
+ - `max_depth` (optional): How many links deep to crawl
71
+ - `follow_patterns` (optional): List of regex patterns for URLs to follow
72
+ - `max_pages` (optional): Maximum pages to crawl
73
+
74
+ **Examples:**
75
+ ```
76
+ "Crawl the documentation starting from /docs with depth 2"
77
+ "Get all blog posts from the site, following only /blog/ URLs"
78
+ "Crawl up to 20 pages from their support section"
79
+ ```
80
+
81
+ ### extract_structured_data
82
+
83
+ Extract specific data from a web page using selectors.
84
+
85
+ **Parameters:**
86
+ - `url` (required): The URL to scrape
87
+ - `selectors` (required): Dictionary mapping field names to CSS/XPath selectors
88
+
89
+ **Examples:**
90
+ ```
91
+ "Extract the title, price, and description from this product page"
92
+ "Get all the email addresses and phone numbers from the contact page"
93
+ ```
94
+
95
+ ## Usage Examples
96
+
97
+ ### Basic Single Page Scraping (Default)
98
+ ```python
99
+ agent.add_skill("spider")
100
+ # AI can now: "Get the content from https://example.com"
101
+ ```
102
+
103
+ ### Documentation Crawling
104
+ ```python
105
+ agent.add_skill("spider", {
106
+ "max_pages": 50,
107
+ "max_depth": 3,
108
+ "delay": 1.0,
109
+ "extract_type": "markdown"
110
+ })
111
+ # AI can now: "Crawl the API documentation and summarize the endpoints"
112
+ ```
113
+
114
+ ### Fast News Aggregation
115
+ ```python
116
+ agent.add_skill("spider", {
117
+ "concurrent_requests": 10,
118
+ "delay": 0.05,
119
+ "max_pages": 20,
120
+ "max_text_length": 1000,
121
+ "cache_enabled": True
122
+ })
123
+ # AI can now: "Get the latest articles from the news section"
124
+ ```
125
+
126
+ ### Respectful External Scraping
127
+ ```python
128
+ agent.add_skill("spider", {
129
+ "delay": 2.0,
130
+ "concurrent_requests": 1,
131
+ "follow_robots_txt": True,
132
+ "user_agent": "MyBot/1.0 (contact@example.com)"
133
+ })
134
+ # AI can now: "Carefully scrape competitor pricing data"
135
+ ```
136
+
137
+ ### Multiple Spider Instances
138
+ ```python
139
+ # Fast spider for internal sites
140
+ agent.add_skill("spider", {
141
+ "tool_name": "fast_spider",
142
+ "delay": 0.1,
143
+ "concurrent_requests": 10
144
+ })
145
+
146
+ # Slow spider for external sites
147
+ agent.add_skill("spider", {
148
+ "tool_name": "polite_spider",
149
+ "delay": 2.0,
150
+ "concurrent_requests": 1,
151
+ "follow_robots_txt": True
152
+ })
153
+ # AI can now use: fast_spider_scrape_url() and polite_spider_scrape_url()
154
+ ```
155
+
156
+ ## Output Examples
157
+
158
+ ### Fast Text Output (Default)
159
+ ```
160
+ Content from https://example.com/article (2,456 characters):
161
+
162
+ How to Build Better Web Applications
163
+ Published on January 15, 2024
164
+
165
+ In this comprehensive guide, we'll explore modern techniques for building
166
+ scalable and maintainable web applications...
167
+
168
+ Key Topics:
169
+ - Architecture patterns
170
+ - Performance optimization
171
+ - Security best practices
172
+ - Testing strategies
173
+
174
+ [...CONTENT TRUNCATED...]
175
+
176
+ For more information, visit our documentation portal.
177
+ ```
178
+
179
+ ### Crawl Summary Output
180
+ ```
181
+ Crawled 5 pages from docs.example.com:
182
+
183
+ 1. https://docs.example.com/ (depth: 0, 3,456 chars)
184
+ Summary: Welcome to our documentation. This guide covers...
185
+
186
+ 2. https://docs.example.com/quickstart (depth: 1, 2,890 chars)
187
+ Summary: Quick Start Guide. Get up and running in 5 minutes...
188
+
189
+ 3. https://docs.example.com/api (depth: 1, 4,567 chars)
190
+ Summary: API Reference. Complete documentation of all endpoints...
191
+
192
+ Total content: 15,234 characters across 5 pages
193
+ ```
194
+
195
+ ## Performance Characteristics
196
+
197
+ - **Single page scrape**: ~300-500ms
198
+ - **10-page crawl**: ~2-3 seconds
199
+ - **Text extraction**: <50ms per page
200
+ - **Caching**: Subsequent requests ~10ms
201
+
202
+ ## Best Practices
203
+
204
+ 1. **Start with defaults** - The skill is optimized for single page scraping out of the box
205
+ 2. **Use caching** - Enabled by default, saves time on repeated requests
206
+ 3. **Set appropriate delays** - Be respectful of external sites (2+ seconds)
207
+ 4. **Limit crawl scope** - Use `max_pages` and `max_depth` to control crawl size
208
+ 5. **Use URL patterns** - Filter crawls with `follow_patterns` for focused results
209
+ 6. **Monitor performance** - Check logs for timing and error information
210
+
211
+ ## Limitations
212
+
213
+ - No JavaScript rendering (for speed)
214
+ - Basic text extraction only
215
+ - No authentication support
216
+ - No form submission
217
+ - Limited to HTML content
218
+ - No file downloads
219
+
220
+ ## Error Handling
221
+
222
+ The skill handles common errors gracefully:
223
+ - **Timeouts**: Returns partial content with timeout notice
224
+ - **HTTP errors**: Reports status code and error message
225
+ - **Invalid URLs**: Clear error message
226
+ - **Rate limiting**: Respects 429 status codes
227
+ - **Network errors**: Returns descriptive error message
228
+
229
+ ## Contributing
230
+
231
+ To enhance this skill:
232
+ 1. Keep performance as the top priority
233
+ 2. Maintain backward compatibility
234
+ 3. Add tests for new features
235
+ 4. Update this documentation
236
+ 5. Consider token efficiency in outputs
@@ -0,0 +1,4 @@
1
+ """Spider skill for web scraping."""
2
+ from .skill import SpiderSkill
3
+
4
+ __all__ = ["SpiderSkill"]