signalwire-agents 0.1.37__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. signalwire_agents/__init__.py +1 -1
  2. signalwire_agents/cli/build_search.py +95 -19
  3. signalwire_agents/core/agent_base.py +38 -0
  4. signalwire_agents/core/mixins/ai_config_mixin.py +120 -0
  5. signalwire_agents/core/skill_manager.py +47 -0
  6. signalwire_agents/search/index_builder.py +105 -10
  7. signalwire_agents/search/pgvector_backend.py +523 -0
  8. signalwire_agents/search/search_engine.py +41 -4
  9. signalwire_agents/search/search_service.py +86 -35
  10. signalwire_agents/skills/api_ninjas_trivia/skill.py +37 -1
  11. signalwire_agents/skills/datasphere/skill.py +82 -0
  12. signalwire_agents/skills/datasphere_serverless/skill.py +82 -0
  13. signalwire_agents/skills/joke/skill.py +21 -0
  14. signalwire_agents/skills/mcp_gateway/skill.py +82 -0
  15. signalwire_agents/skills/native_vector_search/README.md +210 -0
  16. signalwire_agents/skills/native_vector_search/skill.py +197 -7
  17. signalwire_agents/skills/play_background_file/skill.py +36 -0
  18. signalwire_agents/skills/registry.py +36 -0
  19. signalwire_agents/skills/spider/skill.py +113 -0
  20. signalwire_agents/skills/swml_transfer/skill.py +90 -0
  21. signalwire_agents/skills/weather_api/skill.py +28 -0
  22. signalwire_agents/skills/wikipedia_search/skill.py +22 -0
  23. {signalwire_agents-0.1.37.dist-info → signalwire_agents-0.1.38.dist-info}/METADATA +53 -1
  24. {signalwire_agents-0.1.37.dist-info → signalwire_agents-0.1.38.dist-info}/RECORD +28 -26
  25. {signalwire_agents-0.1.37.dist-info → signalwire_agents-0.1.38.dist-info}/WHEEL +0 -0
  26. {signalwire_agents-0.1.37.dist-info → signalwire_agents-0.1.38.dist-info}/entry_points.txt +0 -0
  27. {signalwire_agents-0.1.37.dist-info → signalwire_agents-0.1.38.dist-info}/licenses/LICENSE +0 -0
  28. {signalwire_agents-0.1.37.dist-info → signalwire_agents-0.1.38.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,210 @@
1
+ # Native Vector Search Skill
2
+
3
+ The Native Vector Search skill provides document search capabilities using vector similarity and keyword search. It supports multiple storage backends including SQLite (local files) and PostgreSQL with pgvector extension.
4
+
5
+ ## Features
6
+
7
+ - **Hybrid Search**: Combines vector similarity and keyword search for better results
8
+ - **Multiple Backends**: SQLite for local deployment, pgvector for scalable production use
9
+ - **Remote Search**: Connect to remote search servers
10
+ - **Auto-indexing**: Automatically build indexes from source directories
11
+ - **NLP Enhancement**: Query expansion and synonym matching
12
+ - **Tag Filtering**: Filter results by document tags
13
+
14
+ ## Backends
15
+
16
+ ### SQLite Backend (Default)
17
+ - Stores indexes in `.swsearch` files
18
+ - Good for single-agent deployments
19
+ - Portable and self-contained
20
+ - No external dependencies
21
+
22
+ ### pgvector Backend
23
+ - Uses PostgreSQL with pgvector extension
24
+ - Scalable for multi-agent deployments
25
+ - Real-time updates capability
26
+ - Efficient similarity search with specialized indexes
27
+
28
+ ### Remote Search Server
29
+ - Connect to centralized search API
30
+ - Lower memory usage per agent
31
+ - Shared knowledge base
32
+
33
+ ## Configuration Parameters
34
+
35
+ ### Basic Parameters
36
+ - `tool_name`: Name of the search tool (default: "search_knowledge")
37
+ - `description`: Tool description for the AI
38
+ - `count`: Number of results to return (default: 5)
39
+ - `distance_threshold`: Minimum similarity score a result must reach, from 0.0 to 1.0 (default: 0.0, which applies no filtering)
40
+ - `tags`: Filter results by these tags
41
+
42
+ ### Backend Selection
43
+ - `backend`: Storage backend - "sqlite" or "pgvector" (default: "sqlite")
44
+
45
+ ### SQLite Backend
46
+ - `index_file`: Path to .swsearch index file
47
+ - `build_index`: Auto-build index from source (default: false)
48
+ - `source_dir`: Directory to index if build_index=true
49
+
50
+ ### pgvector Backend
51
+ - `connection_string`: PostgreSQL connection string (required)
52
+ - `collection_name`: Name of the collection to search (required)
53
+
54
+ ### Remote Backend
55
+ - `remote_url`: URL of remote search server
56
+ - `index_name`: Name of index on remote server
57
+
58
+ ### Response Formatting
59
+ - `response_prefix`: Text to prepend to results
60
+ - `response_postfix`: Text to append to results
61
+ - `no_results_message`: Message when no results found
62
+
63
+ ### NLP Configuration
64
+ - `query_nlp_backend`: NLP backend for queries ("basic", "nltk", or "spacy")
65
+ - `index_nlp_backend`: NLP backend for indexing ("basic", "nltk", or "spacy")
66
+
67
+ ## Usage Examples
68
+
69
+ ### SQLite Backend (Local File)
70
+ ```python
71
+ agent.add_skill("native_vector_search", {
72
+ "tool_name": "search_docs",
73
+ "description": "Search technical documentation",
74
+ "index_file": "docs.swsearch",
75
+ "count": 5
76
+ })
77
+ ```
78
+
79
+ ### pgvector Backend (PostgreSQL)
80
+ ```python
81
+ agent.add_skill("native_vector_search", {
82
+ "tool_name": "search_knowledge",
83
+ "description": "Search the knowledge base",
84
+ "backend": "pgvector",
85
+ "connection_string": "postgresql://user:pass@localhost:5432/knowledge",
86
+ "collection_name": "docs_collection",
87
+ "count": 5
88
+ })
89
+ ```
90
+
91
+ ### Remote Search Server
92
+ ```python
93
+ agent.add_skill("native_vector_search", {
94
+ "tool_name": "search_api",
95
+ "description": "Search API documentation",
96
+ "remote_url": "http://search-server:8001",
97
+ "index_name": "api_docs"
98
+ })
99
+ ```
100
+
101
+ ### Auto-build Index
102
+ ```python
103
+ agent.add_skill("native_vector_search", {
104
+ "tool_name": "search_local",
105
+ "build_index": True,
106
+ "source_dir": "./documentation",
107
+ "file_types": ["md", "txt"],
108
+ "index_file": "auto_docs.swsearch"
109
+ })
110
+ ```
111
+
112
+ ### Multiple Search Instances
113
+ ```python
114
+ # Documentation search
115
+ agent.add_skill("native_vector_search", {
116
+ "tool_name": "search_docs",
117
+ "index_file": "docs.swsearch",
118
+ "description": "Search documentation"
119
+ })
120
+
121
+ # Code examples search
122
+ agent.add_skill("native_vector_search", {
123
+ "tool_name": "search_examples",
124
+ "backend": "pgvector",
125
+ "connection_string": "postgresql://localhost/knowledge",
126
+ "collection_name": "examples",
127
+ "description": "Search code examples"
128
+ })
129
+ ```
130
+
131
+ ## Installation
132
+
133
+ ### For SQLite Backend
134
+ ```bash
135
+ pip install signalwire-agents[search]
136
+ ```
137
+
138
+ ### For pgvector Backend
139
+ ```bash
140
+ pip install signalwire-agents[search,pgvector]
141
+ ```
142
+
143
+ ### For All Features
144
+ ```bash
145
+ pip install signalwire-agents[search-all]
146
+ ```
147
+
148
+ ## Building Indexes
149
+
150
+ ### Using sw-search CLI
151
+
152
+ #### SQLite Backend
153
+ ```bash
154
+ sw-search ./docs --output docs.swsearch
155
+ ```
156
+
157
+ #### pgvector Backend
158
+ ```bash
159
+ sw-search ./docs \
160
+ --backend pgvector \
161
+ --connection-string "postgresql://localhost/knowledge" \
162
+ --output docs_collection
163
+ ```
164
+
165
+ ## Performance Considerations
166
+
167
+ ### SQLite
168
+ - Fast for small to medium datasets (<100k documents)
169
+ - Linear search for vector similarity
170
+ - Single-file deployment
171
+
172
+ ### pgvector
173
+ - Efficient for large datasets
174
+ - Uses IVFFlat or HNSW indexes
175
+ - Handles concurrent access well
176
+ - Requires PostgreSQL server
177
+
178
+ ### NLP Backends
179
+ - `nltk`: Fast, good for most use cases (~50-100ms)
180
+ - `spacy`: Better quality, slower (~150-300ms)
181
+
182
+ ## Environment Variables
183
+
184
+ None required - all configuration comes through skill parameters.
185
+
186
+ ## Troubleshooting
187
+
188
+ ### "Search dependencies not available"
189
+ Install the search extras:
190
+ ```bash
191
+ pip install signalwire-agents[search]
192
+ ```
193
+
194
+ ### "pgvector dependencies not available"
195
+ Install pgvector support:
196
+ ```bash
197
+ pip install signalwire-agents[pgvector]
198
+ ```
199
+
200
+ ### "Failed to connect to pgvector"
201
+ 1. Ensure PostgreSQL is running
202
+ 2. Check connection string
203
+ 3. Verify pgvector extension is installed
204
+ 4. Check collection exists
205
+
206
+ ### Poor Search Results
207
+ 1. Try different NLP backends
208
+ 2. Adjust distance_threshold
209
+ 3. Check document preprocessing
210
+ 4. Verify index quality
@@ -28,6 +28,174 @@ class NativeVectorSearchSkill(SkillBase):
28
28
  # Enable multiple instances support
29
29
  SUPPORTS_MULTIPLE_INSTANCES = True
30
30
 
31
+ @classmethod
32
+ def get_parameter_schema(cls) -> Dict[str, Dict[str, Any]]:
33
+ """Get parameter schema for Native Vector Search skill
34
+
35
+ This skill supports three modes of operation:
36
+ 1. Network Mode: Set 'remote_url' to connect to a remote search server
37
+ 2. Local pgvector: Set backend='pgvector' with connection_string and collection_name
38
+ 3. Local SQLite: Set 'index_file' to use a local .swsearch file (default)
39
+ """
40
+ schema = super().get_parameter_schema()
41
+ schema.update({
42
+ "index_file": {
43
+ "type": "string",
44
+ "description": "Path to .swsearch index file (SQLite backend only). Use this for local file-based search",
45
+ "required": False
46
+ },
47
+ "build_index": {
48
+ "type": "boolean",
49
+ "description": "Whether to build index from source files",
50
+ "default": False,
51
+ "required": False
52
+ },
53
+ "source_dir": {
54
+ "type": "string",
55
+ "description": "Directory containing documents to index (required if build_index=True)",
56
+ "required": False
57
+ },
58
+ "remote_url": {
59
+ "type": "string",
60
+ "description": "URL of remote search server for network mode (e.g., http://localhost:8001). Use this instead of index_file or pgvector for centralized search",
61
+ "required": False
62
+ },
63
+ "index_name": {
64
+ "type": "string",
65
+ "description": "Name of index on remote server (network mode only, used with remote_url)",
66
+ "default": "default",
67
+ "required": False
68
+ },
69
+ "count": {
70
+ "type": "integer",
71
+ "description": "Number of search results to return",
72
+ "default": 5,
73
+ "required": False,
74
+ "minimum": 1,
75
+ "maximum": 20
76
+ },
77
+ "distance_threshold": {
78
+ "type": "number",
79
+ "description": "Maximum distance threshold for results (0.0 = no limit)",
80
+ "default": 0.0,
81
+ "required": False,
82
+ "minimum": 0.0,
83
+ "maximum": 1.0
84
+ },
85
+ "tags": {
86
+ "type": "array",
87
+ "description": "Tags to filter search results",
88
+ "default": [],
89
+ "required": False,
90
+ "items": {
91
+ "type": "string"
92
+ }
93
+ },
94
+ "global_tags": {
95
+ "type": "array",
96
+ "description": "Tags to apply to all indexed documents",
97
+ "default": [],
98
+ "required": False,
99
+ "items": {
100
+ "type": "string"
101
+ }
102
+ },
103
+ "file_types": {
104
+ "type": "array",
105
+ "description": "File extensions to include when building index",
106
+ "default": ["md", "txt", "pdf", "docx", "html"],
107
+ "required": False,
108
+ "items": {
109
+ "type": "string"
110
+ }
111
+ },
112
+ "exclude_patterns": {
113
+ "type": "array",
114
+ "description": "Patterns to exclude when building index",
115
+ "default": ["**/node_modules/**", "**/.git/**", "**/dist/**", "**/build/**"],
116
+ "required": False,
117
+ "items": {
118
+ "type": "string"
119
+ }
120
+ },
121
+ "no_results_message": {
122
+ "type": "string",
123
+ "description": "Message when no results are found",
124
+ "default": "No information found for '{query}'",
125
+ "required": False
126
+ },
127
+ "response_prefix": {
128
+ "type": "string",
129
+ "description": "Prefix to add to search results",
130
+ "default": "",
131
+ "required": False
132
+ },
133
+ "response_postfix": {
134
+ "type": "string",
135
+ "description": "Postfix to add to search results",
136
+ "default": "",
137
+ "required": False
138
+ },
139
+ "description": {
140
+ "type": "string",
141
+ "description": "Tool description",
142
+ "default": "Search the knowledge base for information",
143
+ "required": False
144
+ },
145
+ "hints": {
146
+ "type": "array",
147
+ "description": "Speech recognition hints",
148
+ "default": [],
149
+ "required": False,
150
+ "items": {
151
+ "type": "string"
152
+ }
153
+ },
154
+ "nlp_backend": {
155
+ "type": "string",
156
+ "description": "NLP backend for query processing",
157
+ "default": "basic",
158
+ "required": False,
159
+ "enum": ["basic", "spacy", "nltk"]
160
+ },
161
+ "query_nlp_backend": {
162
+ "type": "string",
163
+ "description": "NLP backend for query expansion",
164
+ "required": False,
165
+ "enum": ["basic", "spacy", "nltk"]
166
+ },
167
+ "index_nlp_backend": {
168
+ "type": "string",
169
+ "description": "NLP backend for indexing",
170
+ "required": False,
171
+ "enum": ["basic", "spacy", "nltk"]
172
+ },
173
+ "backend": {
174
+ "type": "string",
175
+ "description": "Storage backend for local database mode: 'sqlite' for file-based or 'pgvector' for PostgreSQL. Ignored if remote_url is set",
176
+ "default": "sqlite",
177
+ "required": False,
178
+ "enum": ["sqlite", "pgvector"]
179
+ },
180
+ "connection_string": {
181
+ "type": "string",
182
+ "description": "PostgreSQL connection string (pgvector backend only, e.g., 'postgresql://user:pass@localhost:5432/dbname'). Required when backend='pgvector'",
183
+ "required": False
184
+ },
185
+ "collection_name": {
186
+ "type": "string",
187
+ "description": "Collection/table name in PostgreSQL (pgvector backend only). Required when backend='pgvector'",
188
+ "required": False
189
+ },
190
+ "verbose": {
191
+ "type": "boolean",
192
+ "description": "Enable verbose logging",
193
+ "default": False,
194
+ "required": False
195
+ }
196
+ })
197
+ return schema
198
+
31
199
  def get_instance_key(self) -> str:
32
200
  """
33
201
  Get the key used to track this skill instance
@@ -43,6 +211,9 @@ class NativeVectorSearchSkill(SkillBase):
43
211
 
44
212
  # Get configuration first
45
213
  self.tool_name = self.params.get('tool_name', 'search_knowledge')
214
+ self.backend = self.params.get('backend', 'sqlite')
215
+ self.connection_string = self.params.get('connection_string')
216
+ self.collection_name = self.params.get('collection_name')
46
217
  self.index_file = self.params.get('index_file')
47
218
  self.build_index = self.params.get('build_index', False)
48
219
  self.source_dir = self.params.get('source_dir')
@@ -153,13 +324,32 @@ class NativeVectorSearchSkill(SkillBase):
153
324
 
154
325
  # Initialize local search engine
155
326
  self.search_engine = None
156
- if self.search_available and self.index_file and os.path.exists(self.index_file):
157
- try:
158
- from signalwire_agents.search import SearchEngine
159
- self.search_engine = SearchEngine(self.index_file)
160
- except Exception as e:
161
- self.logger.error(f"Failed to load search index {self.index_file}: {e}")
162
- self.search_available = False
327
+ if self.search_available:
328
+ if self.backend == 'pgvector':
329
+ # Initialize pgvector backend
330
+ if self.connection_string and self.collection_name:
331
+ try:
332
+ from signalwire_agents.search import SearchEngine
333
+ self.search_engine = SearchEngine(
334
+ backend='pgvector',
335
+ connection_string=self.connection_string,
336
+ collection_name=self.collection_name
337
+ )
338
+ self.logger.info(f"Connected to pgvector collection: {self.collection_name}")
339
+ except Exception as e:
340
+ self.logger.error(f"Failed to connect to pgvector: {e}")
341
+ self.search_available = False
342
+ else:
343
+ self.logger.error("pgvector backend requires connection_string and collection_name")
344
+ self.search_available = False
345
+ elif self.index_file and os.path.exists(self.index_file):
346
+ # Initialize SQLite backend
347
+ try:
348
+ from signalwire_agents.search import SearchEngine
349
+ self.search_engine = SearchEngine(backend='sqlite', index_path=self.index_file)
350
+ except Exception as e:
351
+ self.logger.error(f"Failed to load search index {self.index_file}: {e}")
352
+ self.search_available = False
163
353
 
164
354
  return True
165
355
 
@@ -48,6 +48,42 @@ class PlayBackgroundFileSkill(SkillBase):
48
48
  SKILL_DESCRIPTION = "Control background file playback"
49
49
  SUPPORTS_MULTIPLE_INSTANCES = True
50
50
 
51
+ @classmethod
52
+ def get_parameter_schema(cls) -> Dict[str, Dict[str, Any]]:
53
+ """Get parameter schema for Play Background File skill"""
54
+ schema = super().get_parameter_schema()
55
+ schema.update({
56
+ "files": {
57
+ "type": "array",
58
+ "description": "Array of file configurations to make available for playback",
59
+ "required": True,
60
+ "items": {
61
+ "type": "object",
62
+ "properties": {
63
+ "key": {
64
+ "type": "string",
65
+ "description": "Unique identifier for the file"
66
+ },
67
+ "description": {
68
+ "type": "string",
69
+ "description": "Human-readable description of the file"
70
+ },
71
+ "url": {
72
+ "type": "string",
73
+ "description": "URL of the audio/video file to play"
74
+ },
75
+ "wait": {
76
+ "type": "boolean",
77
+ "description": "Whether to wait for file to finish playing",
78
+ "default": False
79
+ }
80
+ },
81
+ "required": ["key", "description", "url"]
82
+ }
83
+ }
84
+ })
85
+ return schema
86
+
51
87
  def __init__(self, agent, params: Dict[str, Any] = None):
52
88
  """
53
89
  Initialize the skill with configuration parameters.
@@ -128,6 +128,42 @@ class SkillRegistry:
128
128
  if not hasattr(skill_class, 'SKILL_NAME') or skill_class.SKILL_NAME is None:
129
129
  raise ValueError(f"{skill_class} must define SKILL_NAME")
130
130
 
131
+ # Validate that the skill has a proper parameter schema
132
+ if not hasattr(skill_class, 'get_parameter_schema') or not callable(getattr(skill_class, 'get_parameter_schema')):
133
+ raise ValueError(f"{skill_class.__name__} must have get_parameter_schema() classmethod")
134
+
135
+ # Try to call get_parameter_schema to ensure it's properly implemented
136
+ try:
137
+ schema = skill_class.get_parameter_schema()
138
+ if not isinstance(schema, dict):
139
+ raise ValueError(f"{skill_class.__name__}.get_parameter_schema() must return a dictionary, got {type(schema)}")
140
+
141
+ # Ensure it's not an empty schema (skills should at least have the base parameters)
142
+ if not schema:
143
+ raise ValueError(f"{skill_class.__name__}.get_parameter_schema() returned an empty dictionary. Skills should at least call super().get_parameter_schema()")
144
+
145
+ # Check if the skill has overridden the method (not just inherited base)
146
+ skill_method = getattr(skill_class, 'get_parameter_schema', None)
147
+ base_method = getattr(SkillBase, 'get_parameter_schema', None)
148
+
149
+ if skill_method and base_method:
150
+ # For class methods, check the underlying function
151
+ skill_func = skill_method.__func__ if hasattr(skill_method, '__func__') else skill_method
152
+ base_func = base_method.__func__ if hasattr(base_method, '__func__') else base_method
153
+
154
+ if skill_func is base_func:
155
+ # Get base schema to check if skill added any parameters
156
+ base_schema = SkillBase.get_parameter_schema()
157
+ if set(schema.keys()) == set(base_schema.keys()):
158
+ raise ValueError(f"{skill_class.__name__} must override get_parameter_schema() to define its specific parameters")
159
+
160
+ except AttributeError as e:
161
+ raise ValueError(f"{skill_class.__name__} must properly implement get_parameter_schema() classmethod")
162
+ except ValueError:
163
+ raise # Re-raise our validation errors
164
+ except Exception as e:
165
+ raise ValueError(f"{skill_class.__name__}.get_parameter_schema() failed: {e}")
166
+
131
167
  if skill_class.SKILL_NAME in self._skills:
132
168
  self.logger.warning(f"Skill '{skill_class.SKILL_NAME}' already registered")
133
169
  return
@@ -33,6 +33,119 @@ class SpiderSkill(SkillBase):
33
33
  # Compiled regex for performance
34
34
  WHITESPACE_REGEX = re.compile(r'\s+')
35
35
 
36
+ @classmethod
37
+ def get_parameter_schema(cls) -> Dict[str, Dict[str, Any]]:
38
+ """Get parameter schema for Spider skill"""
39
+ schema = super().get_parameter_schema()
40
+ schema.update({
41
+ "delay": {
42
+ "type": "number",
43
+ "description": "Delay between requests in seconds",
44
+ "default": 0.1,
45
+ "required": False,
46
+ "minimum": 0.0
47
+ },
48
+ "concurrent_requests": {
49
+ "type": "integer",
50
+ "description": "Number of concurrent requests allowed",
51
+ "default": 5,
52
+ "required": False,
53
+ "minimum": 1,
54
+ "maximum": 20
55
+ },
56
+ "timeout": {
57
+ "type": "integer",
58
+ "description": "Request timeout in seconds",
59
+ "default": 5,
60
+ "required": False,
61
+ "minimum": 1,
62
+ "maximum": 60
63
+ },
64
+ "max_pages": {
65
+ "type": "integer",
66
+ "description": "Maximum number of pages to scrape",
67
+ "default": 1,
68
+ "required": False,
69
+ "minimum": 1,
70
+ "maximum": 100
71
+ },
72
+ "max_depth": {
73
+ "type": "integer",
74
+ "description": "Maximum crawl depth (0 = single page only)",
75
+ "default": 0,
76
+ "required": False,
77
+ "minimum": 0,
78
+ "maximum": 5
79
+ },
80
+ "extract_type": {
81
+ "type": "string",
82
+ "description": "Content extraction method",
83
+ "default": "fast_text",
84
+ "required": False,
85
+ "enum": ["fast_text", "clean_text", "full_text", "html", "custom"]
86
+ },
87
+ "max_text_length": {
88
+ "type": "integer",
89
+ "description": "Maximum text length to return",
90
+ "default": 10000,
91
+ "required": False,
92
+ "minimum": 100,
93
+ "maximum": 100000
94
+ },
95
+ "clean_text": {
96
+ "type": "boolean",
97
+ "description": "Whether to clean extracted text",
98
+ "default": True,
99
+ "required": False
100
+ },
101
+ "selectors": {
102
+ "type": "object",
103
+ "description": "Custom CSS/XPath selectors for extraction",
104
+ "default": {},
105
+ "required": False,
106
+ "additionalProperties": {
107
+ "type": "string"
108
+ }
109
+ },
110
+ "follow_patterns": {
111
+ "type": "array",
112
+ "description": "URL patterns to follow when crawling",
113
+ "default": [],
114
+ "required": False,
115
+ "items": {
116
+ "type": "string"
117
+ }
118
+ },
119
+ "user_agent": {
120
+ "type": "string",
121
+ "description": "User agent string for requests",
122
+ "default": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
123
+ "required": False
124
+ },
125
+ "headers": {
126
+ "type": "object",
127
+ "description": "Additional HTTP headers",
128
+ "default": {},
129
+ "required": False,
130
+ "additionalProperties": {
131
+ "type": "string"
132
+ }
133
+ },
134
+ "follow_robots_txt": {
135
+ "type": "boolean",
136
+ "description": "Whether to respect robots.txt",
137
+ "default": True,
138
+ "required": False
139
+ },
140
+ "cache_enabled": {
141
+ "type": "boolean",
142
+ "description": "Whether to cache scraped pages",
143
+ "default": True,
144
+ "required": False
145
+ }
146
+ })
147
+ return schema
148
+
36
149
  def __init__(self, agent, params: Dict[str, Any]):
37
150
  """Initialize the spider skill with configuration parameters."""
38
151
  super().__init__(agent, params)