signalwire-agents 0.1.13__py3-none-any.whl → 1.0.17.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. signalwire_agents/__init__.py +99 -15
  2. signalwire_agents/agent_server.py +248 -60
  3. signalwire_agents/agents/bedrock.py +296 -0
  4. signalwire_agents/cli/__init__.py +9 -0
  5. signalwire_agents/cli/build_search.py +951 -41
  6. signalwire_agents/cli/config.py +80 -0
  7. signalwire_agents/cli/core/__init__.py +10 -0
  8. signalwire_agents/cli/core/agent_loader.py +470 -0
  9. signalwire_agents/cli/core/argparse_helpers.py +179 -0
  10. signalwire_agents/cli/core/dynamic_config.py +71 -0
  11. signalwire_agents/cli/core/service_loader.py +303 -0
  12. signalwire_agents/cli/dokku.py +2320 -0
  13. signalwire_agents/cli/execution/__init__.py +10 -0
  14. signalwire_agents/cli/execution/datamap_exec.py +446 -0
  15. signalwire_agents/cli/execution/webhook_exec.py +134 -0
  16. signalwire_agents/cli/init_project.py +2636 -0
  17. signalwire_agents/cli/output/__init__.py +10 -0
  18. signalwire_agents/cli/output/output_formatter.py +255 -0
  19. signalwire_agents/cli/output/swml_dump.py +186 -0
  20. signalwire_agents/cli/simulation/__init__.py +10 -0
  21. signalwire_agents/cli/simulation/data_generation.py +374 -0
  22. signalwire_agents/cli/simulation/data_overrides.py +200 -0
  23. signalwire_agents/cli/simulation/mock_env.py +282 -0
  24. signalwire_agents/cli/swaig_test_wrapper.py +52 -0
  25. signalwire_agents/cli/test_swaig.py +566 -2366
  26. signalwire_agents/cli/types.py +81 -0
  27. signalwire_agents/core/__init__.py +2 -2
  28. signalwire_agents/core/agent/__init__.py +12 -0
  29. signalwire_agents/core/agent/config/__init__.py +12 -0
  30. signalwire_agents/core/agent/deployment/__init__.py +9 -0
  31. signalwire_agents/core/agent/deployment/handlers/__init__.py +9 -0
  32. signalwire_agents/core/agent/prompt/__init__.py +14 -0
  33. signalwire_agents/core/agent/prompt/manager.py +306 -0
  34. signalwire_agents/core/agent/routing/__init__.py +9 -0
  35. signalwire_agents/core/agent/security/__init__.py +9 -0
  36. signalwire_agents/core/agent/swml/__init__.py +9 -0
  37. signalwire_agents/core/agent/tools/__init__.py +15 -0
  38. signalwire_agents/core/agent/tools/decorator.py +97 -0
  39. signalwire_agents/core/agent/tools/registry.py +210 -0
  40. signalwire_agents/core/agent_base.py +845 -2916
  41. signalwire_agents/core/auth_handler.py +233 -0
  42. signalwire_agents/core/config_loader.py +259 -0
  43. signalwire_agents/core/contexts.py +418 -0
  44. signalwire_agents/core/data_map.py +3 -15
  45. signalwire_agents/core/function_result.py +116 -44
  46. signalwire_agents/core/logging_config.py +162 -18
  47. signalwire_agents/core/mixins/__init__.py +28 -0
  48. signalwire_agents/core/mixins/ai_config_mixin.py +442 -0
  49. signalwire_agents/core/mixins/auth_mixin.py +280 -0
  50. signalwire_agents/core/mixins/prompt_mixin.py +358 -0
  51. signalwire_agents/core/mixins/serverless_mixin.py +460 -0
  52. signalwire_agents/core/mixins/skill_mixin.py +55 -0
  53. signalwire_agents/core/mixins/state_mixin.py +153 -0
  54. signalwire_agents/core/mixins/tool_mixin.py +230 -0
  55. signalwire_agents/core/mixins/web_mixin.py +1142 -0
  56. signalwire_agents/core/security_config.py +333 -0
  57. signalwire_agents/core/skill_base.py +84 -1
  58. signalwire_agents/core/skill_manager.py +62 -20
  59. signalwire_agents/core/swaig_function.py +18 -5
  60. signalwire_agents/core/swml_builder.py +207 -11
  61. signalwire_agents/core/swml_handler.py +27 -21
  62. signalwire_agents/core/swml_renderer.py +123 -312
  63. signalwire_agents/core/swml_service.py +171 -203
  64. signalwire_agents/mcp_gateway/__init__.py +29 -0
  65. signalwire_agents/mcp_gateway/gateway_service.py +564 -0
  66. signalwire_agents/mcp_gateway/mcp_manager.py +513 -0
  67. signalwire_agents/mcp_gateway/session_manager.py +218 -0
  68. signalwire_agents/prefabs/concierge.py +0 -3
  69. signalwire_agents/prefabs/faq_bot.py +0 -3
  70. signalwire_agents/prefabs/info_gatherer.py +0 -3
  71. signalwire_agents/prefabs/receptionist.py +0 -3
  72. signalwire_agents/prefabs/survey.py +0 -3
  73. signalwire_agents/schema.json +9218 -5489
  74. signalwire_agents/search/__init__.py +7 -1
  75. signalwire_agents/search/document_processor.py +490 -31
  76. signalwire_agents/search/index_builder.py +307 -37
  77. signalwire_agents/search/migration.py +418 -0
  78. signalwire_agents/search/models.py +30 -0
  79. signalwire_agents/search/pgvector_backend.py +748 -0
  80. signalwire_agents/search/query_processor.py +162 -31
  81. signalwire_agents/search/search_engine.py +916 -35
  82. signalwire_agents/search/search_service.py +376 -53
  83. signalwire_agents/skills/README.md +452 -0
  84. signalwire_agents/skills/__init__.py +14 -2
  85. signalwire_agents/skills/api_ninjas_trivia/README.md +215 -0
  86. signalwire_agents/skills/api_ninjas_trivia/__init__.py +12 -0
  87. signalwire_agents/skills/api_ninjas_trivia/skill.py +237 -0
  88. signalwire_agents/skills/datasphere/README.md +210 -0
  89. signalwire_agents/skills/datasphere/skill.py +84 -3
  90. signalwire_agents/skills/datasphere_serverless/README.md +258 -0
  91. signalwire_agents/skills/datasphere_serverless/__init__.py +9 -0
  92. signalwire_agents/skills/datasphere_serverless/skill.py +82 -1
  93. signalwire_agents/skills/datetime/README.md +132 -0
  94. signalwire_agents/skills/datetime/__init__.py +9 -0
  95. signalwire_agents/skills/datetime/skill.py +20 -7
  96. signalwire_agents/skills/joke/README.md +149 -0
  97. signalwire_agents/skills/joke/__init__.py +9 -0
  98. signalwire_agents/skills/joke/skill.py +21 -0
  99. signalwire_agents/skills/math/README.md +161 -0
  100. signalwire_agents/skills/math/__init__.py +9 -0
  101. signalwire_agents/skills/math/skill.py +18 -4
  102. signalwire_agents/skills/mcp_gateway/README.md +230 -0
  103. signalwire_agents/skills/mcp_gateway/__init__.py +10 -0
  104. signalwire_agents/skills/mcp_gateway/skill.py +421 -0
  105. signalwire_agents/skills/native_vector_search/README.md +210 -0
  106. signalwire_agents/skills/native_vector_search/__init__.py +9 -0
  107. signalwire_agents/skills/native_vector_search/skill.py +569 -101
  108. signalwire_agents/skills/play_background_file/README.md +218 -0
  109. signalwire_agents/skills/play_background_file/__init__.py +12 -0
  110. signalwire_agents/skills/play_background_file/skill.py +242 -0
  111. signalwire_agents/skills/registry.py +395 -40
  112. signalwire_agents/skills/spider/README.md +236 -0
  113. signalwire_agents/skills/spider/__init__.py +13 -0
  114. signalwire_agents/skills/spider/skill.py +598 -0
  115. signalwire_agents/skills/swml_transfer/README.md +395 -0
  116. signalwire_agents/skills/swml_transfer/__init__.py +10 -0
  117. signalwire_agents/skills/swml_transfer/skill.py +359 -0
  118. signalwire_agents/skills/weather_api/README.md +178 -0
  119. signalwire_agents/skills/weather_api/__init__.py +12 -0
  120. signalwire_agents/skills/weather_api/skill.py +191 -0
  121. signalwire_agents/skills/web_search/README.md +163 -0
  122. signalwire_agents/skills/web_search/__init__.py +9 -0
  123. signalwire_agents/skills/web_search/skill.py +586 -112
  124. signalwire_agents/skills/wikipedia_search/README.md +228 -0
  125. signalwire_agents/{core/state → skills/wikipedia_search}/__init__.py +5 -4
  126. signalwire_agents/skills/{wikipedia → wikipedia_search}/skill.py +33 -3
  127. signalwire_agents/web/__init__.py +17 -0
  128. signalwire_agents/web/web_service.py +559 -0
  129. signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-agent-init.1 +400 -0
  130. signalwire_agents-1.0.17.dev4.data/data/share/man/man1/sw-search.1 +483 -0
  131. signalwire_agents-1.0.17.dev4.data/data/share/man/man1/swaig-test.1 +308 -0
  132. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/METADATA +347 -215
  133. signalwire_agents-1.0.17.dev4.dist-info/RECORD +147 -0
  134. signalwire_agents-1.0.17.dev4.dist-info/entry_points.txt +6 -0
  135. signalwire_agents/core/state/file_state_manager.py +0 -219
  136. signalwire_agents/core/state/state_manager.py +0 -101
  137. signalwire_agents/skills/wikipedia/__init__.py +0 -9
  138. signalwire_agents-0.1.13.data/data/schema.json +0 -5611
  139. signalwire_agents-0.1.13.dist-info/RECORD +0 -67
  140. signalwire_agents-0.1.13.dist-info/entry_points.txt +0 -3
  141. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/WHEEL +0 -0
  142. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/licenses/LICENSE +0 -0
  143. {signalwire_agents-0.1.13.dist-info → signalwire_agents-1.0.17.dev4.dist-info}/top_level.txt +0 -0
@@ -77,22 +77,87 @@ def load_spacy_model(language: str):
         _spacy_warning_shown = True
     return None
 
-def vectorize_query(query: str):
+# Model cache - stores multiple models by name
+_model_cache = {} # model_name -> SentenceTransformer instance
+_model_lock = None
+
+def set_global_model(model):
+    """Legacy function - adds model to cache instead of setting globally"""
+    if model and hasattr(model, 'model_name'):
+        _model_cache[model.model_name] = model
+        logger.info(f"Model added to cache: {model.model_name}")
+
+def _get_cached_model(model_name: str = None):
+    """Get or create cached sentence transformer model
+
+    Args:
+        model_name: Optional model name. If not provided, uses default.
+    """
+    global _model_cache, _model_lock
+
+    # Default model
+    if model_name is None:
+        model_name = 'sentence-transformers/all-mpnet-base-v2'
+
+    # Initialize lock if needed
+    if _model_lock is None:
+        import threading
+        _model_lock = threading.Lock()
+
+    # Check if model is already in cache
+    if model_name in _model_cache:
+        return _model_cache[model_name]
+
+    # Load model with lock to prevent race conditions
+    with _model_lock:
+        # Double check in case another thread loaded it
+        if model_name in _model_cache:
+            return _model_cache[model_name]
+
+        try:
+            from sentence_transformers import SentenceTransformer
+            logger.info(f"Loading sentence transformer model: {model_name}")
+            model = SentenceTransformer(model_name)
+            # Store the model name for identification
+            model.model_name = model_name
+            # Add to cache
+            _model_cache[model_name] = model
+            logger.info(f"Successfully loaded and cached model: {model_name}")
+            return model
+        except ImportError:
+            logger.error("sentence-transformers not available. Cannot load model.")
+            return None
+        except Exception as e:
+            logger.error(f"Failed to load model {model_name}: {e}")
+            return None
+
+def vectorize_query(query: str, model=None, model_name: str = None):
     """
     Vectorize query using sentence transformers
     Returns numpy array of embeddings
+
+    Args:
+        query: Query string to vectorize
+        model: Optional pre-loaded model instance. If not provided, uses cached model.
+        model_name: Optional model name to use if loading a new model
     """
     try:
-        from sentence_transformers import SentenceTransformer
         import numpy as np
 
-        # Use the same model as specified in the architecture
-        model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
+        # Use provided model or get cached one
+        if model is None:
+            model = _get_cached_model(model_name)
+            if model is None:
+                return None
+
         embedding = model.encode(query, show_progress_bar=False)
         return embedding
 
     except ImportError:
-        logger.error("sentence-transformers not available. Cannot vectorize query.")
+        logger.error("numpy not available. Cannot vectorize query.")
+        return None
+    except Exception as e:
+        logger.error(f"Error vectorizing query: {e}")
         return None
 
 # Language to NLTK stopwords mapping
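
With the new module-level cache, repeated calls reuse a single SentenceTransformer instance per model name instead of reloading the model on every query. A minimal sketch of how the updated signature might be exercised; the import path follows the file being diffed (signalwire_agents/search/query_processor.py), the model name is the default shown above, and the query strings are illustrative:

    from signalwire_agents.search.query_processor import vectorize_query, _get_cached_model

    # First call loads the default model and caches it; later calls reuse the cached instance
    embedding = vectorize_query("how do I configure an agent")

    # A specific model can be requested by name, or a pre-loaded instance passed directly
    model = _get_cached_model('sentence-transformers/all-mpnet-base-v2')
    embedding_2 = vectorize_query("another question", model=model)
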
@@ -118,15 +183,28 @@ stopwords_language_map = {
 # Function to ensure NLTK resources are downloaded
 def ensure_nltk_resources():
     """Download required NLTK resources if not already present"""
-    resources = ['punkt', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']
+    resources = ['punkt', 'punkt_tab', 'wordnet', 'averaged_perceptron_tagger', 'stopwords']
     for resource in resources:
         try:
-            nltk.data.find(f'corpora/{resource}')
+            # Try different paths for different resource types
+            if resource in ['punkt', 'punkt_tab']:
+                nltk.data.find(f'tokenizers/{resource}')
+            elif resource in ['wordnet']:
+                nltk.data.find(f'corpora/{resource}')
+            elif resource in ['averaged_perceptron_tagger']:
+                nltk.data.find(f'taggers/{resource}')
+            elif resource in ['stopwords']:
+                nltk.data.find(f'corpora/{resource}')
+            else:
+                nltk.data.find(f'corpora/{resource}')
         except LookupError:
             try:
+                logger.info(f"Downloading NLTK resource '{resource}'...")
                 nltk.download(resource, quiet=True)
+                logger.info(f"Successfully downloaded NLTK resource '{resource}'")
             except Exception as e:
                 logger.warning(f"Failed to download NLTK resource '{resource}': {e}")
+                # Continue without this resource - some functionality may be degraded
 
 # Initialize NLTK resources
 ensure_nltk_resources()
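
The main effect here is support for newer NLTK releases, which ship the Punkt data as a separate 'punkt_tab' resource and index each resource under its own category (tokenizers/, taggers/, corpora/). A small sketch that checks an environment by hand using the same category mapping as the hunk above; the script itself is illustrative and not part of the package:

    import nltk

    # Resource-to-path mapping mirrors ensure_nltk_resources()
    checks = {
        'punkt': 'tokenizers/punkt',
        'punkt_tab': 'tokenizers/punkt_tab',
        'wordnet': 'corpora/wordnet',
        'averaged_perceptron_tagger': 'taggers/averaged_perceptron_tagger',
        'stopwords': 'corpora/stopwords',
    }
    for name, path in checks.items():
        try:
            nltk.data.find(path)
            print(f"{name}: present")
        except LookupError:
            print(f"{name}: missing, nltk.download('{name}') will fetch it")
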
@@ -186,7 +264,9 @@ def remove_duplicate_words(input_string: str) -> str:
 
 def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[List[str]] = None,
                      max_synonyms: int = 5, debug: bool = False, vector: bool = False,
-                     vectorize_query_param: bool = False, nlp_backend: str = 'nltk') -> Dict[str, Any]:
+                     vectorize_query_param: bool = False, nlp_backend: str = None,
+                     query_nlp_backend: str = 'nltk', model_name: str = None,
+                     preserve_original: bool = True) -> Dict[str, Any]:
     """
     Advanced query preprocessing with language detection, POS tagging, synonym expansion, and vectorization
 
@@ -198,12 +278,19 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
         debug: Enable debug output
         vector: Include vector embedding in output
         vectorize_query_param: If True, just vectorize without other processing
-        nlp_backend: NLP backend to use ('nltk' for fast, 'spacy' for better quality)
+        nlp_backend: DEPRECATED - use query_nlp_backend instead
+        query_nlp_backend: NLP backend for query processing ('nltk' for fast, 'spacy' for better quality)
 
     Returns:
         Dict containing processed query, language, POS tags, and optionally vector
     """
 
+    # Handle backward compatibility
+    if nlp_backend is not None:
+        query_nlp_backend = nlp_backend
+        if debug:
+            logger.info(f"Using deprecated 'nlp_backend' parameter, please use 'query_nlp_backend' instead")
+
     if vectorize_query_param:
         # Vectorize the query directly
         vectorized_query = vectorize_query(query)
@@ -226,18 +313,32 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
 
     # Load spaCy model based on the language and backend choice
     nlp = None
-    if nlp_backend == 'spacy':
+    if query_nlp_backend == 'spacy':
         nlp = load_spacy_model(language)
         if nlp is None and debug:
             logger.info("spaCy backend requested but not available, falling back to NLTK")
-    elif nlp_backend == 'nltk':
+    elif query_nlp_backend == 'nltk':
         if debug:
-            logger.info("Using NLTK backend for NLP processing")
+            logger.info("Using NLTK backend for query processing")
     else:
-        logger.warning(f"Unknown NLP backend '{nlp_backend}', using NLTK")
+        logger.warning(f"Unknown query NLP backend '{query_nlp_backend}', using NLTK")
+        query_nlp_backend = 'nltk'
 
     # Tokenization and stop word removal
-    tokens = nltk.word_tokenize(query)
+    try:
+        tokens = nltk.word_tokenize(query)
+    except LookupError as e:
+        # If tokenization fails, try to download punkt resources
+        logger.warning(f"NLTK tokenization failed: {e}")
+        try:
+            nltk.download('punkt', quiet=True)
+            nltk.download('punkt_tab', quiet=True)
+            tokens = nltk.word_tokenize(query)
+        except Exception as fallback_error:
+            # If all else fails, use simple split as fallback
+            logger.warning(f"NLTK tokenization fallback failed: {fallback_error}. Using simple word splitting.")
+            tokens = query.split()
+
     nltk_language = stopwords_language_map.get(language, 'english')
 
     try:
@@ -258,7 +359,7 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
     lemmas = []
     pos_tags = {}
 
-    if nlp and nlp_backend == 'spacy':
+    if nlp and query_nlp_backend == 'spacy':
         # Use spaCy for better POS tagging
         doc = nlp(" ".join(tokens))
         for token in doc:
@@ -270,27 +371,51 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
             logger.info(f"POS Tagging Results (spaCy): {pos_tags}")
     else:
         # Use NLTK (default or fallback)
-        nltk_pos_tags = nltk.pos_tag(tokens)
-        for token, pos_tag in nltk_pos_tags:
-            lemma = lemmatizer.lemmatize(token, get_wordnet_pos(pos_tag)).lower()
-            stemmed = stemmer.stem(lemma)
-            lemmas.append((token.lower(), stemmed))
-            pos_tags[token.lower()] = pos_tag
-        if debug:
-            logger.info(f"POS Tagging Results (NLTK): {pos_tags}")
+        try:
+            nltk_pos_tags = nltk.pos_tag(tokens)
+            for token, pos_tag in nltk_pos_tags:
+                try:
+                    lemma = lemmatizer.lemmatize(token, get_wordnet_pos(pos_tag)).lower()
+                except Exception:
+                    # Fallback if lemmatization fails
+                    lemma = token.lower()
+                stemmed = stemmer.stem(lemma)
+                lemmas.append((token.lower(), stemmed))
+                pos_tags[token.lower()] = pos_tag
+            if debug:
+                logger.info(f"POS Tagging Results (NLTK): {pos_tags}")
+        except Exception as pos_error:
+            # Fallback if POS tagging fails completely
+            logger.warning(f"NLTK POS tagging failed: {pos_error}. Using basic token processing.")
+            for token in tokens:
+                lemma = token.lower()
+                stemmed = stemmer.stem(lemma)
+                lemmas.append((token.lower(), stemmed))
+                pos_tags[token.lower()] = 'NN' # Default to noun
+            if debug:
+                logger.info(f"Using fallback token processing for: {tokens}")
 
     # Expanding query with synonyms
     expanded_query_set = set()
     expanded_query = []
 
+    # If preserve_original is True, always include the original query first
+    if preserve_original:
+        # Add original query terms first (maintains exact phrases)
+        original_tokens = query.lower().split()
+        for token in original_tokens:
+            if token not in expanded_query_set:
+                expanded_query.append(token)
+                expanded_query_set.add(token)
+
     for original, lemma in lemmas:
         if original not in expanded_query_set:
             expanded_query.append(original)
             expanded_query_set.add(original)
-        if lemma not in expanded_query_set:
+        if lemma not in expanded_query_set and not preserve_original: # Only add lemmas if not preserving original
            expanded_query.append(lemma)
            expanded_query_set.add(lemma)
-        if pos_tags.get(original) in pos_to_expand:
+        if pos_tags.get(original) in pos_to_expand and max_synonyms > 0:
            synonyms = get_synonyms(lemma, pos_tags[original], max_synonyms)
            for synonym in synonyms:
                if synonym not in expanded_query_set:
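
In practice, preserve_original keeps the exact query tokens at the front of the expanded string, lemma/stem variants are only appended when the flag is off, and synonym lookups are skipped when max_synonyms is 0. A hedged sketch of the two modes; results are described in comments rather than reproduced, since the exact expansion depends on the NLTK data installed:

    from signalwire_agents.search.query_processor import preprocess_query

    # New default: original tokens lead the expansion, stems are suppressed
    kept = preprocess_query("running shoes for trails", preserve_original=True)

    # Previous behavior: stems/lemmas are interleaved with the original tokens
    expanded = preprocess_query("running shoes for trails", preserve_original=False)

    # Both return a dict with 'input', 'enhanced_text', 'language', 'POS', and
    # 'nlp_backend_used'; only the expanded text differs between the two calls.
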
@@ -303,19 +428,19 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
 
     if debug:
         logger.info(f"Expanded Query: {final_query_str}")
-        logger.info(f"NLP Backend Used: {nlp_backend if nlp or nlp_backend == 'nltk' else 'nltk (fallback)'}")
+        logger.info(f"NLP Backend Used: {query_nlp_backend if nlp or query_nlp_backend == 'nltk' else 'nltk (fallback)'}")
 
     formatted_output = {
         'input': final_query_str,
         'enhanced_text': final_query_str, # Alias for compatibility
         'language': language,
         'POS': pos_tags,
-        'nlp_backend_used': nlp_backend if nlp or nlp_backend == 'nltk' else 'nltk'
+        'nlp_backend_used': query_nlp_backend if nlp or query_nlp_backend == 'nltk' else 'nltk'
     }
 
     # Vectorize query if requested
     if vector:
-        vectorized_query = vectorize_query(final_query_str)
+        vectorized_query = vectorize_query(final_query_str, model_name=model_name)
         if vectorized_query is not None:
             formatted_output['vector'] = vectorized_query.tolist()
         else:
@@ -323,19 +448,25 @@ def preprocess_query(query: str, language: str = 'en', pos_to_expand: Optional[L
 
     return formatted_output
 
-def preprocess_document_content(content: str, language: str = 'en', nlp_backend: str = 'nltk') -> Dict[str, Any]:
+def preprocess_document_content(content: str, language: str = 'en', nlp_backend: str = None,
+                                index_nlp_backend: str = 'nltk') -> Dict[str, Any]:
     """
     Preprocess document content for better searchability
 
     Args:
         content: Document content to process
         language: Language code for processing
-        nlp_backend: NLP backend to use ('nltk' for fast, 'spacy' for better quality)
+        nlp_backend: DEPRECATED - use index_nlp_backend instead
+        index_nlp_backend: NLP backend for document processing ('nltk' for fast, 'spacy' for better quality)
 
     Returns:
         Dict containing enhanced text and extracted keywords
     """
 
+    # Handle backward compatibility
+    if nlp_backend is not None:
+        index_nlp_backend = nlp_backend
+
     # Use existing preprocessing but adapted for documents
     processed = preprocess_query(
         content,
@@ -344,7 +475,7 @@ def preprocess_document_content(content: str, language: str = 'en', nlp_backend:
         max_synonyms=2, # Fewer synonyms for documents
         debug=False,
         vector=False,
-        nlp_backend=nlp_backend
+        query_nlp_backend=index_nlp_backend
     )
 
     # Extract key terms for keyword search
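
Taken together, these hunks split the old nlp_backend switch into query_nlp_backend (query time) and index_nlp_backend (document/index time), keeping the old keyword as a deprecated alias. A brief usage sketch under that reading; the argument values are illustrative:

    from signalwire_agents.search.query_processor import (
        preprocess_query,
        preprocess_document_content,
    )

    # Query-time processing: spaCy when available, NLTK as the fallback
    result = preprocess_query(
        "reset my voicemail pin",
        query_nlp_backend='spacy',
        vector=True,
        model_name='sentence-transformers/all-mpnet-base-v2',
    )

    # Index-time processing (synonym expansion is capped at 2 internally)
    doc = preprocess_document_content(
        "Voicemail PINs can be reset from the admin portal.",
        index_nlp_backend='nltk',
    )

    # Callers still passing nlp_backend=... keep working: the value is copied
    # into the new parameter, and preprocess_query logs a note when debug=True.
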