lfx-nightly 0.2.0.dev26__py3-none-any.whl → 0.2.1.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. lfx/_assets/component_index.json +1 -1
  2. lfx/base/agents/agent.py +9 -4
  3. lfx/base/agents/altk_base_agent.py +16 -3
  4. lfx/base/agents/altk_tool_wrappers.py +1 -1
  5. lfx/base/agents/utils.py +4 -0
  6. lfx/base/composio/composio_base.py +78 -41
  7. lfx/base/data/base_file.py +14 -4
  8. lfx/base/data/cloud_storage_utils.py +156 -0
  9. lfx/base/data/docling_utils.py +191 -65
  10. lfx/base/data/storage_utils.py +109 -0
  11. lfx/base/datastax/astradb_base.py +75 -64
  12. lfx/base/mcp/util.py +2 -2
  13. lfx/base/models/__init__.py +11 -1
  14. lfx/base/models/anthropic_constants.py +21 -12
  15. lfx/base/models/google_generative_ai_constants.py +33 -9
  16. lfx/base/models/model_metadata.py +6 -0
  17. lfx/base/models/ollama_constants.py +196 -30
  18. lfx/base/models/openai_constants.py +37 -10
  19. lfx/base/models/unified_models.py +1123 -0
  20. lfx/base/models/watsonx_constants.py +36 -0
  21. lfx/base/tools/component_tool.py +2 -9
  22. lfx/cli/commands.py +6 -1
  23. lfx/cli/run.py +65 -409
  24. lfx/cli/script_loader.py +13 -3
  25. lfx/components/__init__.py +0 -3
  26. lfx/components/composio/github_composio.py +1 -1
  27. lfx/components/cuga/cuga_agent.py +39 -27
  28. lfx/components/data_source/api_request.py +4 -2
  29. lfx/components/docling/__init__.py +45 -11
  30. lfx/components/docling/chunk_docling_document.py +3 -1
  31. lfx/components/docling/docling_inline.py +39 -49
  32. lfx/components/docling/export_docling_document.py +3 -1
  33. lfx/components/elastic/opensearch_multimodal.py +215 -57
  34. lfx/components/files_and_knowledge/file.py +439 -39
  35. lfx/components/files_and_knowledge/ingestion.py +8 -0
  36. lfx/components/files_and_knowledge/retrieval.py +10 -0
  37. lfx/components/files_and_knowledge/save_file.py +123 -53
  38. lfx/components/ibm/watsonx.py +7 -1
  39. lfx/components/input_output/chat_output.py +7 -1
  40. lfx/components/langchain_utilities/tool_calling.py +14 -6
  41. lfx/components/llm_operations/batch_run.py +80 -25
  42. lfx/components/llm_operations/lambda_filter.py +33 -6
  43. lfx/components/llm_operations/llm_conditional_router.py +39 -7
  44. lfx/components/llm_operations/structured_output.py +38 -12
  45. lfx/components/models/__init__.py +16 -74
  46. lfx/components/models_and_agents/agent.py +51 -201
  47. lfx/components/models_and_agents/embedding_model.py +185 -339
  48. lfx/components/models_and_agents/language_model.py +54 -318
  49. lfx/components/models_and_agents/mcp_component.py +58 -9
  50. lfx/components/ollama/ollama.py +9 -4
  51. lfx/components/ollama/ollama_embeddings.py +2 -1
  52. lfx/components/openai/openai_chat_model.py +1 -1
  53. lfx/components/processing/__init__.py +0 -3
  54. lfx/components/vllm/__init__.py +37 -0
  55. lfx/components/vllm/vllm.py +141 -0
  56. lfx/components/vllm/vllm_embeddings.py +110 -0
  57. lfx/custom/custom_component/custom_component.py +8 -6
  58. lfx/custom/directory_reader/directory_reader.py +5 -2
  59. lfx/graph/utils.py +64 -18
  60. lfx/inputs/__init__.py +2 -0
  61. lfx/inputs/input_mixin.py +54 -0
  62. lfx/inputs/inputs.py +115 -0
  63. lfx/interface/initialize/loading.py +42 -12
  64. lfx/io/__init__.py +2 -0
  65. lfx/run/__init__.py +5 -0
  66. lfx/run/base.py +494 -0
  67. lfx/schema/data.py +1 -1
  68. lfx/schema/image.py +28 -19
  69. lfx/schema/message.py +19 -3
  70. lfx/services/interfaces.py +5 -0
  71. lfx/services/manager.py +5 -4
  72. lfx/services/mcp_composer/service.py +45 -13
  73. lfx/services/settings/auth.py +18 -11
  74. lfx/services/settings/base.py +12 -24
  75. lfx/services/settings/constants.py +2 -0
  76. lfx/services/storage/local.py +37 -0
  77. lfx/services/storage/service.py +19 -0
  78. lfx/utils/constants.py +1 -0
  79. lfx/utils/image.py +29 -11
  80. lfx/utils/validate_cloud.py +14 -3
  81. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/METADATA +5 -2
  82. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/RECORD +84 -78
  83. lfx/components/processing/dataframe_to_toolset.py +0 -259
  84. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/WHEEL +0 -0
  85. {lfx_nightly-0.2.0.dev26.dist-info → lfx_nightly-0.2.1.dev7.dist-info}/entry_points.txt +0 -0
@@ -641,8 +641,14 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
641
641
  @check_cached_vector_store
642
642
  def build_vector_store(self) -> OpenSearch:
643
643
  # Return raw OpenSearch client as our "vector store."
644
- self.log(self.ingest_data)
645
644
  client = self.build_client()
645
+
646
+ # Check if we're in ingestion-only mode (no search query)
647
+ has_search_query = bool((self.search_query or "").strip())
648
+ if not has_search_query:
649
+ logger.debug("Ingestion-only mode activated: search operations will be skipped")
650
+ logger.debug("Starting ingestion mode...")
651
+
646
652
  logger.warning(f"Embedding: {self.embedding}")
647
653
  self._add_documents_to_vector_store(client=client)
648
654
  return client
@@ -660,25 +666,41 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
660
666
  Args:
661
667
  client: OpenSearch client for performing operations
662
668
  """
669
+ logger.debug("[INGESTION] _add_documents_to_vector_store called")
663
670
  # Convert DataFrame to Data if needed using parent's method
664
671
  self.ingest_data = self._prepare_ingest_data()
665
672
 
673
+ logger.debug(
674
+ f"[INGESTION] ingest_data type: "
675
+ f"{type(self.ingest_data)}, length: {len(self.ingest_data) if self.ingest_data else 0}"
676
+ )
677
+ logger.debug(
678
+ f"[INGESTION] ingest_data content: "
679
+ f"{self.ingest_data[:2] if self.ingest_data and len(self.ingest_data) > 0 else 'empty'}"
680
+ )
681
+
666
682
  docs = self.ingest_data or []
667
683
  if not docs:
668
- self.log("No documents to ingest.")
684
+ logger.debug("Ingestion complete: No documents provided")
669
685
  return
670
686
 
671
687
  if not self.embedding:
672
688
  msg = "Embedding handle is required to embed documents."
673
689
  raise ValueError(msg)
674
690
 
675
- # Normalize embedding to list
691
+ # Normalize embedding to list first
676
692
  embeddings_list = self.embedding if isinstance(self.embedding, list) else [self.embedding]
677
693
 
694
+ # Filter out None values (fail-safe mode) - do this BEFORE checking if empty
695
+ embeddings_list = [e for e in embeddings_list if e is not None]
696
+
697
+ # NOW check if we have any valid embeddings left after filtering
678
698
  if not embeddings_list:
679
- msg = "At least one embedding is required to embed documents."
680
- raise ValueError(msg)
699
+ logger.warning("All embeddings returned None (fail-safe mode enabled). Skipping document ingestion.")
700
+ self.log("Embedding returned None (fail-safe mode enabled). Skipping document ingestion.")
701
+ return
681
702
 
703
+ logger.debug(f"[INGESTION] Valid embeddings after filtering: {len(embeddings_list)}")
682
704
  self.log(f"Available embedding models: {len(embeddings_list)}")
683
705
 
684
706
  # Select the embedding to use for ingestion
@@ -790,6 +812,7 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
790
812
 
791
813
  dynamic_field_name = get_embedding_field_name(embedding_model)
792
814
 
815
+ logger.info(f"Selected embedding model for ingestion: '{embedding_model}'")
793
816
  self.log(f"Using embedding model for ingestion: {embedding_model}")
794
817
  self.log(f"Dynamic vector field: {dynamic_field_name}")
795
818
 
@@ -814,6 +837,7 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
814
837
  metadatas = []
815
838
  # Process docs_metadata table input into a dict
816
839
  additional_metadata = {}
840
+ logger.debug(f"[LF] Docs metadata {self.docs_metadata}")
817
841
  if hasattr(self, "docs_metadata") and self.docs_metadata:
818
842
  logger.info(f"[LF] Docs metadata {self.docs_metadata}")
819
843
  if isinstance(self.docs_metadata[-1], Data):
@@ -841,51 +865,96 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
841
865
  metadatas.append(data_copy)
842
866
  self.log(metadatas)
843
867
 
844
- # Generate embeddings (threaded for concurrency) with retries
845
- def embed_chunk(chunk_text: str) -> list[float]:
846
- return selected_embedding.embed_documents([chunk_text])[0]
868
+ # Generate embeddings with rate-limit-aware retry logic using tenacity
869
+ from tenacity import (
870
+ retry,
871
+ retry_if_exception,
872
+ stop_after_attempt,
873
+ wait_exponential,
874
+ )
847
875
 
848
- vectors: list[list[float]] | None = None
849
- last_exception: Exception | None = None
850
- delay = 1.0
851
- attempts = 0
852
- max_attempts = 3
876
+ def is_rate_limit_error(exception: Exception) -> bool:
877
+ """Check if exception is a rate limit error (429)."""
878
+ error_str = str(exception).lower()
879
+ return "429" in error_str or "rate_limit" in error_str or "rate limit" in error_str
880
+
881
+ def is_other_retryable_error(exception: Exception) -> bool:
882
+ """Check if exception is retryable but not a rate limit error."""
883
+ # Retry on most exceptions except for specific non-retryable ones
884
+ # Add other non-retryable exceptions here if needed
885
+ return not is_rate_limit_error(exception)
886
+
887
+ # Create retry decorator for rate limit errors (longer backoff)
888
+ retry_on_rate_limit = retry(
889
+ retry=retry_if_exception(is_rate_limit_error),
890
+ stop=stop_after_attempt(5),
891
+ wait=wait_exponential(multiplier=2, min=2, max=30),
892
+ reraise=True,
893
+ before_sleep=lambda retry_state: logger.warning(
894
+ f"Rate limit hit for chunk (attempt {retry_state.attempt_number}/5), "
895
+ f"backing off for {retry_state.next_action.sleep:.1f}s"
896
+ ),
897
+ )
898
+
899
+ # Create retry decorator for other errors (shorter backoff)
900
+ retry_on_other_errors = retry(
901
+ retry=retry_if_exception(is_other_retryable_error),
902
+ stop=stop_after_attempt(3),
903
+ wait=wait_exponential(multiplier=1, min=1, max=8),
904
+ reraise=True,
905
+ before_sleep=lambda retry_state: logger.warning(
906
+ f"Error embedding chunk (attempt {retry_state.attempt_number}/3), "
907
+ f"retrying in {retry_state.next_action.sleep:.1f}s: {retry_state.outcome.exception()}"
908
+ ),
909
+ )
910
+
911
+ def embed_chunk_with_retry(chunk_text: str, chunk_idx: int) -> list[float]:
912
+ """Embed a single chunk with rate-limit-aware retry logic."""
913
+
914
+ @retry_on_rate_limit
915
+ @retry_on_other_errors
916
+ def _embed(text: str) -> list[float]:
917
+ return selected_embedding.embed_documents([text])[0]
853
918
 
854
- while attempts < max_attempts:
855
- attempts += 1
856
919
  try:
857
- max_workers = min(max(len(texts), 1), 8)
858
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
859
- futures = {executor.submit(embed_chunk, chunk): idx for idx, chunk in enumerate(texts)}
860
- vectors = [None] * len(texts)
861
- for future in as_completed(futures):
862
- idx = futures[future]
863
- vectors[idx] = future.result()
864
- break
865
- except Exception as exc:
866
- last_exception = exc
867
- if attempts >= max_attempts:
868
- logger.error(
869
- f"Embedding generation failed for model {embedding_model} after retries",
870
- error=str(exc),
871
- )
872
- raise
873
- logger.warning(
874
- "Threaded embedding generation failed for model %s (attempt %s/%s), retrying in %.1fs",
875
- embedding_model,
876
- attempts,
877
- max_attempts,
878
- delay,
920
+ return _embed(chunk_text)
921
+ except Exception as e:
922
+ logger.error(
923
+ f"Failed to embed chunk {chunk_idx} after all retries: {e}",
924
+ error=str(e),
879
925
  )
880
- time.sleep(delay)
881
- delay = min(delay * 2, 8.0)
882
-
883
- if vectors is None:
884
- raise RuntimeError(
885
- f"Embedding generation failed for {embedding_model}: {last_exception}"
886
- if last_exception
887
- else f"Embedding generation failed for {embedding_model}"
888
- )
926
+ raise
927
+
928
+ # Restrict concurrency for IBM/Watsonx models to avoid rate limits
929
+ is_ibm = (embedding_model and "ibm" in str(embedding_model).lower()) or (
930
+ selected_embedding and "watsonx" in type(selected_embedding).__name__.lower()
931
+ )
932
+ logger.debug(f"Is IBM: {is_ibm}")
933
+
934
+ # For IBM models, use sequential processing with rate limiting
935
+ # For other models, use parallel processing
936
+ vectors: list[list[float]] = [None] * len(texts)
937
+
938
+ if is_ibm:
939
+ # Sequential processing with inter-request delay for IBM models
940
+ inter_request_delay = 0.6 # ~1.67 req/s, safely under 2 req/s limit
941
+ logger.info(f"Using sequential processing for IBM model with {inter_request_delay}s delay between requests")
942
+
943
+ for idx, chunk in enumerate(texts):
944
+ if idx > 0:
945
+ # Add delay between requests (but not before the first one)
946
+ time.sleep(inter_request_delay)
947
+ vectors[idx] = embed_chunk_with_retry(chunk, idx)
948
+ else:
949
+ # Parallel processing for non-IBM models
950
+ max_workers = min(max(len(texts), 1), 8)
951
+ logger.debug(f"Using parallel processing with {max_workers} workers")
952
+
953
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
954
+ futures = {executor.submit(embed_chunk_with_retry, chunk, idx): idx for idx, chunk in enumerate(texts)}
955
+ for future in as_completed(futures):
956
+ idx = futures[future]
957
+ vectors[idx] = future.result()
889
958
 
890
959
  if not vectors:
891
960
  self.log(f"No vectors generated from documents for model {embedding_model}.")
@@ -956,6 +1025,9 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
956
1025
  )
957
1026
  self.log(metadatas)
958
1027
 
1028
+ logger.info(
1029
+ f"Ingestion complete: Successfully indexed {len(return_ids)} documents with model '{embedding_model}'"
1030
+ )
959
1031
  self.log(f"Successfully indexed {len(return_ids)} documents with model {embedding_model}.")
960
1032
 
961
1033
  # ---------- helpers for filters ----------
@@ -1050,6 +1122,7 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1050
1122
  if filter_clauses:
1051
1123
  agg_query["query"] = {"bool": {"filter": filter_clauses}}
1052
1124
 
1125
+ logger.debug(f"Model detection query: {agg_query}")
1053
1126
  result = client.search(
1054
1127
  index=self.index_name,
1055
1128
  body=agg_query,
@@ -1058,14 +1131,25 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1058
1131
  buckets = result.get("aggregations", {}).get("embedding_models", {}).get("buckets", [])
1059
1132
  models = [b["key"] for b in buckets if b["key"]]
1060
1133
 
1134
+ # Log detailed bucket info for debugging
1061
1135
  logger.info(
1062
1136
  f"Detected embedding models in corpus: {models}"
1063
1137
  + (f" (with {len(filter_clauses)} filters)" if filter_clauses else "")
1064
1138
  )
1139
+ if not models:
1140
+ total_hits = result.get("hits", {}).get("total", {})
1141
+ total_count = total_hits.get("value", 0) if isinstance(total_hits, dict) else total_hits
1142
+ logger.warning(
1143
+ f"No embedding_model values found in index '{self.index_name}'. "
1144
+ f"Total docs in index: {total_count}. "
1145
+ f"This may indicate documents were indexed without the embedding_model field."
1146
+ )
1065
1147
  except (OpenSearchException, KeyError, ValueError) as e:
1066
1148
  logger.warning(f"Failed to detect embedding models: {e}")
1067
1149
  # Fallback to current model
1068
- return [self._get_embedding_model_name()]
1150
+ fallback_model = self._get_embedding_model_name()
1151
+ logger.info(f"Using fallback model: {fallback_model}")
1152
+ return [fallback_model]
1069
1153
  else:
1070
1154
  return models
1071
1155
 
@@ -1172,6 +1256,11 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1172
1256
  msg = "Embedding is required to run hybrid search (KNN + keyword)."
1173
1257
  raise ValueError(msg)
1174
1258
 
1259
+ # Check if embedding is None (fail-safe mode)
1260
+ if self.embedding is None or (isinstance(self.embedding, list) and all(e is None for e in self.embedding)):
1261
+ logger.error("Embedding returned None (fail-safe mode enabled). Cannot perform search.")
1262
+ return []
1263
+
1175
1264
  # Build filter clauses first so we can use them in model detection
1176
1265
  filter_clauses = self._coerce_filter_clauses(filter_obj)
1177
1266
 
@@ -1187,6 +1276,14 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1187
1276
 
1188
1277
  # Normalize embedding to list
1189
1278
  embeddings_list = self.embedding if isinstance(self.embedding, list) else [self.embedding]
1279
+ # Filter out None values (fail-safe mode)
1280
+ embeddings_list = [e for e in embeddings_list if e is not None]
1281
+
1282
+ if not embeddings_list:
1283
+ logger.error(
1284
+ "No valid embeddings available after filtering None values (fail-safe mode). Cannot perform search."
1285
+ )
1286
+ return []
1190
1287
 
1191
1288
  # Create a comprehensive map of model names to embedding objects
1192
1289
  # Check all possible identifiers (deployment, model, model_id, model_name)
@@ -1203,18 +1300,20 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1203
1300
  model_id = getattr(emb_obj, "model_id", None)
1204
1301
  model_name = getattr(emb_obj, "model_name", None)
1205
1302
  dimensions = getattr(emb_obj, "dimensions", None)
1206
- available_models = getattr(emb_obj, "available_models", None)
1303
+ available_models_attr = getattr(emb_obj, "available_models", None)
1207
1304
 
1208
1305
  logger.info(
1209
1306
  f"Embedding object {idx}: deployment={deployment}, model={model}, "
1210
1307
  f"model_id={model_id}, model_name={model_name}, dimensions={dimensions}, "
1211
- f"available_models={available_models}"
1308
+ f"available_models={available_models_attr}"
1212
1309
  )
1213
1310
 
1214
1311
  # If this embedding has available_models dict, map all models to their dedicated instances
1215
- if available_models and isinstance(available_models, dict):
1216
- logger.info(f"Embedding object {idx} provides {len(available_models)} models via available_models dict")
1217
- for model_name_key, dedicated_embedding in available_models.items():
1312
+ if available_models_attr and isinstance(available_models_attr, dict):
1313
+ logger.info(
1314
+ f"Embedding object {idx} provides {len(available_models_attr)} models via available_models dict"
1315
+ )
1316
+ for model_name_key, dedicated_embedding in available_models_attr.items():
1218
1317
  if model_name_key and str(model_name_key).strip():
1219
1318
  model_str = str(model_name_key).strip()
1220
1319
  if model_str not in embedding_by_model:
@@ -1269,6 +1368,12 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1269
1368
 
1270
1369
  logger.info(f"Generating embeddings for {len(available_models)} models in index")
1271
1370
  logger.info(f"Available embedding identifiers: {list(embedding_by_model.keys())}")
1371
+ self.log(f"[SEARCH] Models detected in index: {available_models}")
1372
+ self.log(f"[SEARCH] Available embedding identifiers: {list(embedding_by_model.keys())}")
1373
+
1374
+ # Track matching status for debugging
1375
+ matched_models = []
1376
+ unmatched_models = []
1272
1377
 
1273
1378
  for model_name in available_models:
1274
1379
  try:
@@ -1298,18 +1403,34 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1298
1403
  # Use the embedding instance directly - no model switching needed!
1299
1404
  vec = emb_obj.embed_query(q)
1300
1405
  query_embeddings[model_name] = vec
1406
+ matched_models.append(model_name)
1301
1407
  logger.info(f"Generated embedding for model: {model_name} (actual dimensions: {len(vec)})")
1408
+ self.log(f"[MATCH] Model '{model_name}' - generated {len(vec)}-dim embedding")
1302
1409
  else:
1303
1410
  # No matching embedding found for this model
1411
+ unmatched_models.append(model_name)
1304
1412
  logger.warning(
1305
1413
  f"No matching embedding found for model '{model_name}'. "
1306
- f"This model will be skipped. Available models: {list(embedding_by_model.keys())}"
1414
+ f"This model will be skipped. Available identifiers: {list(embedding_by_model.keys())}"
1307
1415
  )
1416
+ self.log(f"[NO MATCH] Model '{model_name}' - available: {list(embedding_by_model.keys())}")
1308
1417
  except (RuntimeError, ValueError, ConnectionError, TimeoutError, AttributeError, KeyError) as e:
1309
1418
  logger.warning(f"Failed to generate embedding for {model_name}: {e}")
1419
+ self.log(f"[ERROR] Embedding generation failed for '{model_name}': {e}")
1420
+
1421
+ # Log summary of model matching
1422
+ logger.info(f"Model matching summary: {len(matched_models)} matched, {len(unmatched_models)} unmatched")
1423
+ self.log(f"[SUMMARY] Model matching: {len(matched_models)} matched, {len(unmatched_models)} unmatched")
1424
+ if unmatched_models:
1425
+ self.log(f"[WARN] Unmatched models in index: {unmatched_models}")
1310
1426
 
1311
1427
  if not query_embeddings:
1312
- msg = "Failed to generate embeddings for any model"
1428
+ msg = (
1429
+ f"Failed to generate embeddings for any model. "
1430
+ f"Index has models: {available_models}, but no matching embedding objects found. "
1431
+ f"Available embedding identifiers: {list(embedding_by_model.keys())}"
1432
+ )
1433
+ self.log(f"[FAIL] Search failed: {msg}")
1313
1434
  raise ValueError(msg)
1314
1435
 
1315
1436
  index_properties = self._get_index_properties(client)
@@ -1339,6 +1460,7 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1339
1460
  f"Skipping model {model_name}: field '{field_name}' is not mapped as knn_vector. "
1340
1461
  f"Documents must be indexed with this embedding model before querying."
1341
1462
  )
1463
+ self.log(f"[SKIP] Field '{selected_field}' not a knn_vector - skipping model '{model_name}'")
1342
1464
  continue
1343
1465
 
1344
1466
  # Validate vector dimensions match the field dimensions
@@ -1349,6 +1471,7 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1349
1471
  f"Query vector has {vector_dim} dimensions but field '{selected_field}' expects {field_dim}. "
1350
1472
  f"Skipping this model to prevent search errors."
1351
1473
  )
1474
+ self.log(f"[DIM MISMATCH] Model '{model_name}': query={vector_dim} vs field={field_dim} - skipping")
1352
1475
  continue
1353
1476
 
1354
1477
  logger.info(
@@ -1385,6 +1508,11 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1385
1508
  "This may indicate an empty index or missing field mappings. "
1386
1509
  "Returning empty search results."
1387
1510
  )
1511
+ self.log(
1512
+ f"[WARN] No valid KNN queries could be built. "
1513
+ f"Query embeddings generated: {list(query_embeddings.keys())}, "
1514
+ f"but no matching knn_vector fields found in index."
1515
+ )
1388
1516
  return []
1389
1517
 
1390
1518
  # Build exists filter - document must have at least one embedding field
@@ -1448,7 +1576,13 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1448
1576
  if isinstance(score_threshold, (int, float)) and score_threshold > 0:
1449
1577
  body["min_score"] = score_threshold
1450
1578
 
1451
- logger.info(f"Executing multi-model hybrid search with {len(knn_queries_with_candidates)} embedding models")
1579
+ logger.info(
1580
+ f"Executing multi-model hybrid search with {len(knn_queries_with_candidates)} embedding models: "
1581
+ f"{list(query_embeddings.keys())}"
1582
+ )
1583
+ self.log(f"[EXEC] Executing search with {len(knn_queries_with_candidates)} KNN queries, limit={limit}")
1584
+ self.log(f"[EXEC] Embedding models used: {list(query_embeddings.keys())}")
1585
+ self.log(f"[EXEC] KNN fields being queried: {embedding_fields}")
1452
1586
 
1453
1587
  try:
1454
1588
  resp = client.search(index=self.index_name, body=body, params={"terminate_after": 0})
@@ -1502,6 +1636,16 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1502
1636
  hits = resp.get("hits", {}).get("hits", [])
1503
1637
 
1504
1638
  logger.info(f"Found {len(hits)} results")
1639
+ self.log(f"[RESULT] Search complete: {len(hits)} results found")
1640
+
1641
+ if len(hits) == 0:
1642
+ self.log(
1643
+ f"[EMPTY] Debug info: "
1644
+ f"models_in_index={available_models}, "
1645
+ f"matched_models={matched_models}, "
1646
+ f"knn_fields={embedding_fields}, "
1647
+ f"filters={len(filter_clauses)} clauses"
1648
+ )
1505
1649
 
1506
1650
  return [
1507
1651
  {
@@ -1518,6 +1662,9 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1518
1662
  This is the main interface method that performs the multi-model search using the
1519
1663
  configured search_query and returns results in Langflow's Data format.
1520
1664
 
1665
+ Always builds the vector store (triggering ingestion if needed), then performs
1666
+ search only if a query is provided.
1667
+
1521
1668
  Returns:
1522
1669
  List of Data objects containing search results with text and metadata
1523
1670
 
@@ -1525,9 +1672,20 @@ class OpenSearchVectorStoreComponentMultimodalMultiEmbedding(LCVectorStoreCompon
1525
1672
  Exception: If search operation fails
1526
1673
  """
1527
1674
  try:
1528
- raw = self.search(self.search_query or "")
1675
+ # Always build/cache the vector store to ensure ingestion happens
1676
+ logger.info(f"Search query: {self.search_query}")
1677
+ if self._cached_vector_store is None:
1678
+ self.build_vector_store()
1679
+
1680
+ # Only perform search if query is provided
1681
+ search_query = (self.search_query or "").strip()
1682
+ if not search_query:
1683
+ self.log("No search query provided - ingestion completed, returning empty results")
1684
+ return []
1685
+
1686
+ # Perform search with the provided query
1687
+ raw = self.search(search_query)
1529
1688
  return [Data(text=hit["page_content"], **hit["metadata"]) for hit in raw]
1530
- self.log(self.ingest_data)
1531
1689
  except Exception as e:
1532
1690
  self.log(f"search_documents error: {e}")
1533
1691
  raise