langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. langroid/__init__.py +70 -0
  2. langroid/agent/__init__.py +22 -0
  3. langroid/agent/base.py +120 -33
  4. langroid/agent/batch.py +134 -35
  5. langroid/agent/callbacks/__init__.py +0 -0
  6. langroid/agent/callbacks/chainlit.py +608 -0
  7. langroid/agent/chat_agent.py +164 -100
  8. langroid/agent/chat_document.py +19 -2
  9. langroid/agent/openai_assistant.py +20 -10
  10. langroid/agent/special/__init__.py +33 -10
  11. langroid/agent/special/doc_chat_agent.py +521 -108
  12. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  13. langroid/agent/special/lance_rag/__init__.py +9 -0
  14. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  15. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  16. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  17. langroid/agent/special/lance_tools.py +44 -0
  18. langroid/agent/special/neo4j/__init__.py +0 -0
  19. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  20. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  21. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  22. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  23. langroid/agent/special/relevance_extractor_agent.py +23 -7
  24. langroid/agent/special/retriever_agent.py +29 -174
  25. langroid/agent/special/sql/__init__.py +7 -0
  26. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  27. langroid/agent/special/sql/utils/__init__.py +11 -0
  28. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  29. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  30. langroid/agent/special/table_chat_agent.py +43 -9
  31. langroid/agent/task.py +423 -114
  32. langroid/agent/tool_message.py +67 -10
  33. langroid/agent/tools/__init__.py +8 -0
  34. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  35. langroid/agent/tools/google_search_tool.py +11 -0
  36. langroid/agent/tools/metaphor_search_tool.py +67 -0
  37. langroid/agent/tools/recipient_tool.py +6 -24
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/cachedb/__init__.py +6 -0
  40. langroid/embedding_models/__init__.py +24 -0
  41. langroid/embedding_models/base.py +9 -1
  42. langroid/embedding_models/models.py +117 -17
  43. langroid/embedding_models/protoc/embeddings.proto +19 -0
  44. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  45. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  46. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  47. langroid/embedding_models/remote_embeds.py +153 -0
  48. langroid/language_models/__init__.py +22 -0
  49. langroid/language_models/azure_openai.py +47 -4
  50. langroid/language_models/base.py +26 -10
  51. langroid/language_models/config.py +5 -0
  52. langroid/language_models/openai_gpt.py +407 -121
  53. langroid/language_models/prompt_formatter/__init__.py +9 -0
  54. langroid/language_models/prompt_formatter/base.py +4 -6
  55. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  56. langroid/language_models/utils.py +10 -9
  57. langroid/mytypes.py +10 -4
  58. langroid/parsing/__init__.py +33 -1
  59. langroid/parsing/document_parser.py +259 -63
  60. langroid/parsing/image_text.py +32 -0
  61. langroid/parsing/parse_json.py +143 -0
  62. langroid/parsing/parser.py +20 -7
  63. langroid/parsing/repo_loader.py +108 -46
  64. langroid/parsing/search.py +8 -0
  65. langroid/parsing/table_loader.py +44 -0
  66. langroid/parsing/url_loader.py +59 -13
  67. langroid/parsing/urls.py +18 -9
  68. langroid/parsing/utils.py +130 -9
  69. langroid/parsing/web_search.py +73 -0
  70. langroid/prompts/__init__.py +7 -0
  71. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  72. langroid/prompts/prompts_config.py +1 -1
  73. langroid/utils/__init__.py +10 -0
  74. langroid/utils/algorithms/__init__.py +3 -0
  75. langroid/utils/configuration.py +0 -1
  76. langroid/utils/constants.py +4 -0
  77. langroid/utils/logging.py +2 -5
  78. langroid/utils/output/__init__.py +15 -2
  79. langroid/utils/output/status.py +33 -0
  80. langroid/utils/pandas_utils.py +30 -0
  81. langroid/utils/pydantic_utils.py +446 -4
  82. langroid/utils/system.py +36 -1
  83. langroid/vector_store/__init__.py +34 -2
  84. langroid/vector_store/base.py +33 -2
  85. langroid/vector_store/chromadb.py +42 -13
  86. langroid/vector_store/lancedb.py +226 -60
  87. langroid/vector_store/meilisearch.py +7 -6
  88. langroid/vector_store/momento.py +3 -2
  89. langroid/vector_store/qdrantdb.py +82 -11
  90. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
  91. langroid-0.1.219.dist-info/RECORD +127 -0
  92. langroid/agent/special/recipient_validator_agent.py +0 -157
  93. langroid/parsing/json.py +0 -64
  94. langroid/utils/web/selenium_login.py +0 -36
  95. langroid-0.1.139.dist-info/RECORD +0 -103
  96. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
  97. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
langroid/utils/pydantic_utils.py CHANGED
@@ -1,6 +1,26 @@
- from typing import Any, Dict, Tuple, Type, no_type_check
-
- from pydantic import BaseModel, create_model
+ import logging
+ from contextlib import contextmanager
+ from typing import (
+     Any,
+     Dict,
+     Generator,
+     List,
+     Optional,
+     Tuple,
+     Type,
+     TypeVar,
+     get_args,
+     get_origin,
+     no_type_check,
+ )
+
+ import numpy as np
+ import pandas as pd
+ from pydantic import BaseModel, ValidationError, create_model
+
+ from langroid.mytypes import DocMetaData, Document
+
+ logger = logging.getLogger(__name__)
 
 
  def has_field(model_class: Type[BaseModel], field_name: str) -> bool:
@@ -8,6 +28,16 @@ def has_field(model_class: Type[BaseModel], field_name: str) -> bool:
      return field_name in model_class.__fields__
 
 
+ def _recursive_purge_dict_key(d: Dict[str, Any], k: str) -> None:
+     """Remove a key from a dictionary recursively"""
+     if isinstance(d, dict):
+         for key in list(d.keys()):
+             if key == k and "type" in d.keys():
+                 del d[key]
+             else:
+                 _recursive_purge_dict_key(d[key], k)
+
+
  @no_type_check
  def _flatten_pydantic_model_ignore_defaults(
      model: Type[BaseModel],
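
A quick sketch of how the private helper above behaves on a Pydantic-generated JSON schema; the `Point` model is illustrative, not part of the diff:

    from pydantic import BaseModel

    class Point(BaseModel):
        x: int
        y: int

    schema = Point.schema()
    _recursive_purge_dict_key(schema, "title")
    # "title" keys are deleted only where a sibling "type" key exists,
    # i.e. the auto-generated titles on the model and on each property.
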
@@ -105,6 +135,53 @@ def flatten_pydantic_model(
      return create_model("FlatModel", __base__=base_model, **flattened_fields)
 
 
+ def get_field_names(model: Type[BaseModel]) -> List[str]:
+     """Get all field names from a possibly nested Pydantic model."""
+     mdl = flatten_pydantic_model(model)
+     fields = list(mdl.__fields__.keys())
+     # fields may be like a__b__c, so we only want the last part
+     return [f.split("__")[-1] for f in fields]
+
+
+ def generate_simple_schema(
+     model: Type[BaseModel], exclude: List[str] = []
+ ) -> Dict[str, Any]:
+     """
+     Generates a JSON schema for a Pydantic model,
+     with options to exclude specific fields.
+
+     This function traverses the Pydantic model's fields, including nested models,
+     to generate a dictionary representing the JSON schema. Fields specified in
+     the exclude list will not be included in the generated schema.
+
+     Args:
+         model (Type[BaseModel]): The Pydantic model class to generate the schema for.
+         exclude (List[str]): A list of string field names to be excluded from the
+             generated schema. Defaults to an empty list.
+
+     Returns:
+         Dict[str, Any]: A dictionary representing the JSON schema of the provided
+             model, with specified fields excluded.
+     """
+     if hasattr(model, "__fields__"):
+         output: Dict[str, Any] = {}
+         for field_name, field in model.__fields__.items():
+             if field_name in exclude:
+                 continue  # Skip excluded fields
+
+             field_type = field.type_
+             if issubclass(field_type, BaseModel):
+                 # Recursively generate schema for nested models
+                 output[field_name] = generate_simple_schema(field_type, exclude)
+             else:
+                 # Represent the type as a string here
+                 output[field_name] = {"type": field_type.__name__}
+         return output
+     else:
+         # Non-model type, return a simplified representation
+         return {"type": model.__name__}
+
+
  def flatten_pydantic_instance(
      instance: BaseModel,
      prefix: str = "",
@@ -138,6 +215,62 @@ def flatten_pydantic_instance(
      return flat_data
 
 
+ def extract_fields(doc: BaseModel, fields: List[str]) -> Dict[str, Any]:
+     """
+     Extract specified fields from a Pydantic object.
+     Supports dotted field names, e.g. "metadata.author".
+     Dotted fields are matched exactly according to the corresponding path.
+     Non-dotted fields are matched against the last part of the path.
+     Clashes are ignored.
+
+     Args:
+         doc (BaseModel): The Pydantic object.
+         fields (List[str]): The list of fields to extract.
+
+     Returns:
+         Dict[str, Any]: A dictionary of field names and values.
+     """
+
+     def get_value(obj: BaseModel, path: str) -> Any | None:
+         for part in path.split("."):
+             if hasattr(obj, part):
+                 obj = getattr(obj, part)
+             else:
+                 return None
+         return obj
+
+     def traverse(obj: BaseModel, result: Dict[str, Any], prefix: str = "") -> None:
+         for k, v in obj.__dict__.items():
+             key = f"{prefix}.{k}" if prefix else k
+             if isinstance(v, BaseModel):
+                 traverse(v, result, key)
+             else:
+                 result[key] = v
+
+     result: Dict[str, Any] = {}
+
+     # Extract values for dotted field names and use the last part as the key
+     for field in fields:
+         if "." in field:
+             value = get_value(doc, field)
+             if value is not None:
+                 key = field.split(".")[-1]
+                 result[key] = value
+
+     # Traverse the object to get non-dotted fields
+     all_fields: Dict[str, Any] = {}
+     traverse(doc, all_fields)
+
+     # Add non-dotted fields to the result,
+     # avoiding overwrites if already present from dotted names
+     for field in [f for f in fields if "." not in f]:
+         for key, value in all_fields.items():
+             if key.split(".")[-1] == field and field not in result:
+                 result[field] = value
+
+     return result
+
+
  def nested_dict_from_flat(
      flat_data: Dict[str, Any],
      sub_dict: str = "",
@@ -175,6 +308,315 @@ def pydantic_obj_from_flat_dict(
      model: Type[BaseModel],
      sub_dict: str = "",
  ) -> BaseModel:
-     """flatened dict with a__b__c style keys -> nested dict -> pydantic object"""
+     """Flattened dict with a__b__c style keys -> nested dict -> pydantic object"""
      nested_data = nested_dict_from_flat(flat_data, sub_dict)
      return model(**nested_data)
+
+
+ def clean_schema(model: Type[BaseModel], excludes: List[str] = []) -> Dict[str, Any]:
+     """
+     Generate a simple schema for a given Pydantic model,
+     including inherited fields, with an option to exclude certain fields.
+     Handles cases where fields are Lists or other generic types and includes
+     field descriptions if available.
+
+     Args:
+         model (Type[BaseModel]): The Pydantic model class.
+         excludes (List[str]): A list of field names to exclude.
+
+     Returns:
+         Dict[str, Any]: A dictionary representing the simple schema.
+     """
+     schema = {}
+
+     for field_name, field_info in model.__fields__.items():
+         if field_name in excludes:
+             continue
+
+         field_type = field_info.outer_type_
+         description = field_info.field_info.description or ""
+
+         # Handle generic types like List[...]
+         if get_origin(field_type):
+             inner_types = get_args(field_type)
+             inner_type_names = [
+                 t.__name__ if hasattr(t, "__name__") else str(t) for t in inner_types
+             ]
+             field_type_str = (
+                 f"{get_origin(field_type).__name__}" f'[{", ".join(inner_type_names)}]'
+             )
+             schema[field_name] = {"type": field_type_str, "description": description}
+         elif issubclass(field_type, BaseModel):
+             # Directly use the nested model's schema,
+             # integrating it into the current level
+             nested_schema = clean_schema(field_type, excludes)
+             schema[field_name] = {**nested_schema, "description": description}
+         else:
+             # For basic types, use 'type'
+             schema[field_name] = {
+                 "type": field_type.__name__,
+                 "description": description,
+             }
+
+     return schema
+
+
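Unlike `generate_simple_schema` above, `clean_schema` also carries field descriptions and renders generic types such as `List[str]`. A sketch with an illustrative model:

    from typing import List
    from pydantic import BaseModel, Field

    class Recipe(BaseModel):
        title: str = Field(..., description="Name of the dish")
        tags: List[str] = Field([], description="Search tags")

    clean_schema(Recipe)
    # -> {'title': {'type': 'str', 'description': 'Name of the dish'},
    #     'tags': {'type': 'list[str]', 'description': 'Search tags'}}
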
+ @contextmanager
+ def temp_update(
+     pydantic_object: BaseModel, updates: Dict[str, Any]
+ ) -> Generator[None, None, None]:
+     original_values = {}
+     try:
+         for field, value in updates.items():
+             if hasattr(pydantic_object, field):
+                 # Save original value
+                 original_values[field] = getattr(pydantic_object, field)
+                 setattr(pydantic_object, field, value)
+             else:
+                 # Raise error for non-existent field
+                 raise AttributeError(
+                     f"The field '{field}' does not exist in the "
+                     f"Pydantic model '{pydantic_object.__class__.__name__}'."
+                 )
+         yield
+     except ValidationError as e:
+         # Handle validation error
+         print(f"Validation error: {e}")
+     finally:
+         # Restore original values
+         for field, value in original_values.items():
+             setattr(pydantic_object, field, value)
+
+
+ T = TypeVar("T", bound=BaseModel)
+
+
+ @contextmanager
+ def temp_params(config: T, field: str, temp: T) -> Generator[None, None, None]:
+     """Context manager to temporarily override `field` in a `config`"""
+     original_vals = getattr(config, field)
+     try:
+         # Apply temporary settings
+         setattr(config, field, temp)
+         yield
+     finally:
+         # Revert to original settings
+         setattr(config, field, original_vals)
+
+
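A sketch of the two context managers (illustrative config classes): `temp_update` patches several attributes by name and restores them, while `temp_params` swaps out a single field wholesale:

    from pydantic import BaseModel

    class LLMParams(BaseModel):
        temperature: float = 0.2

    class Config(BaseModel):
        params: LLMParams = LLMParams()

    cfg = Config()
    with temp_params(cfg, "params", LLMParams(temperature=0.9)):
        assert cfg.params.temperature == 0.9  # override in effect
    assert cfg.params.temperature == 0.2      # restored on exit
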
+ def numpy_to_python_type(numpy_type: Type[Any]) -> Type[Any]:
+     """Converts a numpy data type to its Python equivalent."""
+     type_mapping = {
+         np.float64: float,
+         np.float32: float,
+         np.int64: int,
+         np.int32: int,
+         np.bool_: bool,
+         # Add other numpy types as necessary
+     }
+     return type_mapping.get(numpy_type, numpy_type)
+
+
+ def dataframe_to_pydantic_model(df: pd.DataFrame) -> Type[BaseModel]:
+     """Make a Pydantic model from a dataframe."""
+     fields = {col: (type(df[col].iloc[0]), ...) for col in df.columns}
+     return create_model("DataFrameModel", __base__=BaseModel, **fields)  # type: ignore
+
+
+ def dataframe_to_pydantic_objects(df: pd.DataFrame) -> List[BaseModel]:
+     """Make a list of Pydantic objects from a dataframe."""
+     Model = dataframe_to_pydantic_model(df)
+     return [Model(**row.to_dict()) for index, row in df.iterrows()]
+
+
+ def first_non_null(series: pd.Series) -> Any | None:
+     """Find the first non-null item in a pandas Series."""
+     for item in series:
+         if item is not None:
+             return item
+     return None
+
+
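`dataframe_to_pydantic_model` takes each field's type from the first row of a column; a minimal sketch with string columns (illustrative data; numeric columns carry numpy dtypes, which the Document helpers below normalize via `numpy_to_python_type`):

    import pandas as pd

    df = pd.DataFrame({"name": ["Ada", "Alan"], "field": ["math", "computing"]})
    objs = dataframe_to_pydantic_objects(df)
    # objs[0] -> DataFrameModel(name='Ada', field='math')
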
+ def dataframe_to_document_model(
+     df: pd.DataFrame,
+     content: str = "content",
+     metadata: List[str] = [],
+     exclude: List[str] = [],
+ ) -> Type[BaseModel]:
+     """
+     Make a subclass of Document from a dataframe.
+
+     Args:
+         df (pd.DataFrame): The dataframe.
+         content (str): The name of the column containing the content,
+             which will map to the Document.content field.
+         metadata (List[str]): A list of column names containing metadata;
+             these will be included in the Document.metadata field.
+         exclude (List[str]): A list of column names to exclude from the model
+             (e.g. "vector" when lance is used to add an embedding vector to the df).
+
+     Returns:
+         Type[BaseModel]: A pydantic model subclassing Document.
+     """
+
+     # Remove excluded columns
+     df = df.drop(columns=exclude, inplace=False)
+
+     # Check if the list of metadata columns is empty
+     if metadata:
+         # Define fields for the dynamic subclass of DocMetaData
+         metadata_fields = {
+             col: (
+                 Optional[numpy_to_python_type(type(first_non_null(df[col])))],
+                 None,
+             )
+             for col in metadata
+         }
+         DynamicMetaData = create_model(  # type: ignore
+             "DynamicMetaData", __base__=DocMetaData, **metadata_fields
+         )
+     else:
+         # Use the base DocMetaData class directly
+         DynamicMetaData = DocMetaData
+
+     # Define additional top-level fields for DynamicDocument
+     additional_fields = {
+         col: (
+             Optional[numpy_to_python_type(type(first_non_null(df[col])))],
+             None,
+         )
+         for col in df.columns
+         if col not in metadata and col != content
+     }
+
+     # Create a dynamic subclass of Document
+     DynamicDocumentFields = {
+         **{"metadata": (DynamicMetaData, ...)},
+         **additional_fields,
+     }
+     DynamicDocument = create_model(  # type: ignore
+         "DynamicDocument", __base__=Document, **DynamicDocumentFields
+     )
+
+     def from_df_row(
+         cls: type[BaseModel],
+         row: pd.Series,
+         content: str = "content",
+         metadata: List[str] = [],
+     ) -> BaseModel | None:
+         content_val = row[content] if (content and content in row) else ""
+         metadata_values = (
+             {col: row[col] for col in metadata if col in row} if metadata else {}
+         )
+         additional_values = {
+             col: row[col] for col in additional_fields if col in row and col != content
+         }
+         metadata = DynamicMetaData(**metadata_values)
+         return cls(content=content_val, metadata=metadata, **additional_values)
+
+     # Bind the method to the class
+     DynamicDocument.from_df_row = classmethod(from_df_row)
+
+     return DynamicDocument  # type: ignore
+
+
+ def dataframe_to_documents(
+     df: pd.DataFrame,
+     content: str = "content",
+     metadata: List[str] = [],
+     doc_cls: Type[BaseModel] | None = None,
+ ) -> List[Document]:
+     """
+     Make a list of Document objects from a dataframe.
+
+     Args:
+         df (pd.DataFrame): The dataframe.
+         content (str): The name of the column containing the content,
+             which will map to the Document.content field.
+         metadata (List[str]): A list of column names containing metadata;
+             these will be included in the Document.metadata field.
+         doc_cls (Type[BaseModel], optional): A Pydantic model subclassing
+             Document. Defaults to None.
+
+     Returns:
+         List[Document]: The list of Document objects.
+     """
+     Model = doc_cls or dataframe_to_document_model(df, content, metadata)
+     docs = [
+         Model.from_df_row(row, content, metadata)  # type: ignore
+         for _, row in df.iterrows()
+     ]
+     return [m for m in docs if m is not None]
+
+
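Putting the pieces together: `dataframe_to_documents` builds a dynamic `Document` subclass and converts each row (illustrative data; metadata columns land under `.metadata`):

    import pandas as pd

    df = pd.DataFrame(
        {"content": ["first doc", "second doc"], "author": ["Ada", "Alan"]}
    )
    docs = dataframe_to_documents(df, content="content", metadata=["author"])
    # docs[0].content         -> 'first doc'
    # docs[0].metadata.author -> 'Ada'
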
+ def extra_metadata(document: Document, doc_cls: Type[Document] = Document) -> List[str]:
+     """
+     Checks for extra fields in a document's metadata that are not defined in the
+     original metadata schema.
+
+     Args:
+         document (Document): The document instance to check for extra fields.
+         doc_cls (Type[Document]): The class type derived from Document, used
+             as a reference to identify extra fields in the document's metadata.
+
+     Returns:
+         List[str]: A list of strings representing the keys of the extra fields found
+             in the document's metadata.
+     """
+     # Convert metadata to a dict, including extra fields.
+     metadata_fields = set(document.metadata.dict().keys())
+
+     # Get the fields defined in the metadata of doc_cls
+     defined_fields = set(doc_cls.__fields__["metadata"].type_.__fields__.keys())
+
+     # Identify extra fields not among the defined fields.
+     extra_fields = list(metadata_fields - defined_fields)
+
+     return extra_fields
+
+
+ def extend_document_class(d: Document) -> Type[Document]:
+     """Generates a new pydantic class based on a given document instance.
+
+     This function dynamically creates a new pydantic class with additional
+     fields based on the "extra" metadata fields present in the given document
+     instance. The new class is a subclass of the original Document class, with
+     the original metadata fields retained and extra fields added as normal
+     fields to the metadata.
+
+     Args:
+         d: An instance of the Document class.
+
+     Returns:
+         A new subclass of the Document class that includes the additional fields
+         found in the metadata of the given document instance.
+     """
+     # Extract the fields from the original metadata class, including types,
+     # correctly handling special types like List[str].
+     original_metadata_fields = {
+         k: (v.outer_type_ if v.shape != 1 else v.type_, ...)
+         for k, v in DocMetaData.__fields__.items()
+     }
+     # Extract extra fields from the metadata instance with their types
+     extra_fields = {
+         k: (type(v), ...)
+         for k, v in d.metadata.__dict__.items()
+         if k not in DocMetaData.__fields__
+     }
+
+     # Combine original and extra fields for the new metadata class
+     combined_fields = {**original_metadata_fields, **extra_fields}
+
+     # Create a new metadata class with the combined fields
+     NewMetadataClass = create_model(  # type: ignore
+         "ExtendedDocMetadata", **combined_fields, __base__=DocMetaData
+     )
+
+     # Create a new document class using the new metadata class
+     NewDocumentClass = create_model(
+         "ExtendedDocument",
+         content=(str, ...),
+         metadata=(NewMetadataClass, ...),
+         __base__=Document,
+     )
+
+     return NewDocumentClass
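
A sketch of the last two helpers, assuming (as these helpers do) that `DocMetaData` accepts extra fields:

    from langroid.mytypes import DocMetaData, Document

    doc = Document(
        content="hello",
        metadata=DocMetaData(source="wiki", topic="math"),  # "topic" is extra
    )
    extra_metadata(doc)  # -> ['topic']
    ExtDoc = extend_document_class(doc)
    # ExtDoc's metadata class now declares "topic" as a regular field
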
langroid/utils/system.py CHANGED
@@ -1,10 +1,12 @@
  import getpass
  import hashlib
+ import importlib
  import inspect
  import logging
  import shutil
  import socket
  import traceback
+ from typing import Any
 
  logger = logging.getLogger(__name__)
 
@@ -15,6 +17,39 @@ DELETION_ALLOWED_PATHS = [
  ]
 
 
+ class LazyLoad:
+     """Lazy loading of modules or classes."""
+
+     def __init__(self, import_path: str) -> None:
+         self.import_path = import_path
+         self._target = None
+         self._is_target_loaded = False
+
+     def _load_target(self) -> None:
+         if not self._is_target_loaded:
+             try:
+                 # Attempt to import as a module
+                 self._target = importlib.import_module(self.import_path)  # type: ignore
+             except ImportError:
+                 # If the module import fails, attempt to import as a
+                 # class or function from a module
+                 module_path, attr_name = self.import_path.rsplit(".", 1)
+                 module = importlib.import_module(module_path)
+                 self._target = getattr(module, attr_name)
+             self._is_target_loaded = True
+
+     def __getattr__(self, name: str) -> Any:
+         self._load_target()
+         return getattr(self._target, name)
+
+     def __call__(self, *args: Any, **kwargs: Any) -> Any:
+         self._load_target()
+         if callable(self._target):
+             return self._target(*args, **kwargs)
+         else:
+             raise TypeError(f"{self.import_path!r} object is not callable")
+
+
  def rmdir(path: str) -> bool:
      """
      Remove a directory recursively.
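
A sketch of `LazyLoad`: the underlying import is deferred until the first attribute access or call, and dotted paths fall back to attribute lookup on the parent module:

    np_lazy = LazyLoad("numpy")     # nothing imported yet
    np_lazy.sqrt(16.0)              # numpy is imported here -> 4.0

    loads = LazyLoad("json.loads")  # module.attr path also works
    loads('{"a": 1}')               # -> {'a': 1}
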
@@ -96,7 +131,7 @@ def generate_user_id(org: str = "") -> str:
  def update_hash(hash: str | None = None, s: str = "") -> str:
      """
      Takes a SHA256 hash string and a new string, updates the hash with the new string,
-     and returns the updated hash string along with the original string.
+     and returns the updated hash string.
 
      Args:
          hash (str): A SHA256 hash string.
langroid/vector_store/__init__.py CHANGED
@@ -1,8 +1,40 @@
  from . import base
- from . import chromadb
+
  from . import qdrantdb
  from . import meilisearch
+ from . import lancedb
 
- from .chromadb import ChromaDBConfig, ChromaDB
+ from .base import VectorStoreConfig, VectorStore
  from .qdrantdb import QdrantDBConfig, QdrantDB
  from .meilisearch import MeiliSearch, MeiliSearchConfig
+ from .lancedb import LanceDB, LanceDBConfig
+
+ has_chromadb = False
+ try:
+     from . import chromadb
+     from .chromadb import ChromaDBConfig, ChromaDB
+
+     chromadb  # silence linters
+     ChromaDB
+     ChromaDBConfig
+     has_chromadb = True
+ except ImportError:
+     pass
+
+ __all__ = [
+     "base",
+     "VectorStore",
+     "VectorStoreConfig",
+     "qdrantdb",
+     "meilisearch",
+     "lancedb",
+     "QdrantDBConfig",
+     "QdrantDB",
+     "MeiliSearch",
+     "MeiliSearchConfig",
+     "LanceDB",
+     "LanceDBConfig",
+ ]
+
+ if has_chromadb:
+     __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
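
With chromadb now an optional import, downstream code can key off the `has_chromadb` flag instead of importing chromadb unconditionally; a sketch:

    import langroid.vector_store as vs

    if vs.has_chromadb:
        store_cls = vs.ChromaDB
    else:
        store_cls = vs.LanceDB  # always available per the imports above
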
langroid/vector_store/base.py CHANGED
@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
  from typing import Dict, List, Optional, Sequence, Tuple
 
  import numpy as np
+ import pandas as pd
  from pydantic import BaseSettings
 
  from langroid.embedding_models.base import EmbeddingModel, EmbeddingModelsConfig
@@ -12,6 +13,7 @@ from langroid.mytypes import Document
  from langroid.utils.algorithms.graph import components, topological_sort
  from langroid.utils.configuration import settings
  from langroid.utils.output.printing import print_long_text
+ from langroid.utils.pandas_utils import stringify
 
  logger = logging.getLogger(__name__)
 
@@ -127,6 +129,35 @@ class VectorStore(ABC):
      def add_documents(self, documents: Sequence[Document]) -> None:
          pass
 
+     def compute_from_docs(self, docs: List[Document], calc: str) -> str:
+         """Compute a result on a set of documents,
+         using a dataframe calc string like `df.groupby('state')['income'].mean()`.
+         """
+         dicts = [doc.dict() for doc in docs]
+         df = pd.DataFrame(dicts)
+
+         try:
+             result = pd.eval(  # safer than eval, but limited to a single expression
+                 calc,
+                 engine="python",
+                 parser="pandas",
+                 local_dict={"df": df},
+             )
+         except Exception as e:
+             # Return the error message so the LLM can fix the calc string if needed
+             err = f"""
+             Error encountered in pandas eval: {str(e)}
+             """
+             if isinstance(e, KeyError) and "not in index" in str(e):
+                 # pd.eval sometimes fails on a perfectly valid expression like
+                 # df.loc[..., 'column'] with a KeyError.
+                 err += """
+                 Maybe try a different way, e.g.
+                 instead of df.loc[..., 'column'], try df.loc[...]['column']
+                 """
+             return err
+         return stringify(result)
+
      def maybe_add_ids(self, documents: Sequence[Document]) -> None:
          """Add ids to metadata if absent, since some
          vecdbs don't like having blank ids."""
@@ -289,9 +320,9 @@ class VectorStore(ABC):
          return new_windows
 
      @abstractmethod
-     def get_all_documents(self) -> List[Document]:
+     def get_all_documents(self, where: str = "") -> List[Document]:
          """
-         Get all documents in the current collection.
+         Get all documents in the current collection, possibly filtered by `where`.
          """
          pass
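
A standalone sketch of the `pd.eval` pattern that `compute_from_docs` relies on, mirroring the calc string from its docstring (an illustrative dataframe stands in for the one built from the documents):

    import pandas as pd

    df = pd.DataFrame({"state": ["CA", "CA", "NY"], "income": [10, 20, 30]})
    result = pd.eval(
        "df.groupby('state')['income'].mean()",
        engine="python",
        parser="pandas",
        local_dict={"df": df},
    )
    # result: CA -> 15.0, NY -> 30.0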