langroid 0.1.101__py3-none-any.whl → 0.1.102__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/batch.py CHANGED
@@ -9,7 +9,7 @@ from rich.console import Console
  from langroid.agent.base import Agent
  from langroid.agent.chat_document import ChatDocument
  from langroid.agent.task import Task
- from langroid.utils.configuration import Settings, settings, temporary_settings
+ from langroid.utils.configuration import quiet_mode, settings
  from langroid.utils.logging import setup_colored_logging

  console = Console(quiet=settings.quiet)
@@ -53,7 +53,7 @@ def run_batch_tasks(
  return output_map(result)

  async def _do_all() -> List[Any]:
- with temporary_settings(Settings(quiet=True)):
+ with quiet_mode():
  return await asyncio.gather( # type: ignore
  *(_do_task(input, i) for i, input in enumerate(inputs))
  )
langroid/agent/special/doc_chat_agent.py CHANGED
@@ -66,6 +66,10 @@ You are a helpful assistant, helping me understand a collection of documents.
  """


+ class DocChunkMetqdata(DocMetaData):
+ id: str
+
+
  class DocChatAgentConfig(ChatAgentConfig):
  """
  Attributes:
@@ -95,6 +99,7 @@ class DocChatAgentConfig(ChatAgentConfig):
  # It is False by default; its benefits depends on the context.
  hypothetical_answer: bool = False
  n_query_rephrases: int = 0
+ n_neighbor_chunks: int = 0 # how many neighbors on either side of match to retrieve
  use_fuzzy_match: bool = True
  use_bm25_search: bool = True
  cross_encoder_reranking_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
@@ -122,6 +127,7 @@ class DocChatAgentConfig(ChatAgentConfig):
  min_chunk_chars=200,
  discard_chunk_chars=5, # discard chunks with fewer than this many chars
  n_similar_docs=3,
+ n_neighbor_ids=0, # num chunk IDs to store on either side of each chunk
  pdf=PdfParsingConfig(
  # NOTE: PDF parsing is extremely challenging, and each library
  # has its own strengths and weaknesses.
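The two new knobs above work as a pair: `n_neighbor_ids` (in the parsing config) controls how many neighboring chunk ids are stored in each chunk's metadata at indexing time, while `n_neighbor_chunks` controls how many of those neighbors are pulled back in around each match at query time, so the stored window should be at least as wide as what you plan to retrieve. A minimal configuration sketch, not taken from the package itself, assuming the import paths shown in this diff; the values are illustrative:

    from langroid.agent.special.doc_chat_agent import DocChatAgentConfig
    from langroid.parsing.parser import ParsingConfig

    config = DocChatAgentConfig(
        n_neighbor_chunks=2,          # query time: include 2 chunks on each side of a match
        parsing=ParsingConfig(
            n_neighbor_ids=5,         # index time: store up to 5 neighbor ids per side
        ),
    )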
@@ -195,6 +201,7 @@ class DocChatAgent(ChatAgent):
  if self.vecdb is None:
  raise ValueError("VecDB not set")
  self.chunked_docs = self.vecdb.get_all_documents()
+ # used for lexical similarity e.g. keyword search (bm25 etc)
  self.chunked_docs_clean = [
  Document(content=preprocess_text(d.content), metadata=d.metadata)
  for d in self.chunked_docs
@@ -509,9 +516,13 @@ class DocChatAgent(ChatAgent):
  if self.chunked_docs is None:
  logger.warning("No chunked docs; cannot use fuzzy matching")
  return []
+ if self.chunked_docs_clean is None:
+ logger.warning("No cleaned chunked docs; cannot use fuzzy-search")
+ return []
  fuzzy_match_docs = find_fuzzy_matches_in_docs(
  query,
  self.chunked_docs,
+ self.chunked_docs_clean,
  k=self.config.parsing.n_similar_docs * multiple,
  words_before=1000,
  words_after=1000,
@@ -546,6 +557,36 @@ class DocChatAgent(ChatAgent):
  ]
  return passages

+ def add_context_window(
+ self,
+ docs_scores: List[Tuple[Document, float]],
+ ) -> List[Tuple[Document, float]]:
+ """
+ In each doc's metadata, there may be a window_ids field indicating
+ the ids of the chunks around the current chunk.
+ These window_ids may overlap, so we
+ - gather connected-components of overlapping windows,
+ - split each component into roughly equal parts,
+ - create a new document for each part, preserving metadata,
+
+ We may have stored a longer set of window_ids than we need.
+ We just want `neighbors` on each side of the center of window_ids.
+
+ Args:
+ docs (List[Document]): List of documents to add context window to.
+ scores (List[float]): List of match scores for each document.
+ neighbors (int, optional): Number of neighbors on "each side" of match to
+ retrieve. Defaults to 0.
+ "Each side" here means before and after the match,
+ in the original text.
+
+ Returns:
+ List[Tuple[Document, float]]: List of (Document, score) tuples.
+ """
+ if self.vecdb is None or self.config.n_neighbor_chunks == 0:
+ return docs_scores
+ return self.vecdb.add_context_window(docs_scores, self.config.n_neighbor_chunks)
+
  def get_relevant_chunks(
  self, query: str, query_proxies: List[str] = []
  ) -> List[Document]:
@@ -560,10 +601,11 @@ class DocChatAgent(ChatAgent):
  dynamically retrieved based on a window around a lexical match.

  These are the steps (some optional based on config):
- - vector-embedding distance, from vecdb
- - bm25-ranking (keyword similarity)
+ - semantic search based on vector-embedding distance, from vecdb
+ - lexical search using bm25-ranking (keyword similarity)
  - fuzzy matching (keyword similarity)
- - re-ranking of doc-chunks using cross-encoder, pick top k
+ - re-ranking of doc-chunks by relevance to query, using cross-encoder,
+ and pick top k

  Args:
  query: original query (assumed to be in stand-alone form)
@@ -612,6 +654,9 @@ class DocChatAgent(ChatAgent):
  if len(passages) == 0:
  return []

+ passages_scores = [(p, 0.0) for p in passages]
+ passages_scores = self.add_context_window(passages_scores)
+ passages = [p for p, _ in passages_scores]
  # now passages can potentially have a lot of doc chunks,
  # so we re-rank them using a cross-encoder scoring model
  # https://www.sbert.net/examples/applications/retrieve_rerank
langroid/agent/special/retriever_agent.py CHANGED
@@ -28,7 +28,7 @@ logger = logging.getLogger(__name__)


  class RecordMetadata(DocMetaData):
- id: None | int | str = None
+ id: None | str = None


  class RecordDoc(Document):
langroid/mytypes.py CHANGED
@@ -26,6 +26,8 @@ class DocMetaData(BaseModel):

  source: str = "context"
  is_chunk: bool = False # if it is a chunk, don't split
+ id: str | None = None # unique id for the document
+ window_ids: List[str] = [] # for RAG: ids of chunks around this one

  def dict(self, *args: Any, **kwargs: Any) -> Dict[str, Any]:
  """
@@ -51,9 +53,10 @@ class Document(BaseModel):
  content: str
  metadata: DocMetaData

- def _unique_hash_id(self) -> str:
+ @staticmethod
+ def hash_id(doc: str) -> str:
  # Encode the document as UTF-8
- doc_utf8 = str(self).encode("utf-8")
+ doc_utf8 = str(doc).encode("utf-8")

  # Create a SHA256 hash object
  sha256_hash = hashlib.sha256()
@@ -69,8 +72,11 @@ class Document(BaseModel):

  return str(hash_uuid)

- def id(self) -> Any:
- if hasattr(self.metadata, "id"):
+ def _unique_hash_id(self) -> str:
+ return self.hash_id(str(self))
+
+ def id(self) -> str:
+ if hasattr(self.metadata, "id") and self.metadata.id is not None:
  return self.metadata.id
  else:
  return self._unique_hash_id()
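With this change `hash_id` becomes a static helper that hashes any string, `_unique_hash_id` delegates to it, and `id()` prefers an explicitly set `metadata.id`, falling back to the content hash only when none is set. A small sketch of the resulting behavior (illustrative values; `DocMetaData` and `Document` are the classes shown above):

    from langroid.mytypes import DocMetaData, Document

    doc = Document(content="hello world", metadata=DocMetaData(source="test"))
    auto_id = doc.id()                # no metadata.id set: deterministic hash-based UUID string
    doc.metadata.id = "chunk-42"
    assert doc.id() == "chunk-42"     # an explicit id now wins
    assert Document.hash_id("hello") == Document.hash_id("hello")   # static and repeatable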
langroid/parsing/document_parser.py CHANGED
@@ -200,6 +200,7 @@ class DocumentParser(Parser):
  ),
  )
  )
+ self.add_window_ids(docs)
  return docs


langroid/parsing/parser.py CHANGED
@@ -1,6 +1,5 @@
  import logging
  from enum import Enum
- from functools import reduce
  from typing import List

  import tiktoken
@@ -36,6 +35,7 @@ class ParsingConfig(BaseSettings):
  min_chunk_chars: int = 350
  discard_chunk_chars: int = 5 # discard chunks with fewer than this many chars
  n_similar_docs: int = 4
+ n_neighbor_ids: int = 0 # window size to store around each chunk
  separators: List[str] = ["\n\n", "\n", " ", ""]
  token_encoding_model: str = "text-embedding-ada-002"
  pdf: PdfParsingConfig = PdfParsingConfig()
@@ -51,17 +51,42 @@ class Parser:
  tokens = self.tokenizer.encode(text)
  return len(tokens)

+ def add_window_ids(self, chunks: List[Document]) -> None:
+ """Chunks are consecutive parts of a single original document.
+ Add window_ids in metadata"""
+
+ # The original metadata.id (if any) is ignored since it will be same for all
+ # chunks and is useless. We want a distinct id for each chunk.
+ ids = [Document.hash_id(str(c)) for c in chunks]
+
+ k = self.config.n_neighbor_ids
+ n = len(ids)
+ window_ids = [ids[max(0, i - k) : min(n, i + k + 1)] for i in range(n)]
+ for i, c in enumerate(chunks):
+ if c.content.strip() == "":
+ continue
+ c.metadata.window_ids = window_ids[i]
+ c.metadata.id = ids[i]
+ c.metadata.is_chunk = True
+
  def split_simple(self, docs: List[Document]) -> List[Document]:
  if len(self.config.separators) == 0:
  raise ValueError("Must have at least one separator")
- return [
- Document(content=chunk.strip(), metadata=d.metadata)
- for d in docs
- for chunk in remove_extra_whitespace(d.content).split(
- self.config.separators[0]
- )
- if chunk.strip() != ""
- ]
+ final_docs = []
+ for d in docs:
+ if d.content.strip() == "":
+ continue
+ chunks = remove_extra_whitespace(d.content).split(self.config.separators[0])
+ chunk_docs = [
+ Document(
+ content=c, metadata=d.metadata.copy(update=dict(is_chunk=True))
+ )
+ for c in chunks
+ if c.strip() != ""
+ ]
+ self.add_window_ids(chunk_docs)
+ final_docs += chunk_docs
+ return final_docs

  def split_para_sentence(self, docs: List[Document]) -> List[Document]:
  final_chunks = []
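The window bookkeeping in `add_window_ids` is a plain sliding slice over the per-chunk id list. A standalone illustration (not the library code) of what gets stored when `n_neighbor_ids` is 2 and there are five chunks:

    # k = n_neighbor_ids; ids stand in for the per-chunk hash ids computed above
    ids = ["c0", "c1", "c2", "c3", "c4"]
    k = 2
    n = len(ids)
    window_ids = [ids[max(0, i - k): min(n, i + k + 1)] for i in range(n)]
    # window_ids[0] == ["c0", "c1", "c2"]
    # window_ids[2] == ["c0", "c1", "c2", "c3", "c4"]
    # window_ids[4] == ["c2", "c3", "c4"]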
@@ -95,28 +120,37 @@ class Parser:
  return final_chunks + chunks

  def _split_para_sentence_once(self, docs: List[Document]) -> List[Document]:
- chunked_docs = [
- [
- Document(content=chunk.strip(), metadata=d.metadata)
- for chunk in create_chunks(
- d.content, self.config.chunk_size, self.num_tokens
+ final_chunks = []
+ for d in docs:
+ if d.content.strip() == "":
+ continue
+ chunks = create_chunks(d.content, self.config.chunk_size, self.num_tokens)
+ chunk_docs = [
+ Document(
+ content=c, metadata=d.metadata.copy(update=dict(is_chunk=True))
  )
- if chunk.strip() != ""
+ for c in chunks
+ if c.strip() != ""
  ]
- for d in docs
- ]
- return reduce(lambda x, y: x + y, chunked_docs)
+ self.add_window_ids(chunk_docs)
+ final_chunks += chunk_docs
+
+ return final_chunks

  def split_chunk_tokens(self, docs: List[Document]) -> List[Document]:
- chunked_docs = [
- [
- Document(content=chunk.strip(), metadata=d.metadata)
- for chunk in self.chunk_tokens(d.content)
- if chunk.strip() != ""
+ final_docs = []
+ for d in docs:
+ chunks = self.chunk_tokens(d.content)
+ chunk_docs = [
+ Document(
+ content=c, metadata=d.metadata.copy(update=dict(is_chunk=True))
+ )
+ for c in chunks
+ if c.strip() != ""
  ]
- for d in docs
- ]
- return reduce(lambda x, y: x + y, chunked_docs)
+ self.add_window_ids(chunk_docs)
+ final_docs += chunk_docs
+ return final_docs

  def chunk_tokens(
  self,
@@ -198,11 +232,8 @@ class Parser:
  # Increment the number of chunks
  num_chunks += 1

- # Handle the remaining tokens
- if tokens:
- remaining_text = self.tokenizer.decode(tokens).replace("\n", " ").strip()
- if len(remaining_text) > self.config.discard_chunk_chars:
- chunks.append(remaining_text)
+ # There may be remaining tokens, but we discard them
+ # since we have already reached the maximum number of chunks

  return chunks

langroid/parsing/search.py CHANGED
@@ -7,7 +7,6 @@ See tests for examples: tests/main/test_string_search.py
  """

  import difflib
- import re
  from typing import List, Tuple

  from nltk.corpus import stopwords
@@ -24,6 +23,7 @@ from .utils import download_nltk_resource
  def find_fuzzy_matches_in_docs(
  query: str,
  docs: List[Document],
+ docs_clean: List[Document],
  k: int,
  words_before: int | None = None,
  words_after: int | None = None,
@@ -49,45 +49,45 @@ def find_fuzzy_matches_in_docs(
  return []
  best_matches = process.extract(
  query,
- [d.content for d in docs],
+ [d.content for d in docs_clean],
  limit=k,
  scorer=fuzz.partial_ratio,
  )

  real_matches = [m for m, score in best_matches if score > 50]
-
- results = []
- for match in real_matches:
- words = match.split()
- for doc in docs:
- if match in doc.content:
- words_in_text = doc.content.split()
- first_word_idx = next(
- (
- i
- for i, word in enumerate(words_in_text)
- if word.startswith(words[0])
- ),
- -1,
- )
- if words_before is None:
- words_before = len(words_in_text)
- if words_after is None:
- words_after = len(words_in_text)
- if first_word_idx != -1:
- start_idx = max(0, first_word_idx - words_before)
- end_idx = min(
- len(words_in_text),
- first_word_idx + len(words) + words_after,
- )
- doc_match = Document(
- content=" ".join(words_in_text[start_idx:end_idx]),
- metadata=doc.metadata,
- )
- results.append(doc_match)
+ # find the original docs that corresponding to the matches
+ orig_doc_matches = []
+ for i, m in enumerate(real_matches):
+ for j, doc_clean in enumerate(docs_clean):
+ if m in doc_clean.content:
+ orig_doc_matches.append(docs[j])
  break
+ if words_after is None and words_before is None:
+ return orig_doc_matches
+
+ contextual_matches = []
+ for match in orig_doc_matches:
+ choice_text = match.content
+ contexts = []
+ while choice_text != "":
+ context, start_pos, end_pos = get_context(
+ query, choice_text, words_before, words_after
+ )
+ if context == "" or end_pos == 0:
+ break
+ contexts.append(context)
+ words = choice_text.split()
+ end_pos = min(end_pos, len(words))
+ choice_text = " ".join(words[end_pos:])
+ if len(contexts) > 0:
+ contextual_matches.append(
+ Document(
+ content=" ... ".join(contexts),
+ metadata=match.metadata,
+ )
+ )

- return results
+ return contextual_matches


  def preprocess_text(text: str) -> str:
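`find_fuzzy_matches_in_docs` now takes the cleaned (stopword-stripped) documents alongside the originals: fuzzy scoring runs against `docs_clean`, while the returned passages are built from the corresponding entries of `docs`, so the two lists must be parallel. A rough usage sketch under that assumption (illustrative documents; NLTK stopword data must be available for `preprocess_text`):

    from langroid.mytypes import DocMetaData, Document
    from langroid.parsing.search import find_fuzzy_matches_in_docs, preprocess_text

    docs = [
        Document(content=t, metadata=DocMetaData(source="test"))
        for t in ["The giraffe is a very tall animal.", "Pandas eat bamboo all day."]
    ]
    docs_clean = [
        Document(content=preprocess_text(d.content), metadata=d.metadata) for d in docs
    ]
    matches = find_fuzzy_matches_in_docs(
        "giraffe", docs, docs_clean, k=1, words_before=5, words_after=5
    )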
@@ -171,7 +171,7 @@ def get_context(
  text: str,
  words_before: int | None = 100,
  words_after: int | None = 100,
- ) -> str:
+ ) -> Tuple[str, int, int]:
  """
  Returns a portion of text containing the best approximate match of the query,
  including b words before and a words after the match.
@@ -185,7 +185,9 @@ def get_context(
  Returns:
  str: A string containing b words before, the match, and a words after
  the best approximate match position of the query in the text. If no
- match is found, returns "No match found".
+ match is found, returns empty string.
+ int: The start position of the match in the text.
+ int: The end position of the match in the text.

  Example:
  >>> get_context("apple", "The quick brown fox jumps over the apple.", 3, 2)
@@ -193,26 +195,29 @@ def get_context(
  """
  if words_after is None and words_before is None:
  # return entire text since we're not asked to return a bounded context
- return text
+ return text, 0, 0
+
+ # make sure there is a good enough fuzzy match between query and text
+ if fuzz.partial_ratio(query, text) < 70:
+ return "", 0, 0

  sequence_matcher = difflib.SequenceMatcher(None, text, query)
  match = sequence_matcher.find_longest_match(0, len(text), 0, len(query))

  if match.size == 0:
- return "No match found"
-
- words = re.findall(r"\b\w+\b", text)
- if words_after is None:
- words_after = len(words)
- if words_before is None:
- words_before = len(words)
- start_word_pos = len(re.findall(r"\b\w+\b", text[: match.a]))
- start_pos = max(0, start_word_pos - words_before)
- end_pos = min(
- len(words), start_word_pos + words_after + len(re.findall(r"\b\w+\b", query))
- )
+ return "", 0, 0
+
+ segments = text.split()
+ n_segs = len(segments)
+
+ start_segment_pos = len(text[: match.a].split())
+
+ words_before = words_before or n_segs
+ words_after = words_after or n_segs
+ start_pos = max(0, start_segment_pos - words_before)
+ end_pos = min(len(segments), start_segment_pos + words_after + len(query.split()))

- return " ".join(words[start_pos:end_pos])
+ return " ".join(segments[start_pos:end_pos]), start_pos, end_pos


  def eliminate_near_duplicates(passages: List[str], threshold: float = 0.8) -> List[str]:
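`get_context` now returns a `(context, start_pos, end_pos)` triple, where the positions are word indices into the text, and it bails out early with `("", 0, 0)` when the fuzzy score is below 70. Working through the docstring's own example with the code above:

    from langroid.parsing.search import get_context

    context, start, end = get_context(
        "apple", "The quick brown fox jumps over the apple.", words_before=3, words_after=2
    )
    # context == "jumps over the apple."   start == 4, end == 8 (word positions in the text)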
langroid/parsing/utils.py CHANGED
@@ -165,6 +165,32 @@ def parse_number_range_list(specs: str) -> List[int]:
  return sorted(list(spec_indices))


+ def strip_k(s: str, k: int = 2) -> str:
+ """
+ Strip any leading and trailing whitespaces from the input text beyond length k.
+ This is useful for removing leading/trailing whitespaces from a text while
+ preserving paragraph structure.
+
+ Args:
+ s (str): The input text.
+ k (int): The number of leading and trailing whitespaces to retain.
+
+ Returns:
+ str: The text with leading and trailing whitespaces removed beyond length k.
+ """
+
+ # Count leading and trailing whitespaces
+ leading_count = len(s) - len(s.lstrip())
+ trailing_count = len(s) - len(s.rstrip())
+
+ # Determine how many whitespaces to retain
+ leading_keep = min(leading_count, k)
+ trailing_keep = min(trailing_count, k)
+
+ # Use slicing to get the desired output
+ return s[leading_count - leading_keep : len(s) - (trailing_count - trailing_keep)]
+
+
  def clean_whitespace(text: str) -> str:
  """Remove extra whitespace from the input text, while preserving
  paragraph structure.
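`strip_k` trims whitespace only beyond the first/last `k` whitespace characters, which lets some surrounding blank-line structure survive a cleanup pass. A quick illustrative call:

    from langroid.parsing.utils import strip_k

    s = "\n\n\n  hello  \n\n\n"
    print(repr(strip_k(s, k=2)))   # '  hello  ' -- only 2 leading and 2 trailing whitespace chars kept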
langroid/utils/algorithms/graph.py ADDED
@@ -0,0 +1,49 @@
+ """
+ Graph algos.
+ """
+
+ from typing import List, no_type_check
+
+ import numpy as np
+
+
+ @no_type_check
+ def topological_sort(order: np.array) -> List[int]:
+ """
+ Given a directed adjacency matrix, return a topological sort of the nodes.
+ order[i,j] = -1 means there is an edge from i to j.
+ order[i,j] = 0 means there is no edge from i to j.
+ order[i,j] = 1 means there is an edge from j to i.
+
+ Args:
+ order (np.array): The adjacency matrix.
+
+ Returns:
+ List[int]: The topological sort of the nodes.
+
+ """
+ n = order.shape[0]
+
+ # Calculate the in-degrees
+ in_degree = [0] * n
+ for i in range(n):
+ for j in range(n):
+ if order[i, j] == -1:
+ in_degree[j] += 1
+
+ # Initialize the queue with nodes of in-degree 0
+ queue = [i for i in range(n) if in_degree[i] == 0]
+ result = []
+
+ while queue:
+ node = queue.pop(0)
+ result.append(node)
+
+ for i in range(n):
+ if order[node, i] == -1:
+ in_degree[i] -= 1
+ if in_degree[i] == 0:
+ queue.append(i)
+
+ assert len(result) == n, "Cycle detected"
+ return result
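The sign convention here is easy to invert, so a small illustrative call may help: with edges 0 -> 1 and 1 -> 2 encoded as -1 entries (and the mirror +1 entries filled in, as the vector-store code does), the sort returns [0, 1, 2]. The import path matches the RECORD listing at the end of this diff.

    import numpy as np
    from langroid.utils.algorithms.graph import topological_sort

    order = np.zeros((3, 3), dtype=np.int8)
    order[0, 1] = -1   # edge 0 -> 1
    order[1, 0] = 1
    order[1, 2] = -1   # edge 1 -> 2
    order[2, 1] = 1
    print(topological_sort(order))   # [0, 1, 2]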
langroid/utils/configuration.py CHANGED
@@ -71,6 +71,19 @@ def temporary_settings(temp_settings: Settings) -> Iterator[None]:
  settings.__dict__.update(original_settings.__dict__)


+ @contextmanager
+ def quiet_mode() -> Iterator[None]:
+ """Temporarily set quiet=True in global settings and restore afterward."""
+ original_quiet = settings.quiet
+
+ set_global(Settings(quiet=True))
+
+ try:
+ yield
+ finally:
+ settings.quiet = original_quiet
+
+
  def set_env(settings: BaseSettings) -> None:
  """
  Set environment variables from a BaseSettings instance
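`quiet_mode` is the context manager that batch.py above now uses in place of `temporary_settings(Settings(quiet=True))`. A minimal usage sketch:

    from langroid.utils.configuration import quiet_mode, settings

    with quiet_mode():
        assert settings.quiet      # console/logging output is suppressed inside the block
    # on exit, settings.quiet is restored to whatever it was before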
langroid/utils/pydantic_utils.py CHANGED
@@ -79,7 +79,9 @@ def flatten_pydantic_model(
  current_model, current_prefix = models_to_process.pop()

  for name, field in current_model.__fields__.items():
- if issubclass(field.outer_type_, BaseModel):
+ if isinstance(field.outer_type_, type) and issubclass(
+ field.outer_type_, BaseModel
+ ):
  new_prefix = (
  f"{current_prefix}{name}__" if current_prefix else f"{name}__"
  )
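The added `isinstance(..., type)` guard matters because for generic fields such as `List[str]`, pydantic's `field.outer_type_` is a typing construct rather than a class, and `issubclass` raises `TypeError` on non-classes. A standalone illustration of the failure the guard avoids:

    from typing import List
    from pydantic import BaseModel

    try:
        issubclass(List[str], BaseModel)
    except TypeError as e:
        print(e)                        # issubclass() arg 1 must be a class
    print(isinstance(List[str], type))  # False, so the guarded check is skipped safely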
langroid/vector_store/base.py CHANGED
@@ -1,12 +1,16 @@
+ import copy
  import logging
  from abc import ABC, abstractmethod
- from typing import List, Optional, Sequence, Tuple
+ from math import ceil
+ from typing import Dict, List, Optional, Sequence, Tuple

+ import numpy as np
  from pydantic import BaseSettings

  from langroid.embedding_models.base import EmbeddingModelsConfig
  from langroid.embedding_models.models import OpenAIEmbeddingsConfig
  from langroid.mytypes import Document
+ from langroid.utils.algorithms.graph import topological_sort
  from langroid.utils.configuration import settings
  from langroid.utils.output.printing import print_long_text

@@ -130,8 +134,160 @@ class VectorStore(ABC):
  k: int = 1,
  where: Optional[str] = None,
  ) -> List[Tuple[Document, float]]:
+ """
+ Find k most similar texts to the given text, in terms of vector distance metric
+ (e.g., cosine similarity).
+
+ Args:
+ text (str): The text to find similar texts for.
+ k (int, optional): Number of similar texts to retrieve. Defaults to 1.
+ where (Optional[str], optional): Where clause to filter the search.
+
+ Returns:
+ List[Tuple[Document,float]]: List of (Document, score) tuples.
+
+ """
  pass

+ def add_context_window(
+ self, docs_scores: List[Tuple[Document, float]], neighbors: int = 0
+ ) -> List[Tuple[Document, float]]:
+ """
+ In each doc's metadata, there may be a window_ids field indicating
+ the ids of the chunks around the current chunk.
+ These window_ids may overlap, so we
+ - gather connected-components of overlapping windows,
+ - split each component into roughly equal parts,
+ - create a new document for each part, preserving metadata,
+
+ We may have stored a longer set of window_ids than we need.
+ We just want `neighbors` on each side of the center of window_ids.
+
+ Args:
+ docs (List[Document]): List of documents to add context window to.
+ scores (List[float]): List of match scores for each document.
+ neighbors (int, optional): Number of neighbors on "each side" of match to
+ retrieve. Defaults to 0.
+ "Each side" here means before and after the match,
+ in the original text.
+
+ Returns:
+ List[Tuple[Document, float]]: List of (Document, score) tuples.
+ """
+ # We return a larger context around each match, i.e.
+ # a window of `neighbors` on each side of the match.
+ docs = [d for d, s in docs_scores]
+ scores = [s for d, s in docs_scores]
+ if neighbors == 0:
+ return docs_scores
+ doc_chunks = [d for d in docs if d.metadata.is_chunk]
+ if len(doc_chunks) == 0:
+ return docs_scores
+ window_ids_list = []
+ id2metadata = {}
+ # id -> highest score of a doc it appears in
+ id2max_score: Dict[int | str, float] = {}
+ for i, d in enumerate(docs):
+ window_ids = d.metadata.window_ids
+ id2metadata.update({id: d.metadata for id in window_ids})
+
+ id2max_score.update(
+ {id: max(id2max_score.get(id, 0), scores[i]) for id in window_ids}
+ )
+ n = len(window_ids)
+ chunk_idx = window_ids.index(d.id())
+ neighbor_ids = window_ids[
+ max(0, chunk_idx - neighbors) : min(n, chunk_idx + neighbors + 1)
+ ]
+ window_ids_list += [neighbor_ids]
+
+ # window_ids could be from different docs,
+ # and they may overlap, so we first remove overlaps
+ window_ids_list = self.remove_overlaps(window_ids_list)
+ final_docs = []
+ final_scores = []
+ for w in window_ids_list:
+ metadata = copy.deepcopy(id2metadata[w[0]])
+ metadata.window_ids = w
+ document = Document(
+ content=" ".join([d.content for d in self.get_documents_by_ids(w)]),
+ metadata=metadata,
+ )
+ # make a fresh id since content is in general different
+ document.metadata.id = document.hash_id(document.content)
+ final_docs += [document]
+ final_scores += [max(id2max_score[id] for id in w)]
+ return list(zip(final_docs, final_scores))
+
+ @staticmethod
+ def remove_overlaps(windows: List[List[str]]) -> List[List[str]]:
+ """
+ Given a collection of windows, where each window is a sequence of ids,
+ identify groups of overlapping windows, and for each overlapping k-group,
+ split the ids into k roughly equal sequences.
+
+ Args:
+ windows (List[int|str]): List of windows, where each window is a
+ sequence of ids.
+
+ Returns:
+ List[int|str]: List of windows, where each window is a sequence of ids,
+ and no two windows overlap.
+ """
+ ids = set(id for w in windows for id in w)
+ # id -> {win -> # pos}
+ id2win2pos: Dict[str, Dict[int, int]] = {id: {} for id in ids}
+
+ for i, w in enumerate(windows):
+ for j, id in enumerate(w):
+ id2win2pos[id][i] = j
+
+ n = len(windows)
+ # relation between windows:
+ order = np.zeros((n, n), dtype=np.int8)
+ for i, w in enumerate(windows):
+ for j, x in enumerate(windows):
+ if i == j:
+ continue
+ if len(set(w).intersection(x)) == 0:
+ continue
+ id = list(set(w).intersection(x))[0] # any common id
+ if id2win2pos[id][i] > id2win2pos[id][j]:
+ order[i, j] = -1 # win i is before win j
+ else:
+ order[i, j] = 1 # win i is after win j
+
+ # find groups of windows that overlap, like connected components in a graph
+ groups = [[0]]
+ for i in range(1, n):
+ found = False
+ for g in groups:
+ if any(order[i, j] != 0 for j in g):
+ g.append(i)
+ found = True
+ break
+ if not found:
+ groups.append([i])
+
+ # split each group into roughly equal parts
+ new_windows = []
+ max_window_len = max(len(w) for w in windows)
+ for g in groups:
+ # find total ordering among windows in group based on order matrix
+ # (this is a topological sort)
+ _g = np.array(g)
+ order_matrix = order[_g][:, _g]
+ ordered_window_indices = topological_sort(order_matrix)
+ ordered_window_ids = [windows[i] for i in _g[ordered_window_indices]]
+ flattened = [id for w in ordered_window_ids for id in w]
+ flattened_deduped = list(dict.fromkeys(flattened))
+ # split into k parts where k is the smallest integer such that
+ # each part has length <= max_window_len
+ k = max(1, int(ceil(len(flattened_deduped) / max_window_len)))
+ new_windows += np.array_split(flattened_deduped, k)
+
+ return [w.tolist() for w in new_windows]
+
  @abstractmethod
  def get_all_documents(self) -> List[Document]:
  """
langroid/vector_store/chromadb.py CHANGED
@@ -109,14 +109,17 @@ class ChromaDB(VectorStore):
  if documents is None:
  return
  contents: List[str] = [document.content for document in documents]
- metadatas: List[dict[str, Any]] = [
- document.metadata.dict() for document in documents
- ]
+ # convert metadatas to dicts so chroma can handle them
+ metadata_dicts: List[dict[str, Any]] = [d.metadata.dict() for d in documents]
+ for m in metadata_dicts:
+ # chroma does not handle non-atomic types in metadata
+ m["window_ids"] = ",".join(m["window_ids"])
+
  ids = [str(d.id()) for d in documents]
  self.collection.add(
  # embedding_models=embedding_models,
  documents=contents,
- metadatas=metadatas,
+ metadatas=metadata_dicts,
  ids=ids,
  )

@@ -145,7 +148,8 @@ class ChromaDB(VectorStore):
  include=["documents", "distances", "metadatas"],
  )
  docs = self._docs_from_results(results)
- scores = results["distances"][0]
+ # chroma distances are 1 - cosine.
+ scores = [1 - s for s in results["distances"][0]]
  return list(zip(docs, scores))

  def _docs_from_results(self, results: Dict[str, Any]) -> List[Document]:
@@ -164,22 +168,11 @@ class ChromaDB(VectorStore):
  for i, c in enumerate(contents):
  print_long_text("red", "italic red", f"MATCH-{i}", c)
  metadatas = results["metadatas"][0]
+ for m in metadatas:
+ # restore the stringified list of window_ids into the original List[str]
+ m["window_ids"] = m["window_ids"].split(",")
  docs = [
  Document(content=d, metadata=DocMetaData(**m))
  for d, m in zip(contents, metadatas)
  ]
  return docs
-
-
- # Example usage and testing
- # chroma_db = ChromaDB.from_documents(
- # collection_name="all-my-documents",
- # documents=["doc1000101", "doc288822"],
- # metadatas=[{"style": "style1"}, {"style": "style2"}],
- # ids=["uri9", "uri10"]
- # )
- # results = chroma_db.query(
- # query_texts=["This is a query document"],
- # n_results=2
- # )
- # print(results)
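Chroma metadata values must be atomic (per the comment in the hunk above), so `window_ids` is flattened to a comma-joined string on write and split back into a list on read; the round trip only holds if the ids themselves contain no commas. A standalone illustration:

    window_ids = ["id-1", "id-2", "id-3"]
    stored = ",".join(window_ids)    # what goes into the chroma metadata: "id-1,id-2,id-3"
    restored = stored.split(",")     # what _docs_from_results reconstructs
    assert restored == window_ids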
langroid/vector_store/meilisearch.py CHANGED
@@ -263,6 +263,7 @@ class MeiliSearch(VectorStore):
  text: str,
  k: int = 20,
  where: Optional[str] = None,
+ neighbors: int = 0, # ignored
  ) -> List[Tuple[Document, float]]:
  filter = [] if where is None else where
  if self.config.collection_name is None:
langroid/vector_store/momento.py CHANGED
@@ -222,6 +222,7 @@ class MomentoVI(VectorStore):
  text: str,
  k: int = 1,
  where: Optional[str] = None,
+ neighbors: int = 0, # ignored
  ) -> List[Tuple[Document, float]]:
  if self.config.collection_name is None:
  raise ValueError("No collection name set, cannot search")
langroid/vector_store/qdrantdb.py CHANGED
@@ -244,7 +244,11 @@ class QdrantDB(VectorStore):
  with_vectors=False,
  with_payload=True,
  )
- docs = [Document(**record.payload) for record in records] # type: ignore
+ # Note the records may NOT be in the order of the ids,
+ # so we re-order them here.
+ id2payload = {record.id: record.payload for record in records}
+ ordered_payloads = [id2payload[id] for id in _ids]
+ docs = [Document(**payload) for payload in ordered_payloads] # type: ignore
  return docs

  def similar_texts_with_scores(
@@ -252,6 +256,7 @@ class QdrantDB(VectorStore):
  text: str,
  k: int = 1,
  where: Optional[str] = None,
+ neighbors: int = 0,
  ) -> List[Tuple[Document, float]]:
  embedding = self.embedding_fn([text])[0]
  # TODO filter may not work yet
@@ -268,7 +273,7 @@ class QdrantDB(VectorStore):
  exact=False, # use Apx NN, not exact NN
  ),
  )
- scores = [match.score for match in search_result]
+ scores = [match.score for match in search_result if match is not None]
  docs = [
  Document(**(match.payload)) # type: ignore
  for match in search_result
@@ -277,8 +282,9 @@ class QdrantDB(VectorStore):
  if len(docs) == 0:
  logger.warning(f"No matches found for {text}")
  return []
- if settings.debug:
- logger.info(f"Found {len(docs)} matches, max score: {max(scores)}")
  doc_score_pairs = list(zip(docs, scores))
+ max_score = max(ds[1] for ds in doc_score_pairs)
+ if settings.debug:
+ logger.info(f"Found {len(doc_score_pairs)} matches, max score: {max_score}")
  self.show_if_debug(doc_score_pairs)
  return doc_score_pairs
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: langroid
- Version: 0.1.101
+ Version: 0.1.102
  Summary: Harness LLMs with Multi-Agent Programming
  License: MIT
  Author: Prasad Chalasani
@@ -1,16 +1,16 @@
  langroid/__init__.py,sha256=-AWkFhhW0b0paHQ11SORyIVPnXv0nyT2X_0_xh3zLjw,408
  langroid/agent/__init__.py,sha256=ZqDw3Ktw7XGDl6mC8DN61F71V4ckf0rBoEOydH9l6C4,428
  langroid/agent/base.py,sha256=NjRf_y5ymZqpFlXh2sK94lcJRJbrBhw5pd1_Qe1Da_E,30151
- langroid/agent/batch.py,sha256=in7OGXcOhKnJo9WDfIuNcP1xb-QZ1Y4rCy2w0dzR9C0,5496
+ langroid/agent/batch.py,sha256=p5UPdvRn6QOpb3V4B517nPCF7nZemKk7_9YlJ7jR7w4,5450
  langroid/agent/chat_agent.py,sha256=qjCwvR7i9DtonTmm1d1mbBHN4aW0LzxABAL-2JfGcF8,33548
  langroid/agent/chat_document.py,sha256=k7Klav3FIBTf2w95bQtxgqBrf2fMo1ydSlklQvv4RCg,6252
  langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
  langroid/agent/special/__init__.py,sha256=ciNhdoIIjFxNk-5xcy8H76A3d-TldbIYaFexlgfN-2A,575
- langroid/agent/special/doc_chat_agent.py,sha256=bQsDmrZrnydIW7Cll9WkhTgqMqt3bndknziiB2iTbV4,31330
+ langroid/agent/special/doc_chat_agent.py,sha256=ko6_oYJuv70zQ0mvLXcQMj_ZB1UVE8bw2V2Ng-FoVoI,33425
  langroid/agent/special/recipient_validator_agent.py,sha256=R3Rit93BNWQar_9stuDBGzmLr2W-IYOQ7oq-tlNNlps,6035
  langroid/agent/special/relevance_extractor_agent.py,sha256=JU52PbY5FO72kfnA902-UKzVgxExndlwEC7Lb-XqDNI,4348
- langroid/agent/special/retriever_agent.py,sha256=q_JMZEe_4Ha1zWJrEyIWAesmrlAa7_VOSHTi8AQNXyo,6579
+ langroid/agent/special/retriever_agent.py,sha256=ze8jXJW9A_twsrRXVECAQCYicfjm8-a6qv1vDk41AAc,6573
  langroid/agent/special/sql/__init__.py,sha256=3kR5nC0wnYIzmMrr9L8RJa7JAJpbwBLx7KKygiwz0v0,111
  langroid/agent/special/sql/sql_chat_agent.py,sha256=Ua_gfK_1k5ct59Zkbe78bzs-2jabtFkEVx76a0pGs9Y,12867
  langroid/agent/special/sql/utils/__init__.py,sha256=_IBHt3iNXvPqxvDrs5_T86qdj0gPugVGnGNi6Cx7F-I,238
@@ -45,24 +45,24 @@ langroid/language_models/prompt_formatter/__init__.py,sha256=wj2e6j7R9d3m63HCbSD
  langroid/language_models/prompt_formatter/base.py,sha256=2y_GcwhstvB5ih3haS7l5Fv79jVnFJ_vEw1jqWJzB9k,1247
  langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeuMENVIVvVqSYuEpvYSTndUe_jd6hVTko4,2899
  langroid/language_models/utils.py,sha256=rmnSn-sJ3aKl_wBdeLPkck0Li4Ed6zkCxZYYl7n1V34,4668
- langroid/mytypes.py,sha256=5jl4vpnwN2U19Eyh0mH1JhoVFpa8Ml7-HYpNyrgSArw,2110
+ langroid/mytypes.py,sha256=XmEUL_xAZfeWuJLEvQe_4g-W9P7rpY6zOIAHhtYikwk,2363
  langroid/parsing/__init__.py,sha256=_EZ8iuixxU39zuaydtfjyap8g9C_c1dnrCQ0QR81U2E,340
  langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
  langroid/parsing/code-parsing.md,sha256=--cyyNiSZSDlIwcjAV4-shKrSiRe2ytF3AdSoS_hD2g,3294
  langroid/parsing/code_parser.py,sha256=BbDAzp35wkYQ9U1dpf1ARL0lVyi0tfqEc6_eox2C090,3727
  langroid/parsing/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- langroid/parsing/document_parser.py,sha256=wsBwZ8LHzobAoxxyV46iZQ5B69lRkZjxDEIV6d7SwEg,14581
+ langroid/parsing/document_parser.py,sha256=YC3IXQ9ErpBGBZh6Be9gfJWHcTwGTSMfNQMT5ARrj5g,14615
  langroid/parsing/json.py,sha256=MVqBUfInALQm1QKbcfEvLzWxBz_UztCIyGk7AK5uFPo,1650
  langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
- langroid/parsing/parser.py,sha256=99RE4sQg5CHH4xEznuJOE_yl3lIIehkRyGmUdq4hmuo,8070
+ langroid/parsing/parser.py,sha256=3EVPkOfXehZwUvdM-tn7LN951722_2c7umGtwzwdxts,9297
  langroid/parsing/repo_loader.py,sha256=nmtvorVip4VQbUMDxoxpVyAlbLt8R8eJjxpAX0vVlfs,27695
- langroid/parsing/search.py,sha256=OEIR8a2f_IRiTuo6EjBzWtJz0cJjsHWwhk5lvoRn5qs,8247
+ langroid/parsing/search.py,sha256=h-C0Ij111cI7lcddr_vdABjfNKXDqBkJVG48WyJCovA,8424
  langroid/parsing/spider.py,sha256=aX0ucHQ9SVgpieNjtEn_G1bhq5DH_03VpBXoxcdZPl8,3008
  langroid/parsing/table_loader.py,sha256=uqbupGr4y_7os18RtaY5GpD0hWcgzROoNy8dQIHB4kc,1767
  langroid/parsing/url_loader.py,sha256=dhmUTysS_YZyIXVAekxCGPiCbFsOsHXj_eHMow0xoGQ,2153
  langroid/parsing/url_loader_cookies.py,sha256=Lg4sNpRz9MByWq2mde6T0hKv68VZSV3mtMjNEHuFeSU,2327
  langroid/parsing/urls.py,sha256=vJ-ZJROtmLwykoE690w5y0BxWN2QOpbxR4hy03knx6Q,7520
- langroid/parsing/utils.py,sha256=UL-8Klr5sxDuZkMBeBXJlYnGe3kG4JAry3X58_j9o7E,6763
+ langroid/parsing/utils.py,sha256=nuCW_sRe5js0d-K6EtDEIbFQpMicS1ntr3FXxtYtGzw,7639
  langroid/parsing/web_search.py,sha256=hGUVoSJNdpoT5rsm-ikAteMiUropHrzKaxN8EVVqO2U,2496
  langroid/prompts/__init__.py,sha256=aTW86CbDZM7tntqiTVeNLYJv7pbRDcKOI3qHVXCEHUY,99
  langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
@@ -70,7 +70,8 @@ langroid/prompts/prompts_config.py,sha256=EMK1Fm7EmS8y3CV4AkrVgn5K4NipiM4m7J8819
  langroid/prompts/templates.py,sha256=4X-07tnmUQ8Z_zaWRQAUUyKiErGztp3tERujqnG8sGA,6369
  langroid/prompts/transforms.py,sha256=GsQo1klGxUy0fACh6j0lTblk6XEl2erRnhRWlN2M4-c,2706
  langroid/utils/__init__.py,sha256=3aMfdwFizpl3W2H5Q-TMqUFqMoYgec1NiX-caSnClmQ,167
- langroid/utils/configuration.py,sha256=KqhNU1ar-R-3KoWXyhHk727cEAxYajLIIBY3A3jFiZQ,2760
+ langroid/utils/algorithms/graph.py,sha256=5D7scuxeofllU6xh8_tIcc2WiHVn0MjVQ7lSPrOgKr4,1173
+ langroid/utils/configuration.py,sha256=p_MlevqGdS3681u2IiDgrMXBCytg9xZwQH5OK9PUNno,3044
  langroid/utils/constants.py,sha256=edJ5J-sC9CeUwwNey_uLQbGbHgjX-T8XLf_J53h3Tys,484
  langroid/utils/docker.py,sha256=kJQOLTgM0x9j9pgIIqp0dZNZCTvoUDhp6i8tYBq1Jr0,1105
  langroid/utils/globals.py,sha256=UubMelOGkLy3BxByl1vprITU4dbysZmCtYBvZWL8dto,1337
@@ -79,20 +80,20 @@ langroid/utils/llms/strings.py,sha256=CSAX9Z6FQOLXOzbLMe_Opqtc3ruDAKTTk7cPqc6Blh
  langroid/utils/logging.py,sha256=xXpohbvK74_reomdkIWTeyDjGG8GT1fuU7zcLL3Ngt8,3951
  langroid/utils/output/__init__.py,sha256=IpfqnCkfXa4HaOx39EMUhXuA7GPZFd7N_QMm1n43C_I,174
  langroid/utils/output/printing.py,sha256=RZoY8S-8UljiVURe5o5SljpzwF77LTCO7-68nf_uvA4,2277
- langroid/utils/pydantic_utils.py,sha256=xV6vItb6LfIEgSCEsHm5JUTKVfqi-mJoct4SWqx4o0E,6226
+ langroid/utils/pydantic_utils.py,sha256=00ajeBTvxJEOyqd7M7FveRz7oa9wdQ0QFKvOjy_ZfRE,6296
  langroid/utils/system.py,sha256=LyFrSPfvAnhA8GSRjT-2HOkLzxmziZ8wfpDYMqSv01M,1518
  langroid/utils/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  langroid/utils/web/login.py,sha256=1iz9eUAHa87vpKIkzwkmFa00avwFWivDSAr7QUhK7U0,2528
  langroid/utils/web/selenium_login.py,sha256=mYI6EvVmne34N9RajlsxxRqJQJvV-WG4LGp6sEECHPw,1156
  langroid/vector_store/__init__.py,sha256=NhAXOCKX_x2whfghOn44e0O3-vV0nJRz6ZLsCBqYFyQ,242
- langroid/vector_store/base.py,sha256=VVsk6WvGW6Rc0TLEcyVVOTAB0pFlfZ1kSd9VyDl_eHM,5492
- langroid/vector_store/chromadb.py,sha256=2vWoOwWIgeRVIMiywAl084eruBBQhkd8_XzAg-K_saU,6744
+ langroid/vector_store/base.py,sha256=VQb_7EIJ1r3g-fzyP4b-WSfZg73rtdYsILIcHZLl4NM,11989
+ langroid/vector_store/chromadb.py,sha256=EJONjIa77Bkr8ych5JLykYV9n-DP_9jqFechmmZHHwI,6803
  langroid/vector_store/lancedb.py,sha256=_d7Mz7O8j4keYgHzFSpEOBFq6L13kDJ3eQOZAIrIaOc,11262
- langroid/vector_store/meilisearch.py,sha256=r5-2sybfE0zPt7wEO_HB7JqVI9Mf5O55uVS1L-Mx-jM,11168
- langroid/vector_store/momento.py,sha256=gr9Mig72OMH3sLGEh3jS5p-8txiuMNkZoCrl2x0we4E,9891
+ langroid/vector_store/meilisearch.py,sha256=aQ5Bo-Rk-BnMxbcCTpR7yVm4aNNZHy4hlJBJxn-UpYw,11207
+ langroid/vector_store/momento.py,sha256=krw1KwyVRE-ekq1KUAktsMxrJfeolsAC5BmK-1zdxsg,9930
  langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
- langroid/vector_store/qdrantdb.py,sha256=t6ITLTFrtAru7J0DqYXo9JhJjciHGxvs7zWejx2P9Ts,11062
- langroid-0.1.101.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
- langroid-0.1.101.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
- langroid-0.1.101.dist-info/METADATA,sha256=9cQIK4t1NhfNMGh40LO1BTKlkAvCbEEE5aLFE4jt6-Y,38599
- langroid-0.1.101.dist-info/RECORD,,
+ langroid/vector_store/qdrantdb.py,sha256=YfH0t5nzBBMmwyH0_QndQNnrSfv_3_LFpjlVzcEhbso,11409
+ langroid-0.1.102.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+ langroid-0.1.102.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
+ langroid-0.1.102.dist-info/METADATA,sha256=zcrmh544o2NXQXuOhUt4YTSf9P6McMB3WQQUxVYkp_g,38599
+ langroid-0.1.102.dist-info/RECORD,,