beekeeper-ai 0.6.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (41)
  1. beekeeper/_bundle/__init__.py +0 -0
  2. beekeeper_ai-1.0.0.dist-info/METADATA +41 -0
  3. beekeeper_ai-1.0.0.dist-info/RECORD +5 -0
  4. {beekeeper_ai-0.6.5.dist-info → beekeeper_ai-1.0.0.dist-info}/WHEEL +1 -1
  5. beekeeper_ai-1.0.0.dist-info/licenses/LICENSE +176 -0
  6. beekeeper/__init__.py +0 -1
  7. beekeeper/core/document/__init__.py +0 -6
  8. beekeeper/core/document/schema.py +0 -97
  9. beekeeper/core/document_loaders/__init__.py +0 -5
  10. beekeeper/core/document_loaders/base.py +0 -24
  11. beekeeper/core/embeddings/__init__.py +0 -6
  12. beekeeper/core/embeddings/base.py +0 -44
  13. beekeeper/core/text_splitters/utils.py +0 -142
  14. beekeeper/core/utils/pairwise.py +0 -20
  15. beekeeper/document_loaders/__init__.py +0 -17
  16. beekeeper/document_loaders/directory.py +0 -65
  17. beekeeper/document_loaders/docx.py +0 -31
  18. beekeeper/document_loaders/html.py +0 -77
  19. beekeeper/document_loaders/json.py +0 -53
  20. beekeeper/document_loaders/pdf.py +0 -38
  21. beekeeper/document_loaders/s3.py +0 -72
  22. beekeeper/document_loaders/watson_discovery.py +0 -121
  23. beekeeper/embeddings/__init__.py +0 -7
  24. beekeeper/embeddings/huggingface.py +0 -66
  25. beekeeper/embeddings/watsonx.py +0 -100
  26. beekeeper/evaluation/__init__.py +0 -5
  27. beekeeper/evaluation/knowledge_base_coverage.py +0 -62
  28. beekeeper/monitor/__init__.py +0 -11
  29. beekeeper/monitor/watsonx.py +0 -843
  30. beekeeper/retrievers/__init__.py +0 -5
  31. beekeeper/retrievers/watson_discovery.py +0 -121
  32. beekeeper/text_splitters/__init__.py +0 -9
  33. beekeeper/text_splitters/semantic.py +0 -139
  34. beekeeper/text_splitters/sentence.py +0 -107
  35. beekeeper/text_splitters/token.py +0 -101
  36. beekeeper/vector_stores/__init__.py +0 -7
  37. beekeeper/vector_stores/chroma.py +0 -115
  38. beekeeper/vector_stores/elasticsearch.py +0 -183
  39. beekeeper_ai-0.6.5.dist-info/LICENSE +0 -7
  40. beekeeper_ai-0.6.5.dist-info/METADATA +0 -49
  41. beekeeper_ai-0.6.5.dist-info/RECORD +0 -37
beekeeper/document_loaders/docx.py
@@ -1,31 +0,0 @@
- import os
- from pathlib import Path
- from typing import List, Optional
-
- from beekeeper.core.document import Document
- from beekeeper.core.document_loaders import BaseLoader
-
-
- class DocxLoader(BaseLoader):
-     """Microsoft Word (Docx) loader."""
-
-     def load_data(self, input_file: str, extra_info: Optional[dict] = None) -> List[Document]:
-         """Loads data from the specified directory.
-
-         Args:
-             input_file (str): File path to load.
-         """
-         try:
-             import docx2txt  # noqa: F401
-         except ImportError:
-             raise ImportError("docx2txt package not found, please install it with `pip install docx2txt`")
-
-         if not os.path.isfile(input_file):
-             raise ValueError(f"File `{input_file}` does not exist")
-
-         input_file = str(Path(input_file).resolve())
-
-         text = docx2txt.process(input_file)
-         metadata = {"source": input_file}
-
-         return [Document(text=text, metadata=metadata)]
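For context, a minimal sketch of how the removed DocxLoader was invoked in 0.6.5, based on the `load_data` signature above; the no-argument constructor, the import path, and the file name are assumptions:

    from beekeeper.document_loaders import DocxLoader

    # Hypothetical input file; the loader extracts text with docx2txt and
    # returns a single Document whose metadata records the resolved path.
    loader = DocxLoader()
    docs = loader.load_data(input_file="specs/manual.docx")
    print(docs[0].metadata["source"])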
beekeeper/document_loaders/html.py
@@ -1,77 +0,0 @@
- import os
- from pathlib import Path
- from typing import List, Optional
-
- from beekeeper.core.document import Document
- from beekeeper.core.document_loaders import BaseLoader
-
-
- class HTMLLoader(BaseLoader):
-     """Load HTML file and extract text from a specific tag.
-
-     Args:
-         tag (str): HTML tag to extract. Defaults to ``section``.
-     """
-
-     tag: str = "section"
-
-     def load_data(self, input_file: str, extra_info: Optional[dict] = None) -> List[Document]:
-         """Loads data from the specified directory.
-
-         Args:
-             input_file (str): File path to load.
-         """
-         try:
-             from bs4 import BeautifulSoup  # noqa: F401
-         except ImportError:
-             raise ImportError("beautifulsoup4 package not found, please install it with `pip install beautifulsoup4`")
-
-         if not os.path.isfile(input_file):
-             raise ValueError(f"File `{input_file}` does not exist")
-
-         input_file = str(Path(input_file).resolve())
-
-         with open(input_file, encoding="utf-8") as html_file:
-             soup = BeautifulSoup(html_file, "html.parser")
-
-         tags = soup.find_all(self.tag)
-         documents = []
-
-         for tag in tags:
-             tag_text = self._extract_text_from_tag(tag)
-
-             metadata = {
-                 "tag": self.tag,
-                 "source": input_file,
-             }
-
-             doc = Document(
-                 text=tag_text,
-                 metadata=metadata,
-             )
-
-             documents.append(doc)
-
-         return documents
-
-     def _extract_text_from_tag(self, tag) -> str:
-         """Extract the text from an HTML tag, ignoring other nested tags."""
-         try:
-             from bs4 import NavigableString  # noqa: F401
-         except ImportError:
-             raise ImportError("beautifulsoup4 package not found, please install it with `pip install beautifulsoup4`")
-
-         texts = []
-
-         for elem in tag.children:
-             # Check if the element is a text node, not a tag
-             if isinstance(elem, NavigableString):
-                 if elem.strip():
-                     texts.append(elem.strip())
-             # Ignore any tag that matches the main tag being processed (to avoid recursion)
-             elif elem.name == self.tag:
-                 continue
-             else:
-                 texts.append(elem.get_text().strip())
-
-         return "\n".join(texts)
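A minimal sketch of the removed HTMLLoader in use; it assumes `tag` can be passed as a pydantic-style field at construction (as the other classes in this package do), and the tag choice and file path are hypothetical:

    from beekeeper.document_loaders import HTMLLoader

    # One Document is produced per matching tag; "article" overrides the
    # default "section" declared on the class.
    loader = HTMLLoader(tag="article")
    docs = loader.load_data(input_file="site/index.html")
    for doc in docs:
        print(doc.metadata["tag"], doc.metadata["source"])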
beekeeper/document_loaders/json.py
@@ -1,53 +0,0 @@
- import json
- import os
- from pathlib import Path
- from typing import List, Optional
-
- from beekeeper.core.document import Document
- from beekeeper.core.document_loaders import BaseLoader
-
-
- class JSONLoader(BaseLoader):
-     """JSON loader.
-
-     Args:
-         jq_schema (str, optional): jq schema to use to extract the data from the JSON.
-     """
-
-     jq_schema: Optional[str] = None
-
-     def load_data(self, input_file: str, extra_info: Optional[dict] = None) -> List[Document]:
-         """Loads data from the specified directory.
-
-         Args:
-             input_file (str): File path to load.
-         """
-         try:
-             import jq  # noqa: F401
-         except ImportError:
-             raise ImportError("jq package not found, please install it with `pip install jq`")
-
-         if not os.path.isfile(input_file):
-             raise ValueError(f"File `{input_file}` does not exist")
-
-         documents = []
-         jq_compiler = jq.compile(self.jq_schema)
-         json_file = Path(input_file).resolve().read_text(encoding="utf-8")
-         json_data = jq_compiler.input(json.loads(json_file))
-
-
-         for content in json_data:
-
-             if isinstance(content, str):
-                 content = content
-             elif isinstance(content, dict):
-                 content = json.dumps(content) if content else ""
-             else:
-                 content = str(content) if content is not None else ""
-
-             if content.strip() != "":
-                 documents.append(Document(
-                     text=content,
-                     metadata={"source": str(Path(input_file).resolve())}))
-
-         return documents
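A minimal sketch of the removed JSONLoader; the jq expression and file name are hypothetical, and `jq_schema` is assumed to be accepted as a pydantic-style field:

    from beekeeper.document_loaders import JSONLoader

    # Each value produced by the jq program becomes one Document; dict results
    # are serialized with json.dumps and empty values are skipped.
    loader = JSONLoader(jq_schema=".articles[].body")
    docs = loader.load_data(input_file="data/articles.json")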
beekeeper/document_loaders/pdf.py
@@ -1,38 +0,0 @@
- import logging
- import os
- from pathlib import Path
- from typing import List, Optional
-
- from beekeeper.core.document import Document
- from beekeeper.core.document_loaders import BaseLoader
-
- logging.getLogger("pypdf").setLevel(logging.ERROR)
-
- class PDFLoader(BaseLoader):
-     """PDF loader using PyPDF."""
-
-     def load_data(self, input_file: str, extra_info: Optional[dict] = None) -> List[Document]:
-         """Loads data from the specified directory.
-
-         Args:
-             input_file (str): File path to load.
-         """
-         try:
-             import pypdf  # noqa: F401
-
-         except ImportError:
-             raise ImportError("pypdf package not found, please install it with `pip install pypdf`")
-
-         if not os.path.isfile(input_file):
-             raise ValueError(f"File `{input_file}` does not exist")
-
-         input_file = str(Path(input_file).resolve())
-         pdf_loader = pypdf.PdfReader(input_file)
-
-         return [
-             Document(
-                 text=page.extract_text().strip(),
-                 metadata={"source": input_file, "page": page_number}
-             )
-             for page_number, page in enumerate(pdf_loader.pages)
-         ]
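For reference, a minimal sketch of the removed PDFLoader (the file path is hypothetical); per the code above, it emits one Document per page with the zero-based page number in the metadata:

    from beekeeper.document_loaders import PDFLoader

    docs = PDFLoader().load_data(input_file="reports/q3.pdf")
    first_page = docs[0]
    print(first_page.metadata["page"])    # 0 for the first page
    print(first_page.metadata["source"])  # resolved absolute path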
beekeeper/document_loaders/s3.py
@@ -1,72 +0,0 @@
- import os
- import re
- import tempfile
- from typing import List, Optional
-
- from beekeeper.core.document import Document
- from beekeeper.core.document_loaders import BaseLoader
- from beekeeper.document_loaders import DirectoryLoader
-
-
- class S3Loader(BaseLoader):
-     """S3 bucket loader.
-
-     Args:
-         bucket (str): Name of the S3 bucket.
-         ibm_api_key_id (str): IBM Cloud API key.
-         ibm_service_instance_id (str): Service instance ID for the IBM COS.
-         s3_endpoint_url (str): Endpoint URL for the S3 service.
-
-     **Example**
-
-     .. code-block:: python
-
-         from beekeeper.document_loaders import S3Loader
-
-         loader = S3Loader(bucket="your_bucket",
-                           ibm_api_key_id="your_api_key",
-                           ibm_service_instance_id="your_instance_id",
-                           s3_endpoint_url="your_api_url")
-     """
-
-     def __init__(self, bucket: str,
-                  ibm_api_key_id: str = None,
-                  ibm_service_instance_id: str = None,
-                  s3_endpoint_url: str = None
-                  ):
-
-         try:
-             import ibm_boto3
-             from ibm_botocore.client import Config
-
-             self._ibm_boto3 = ibm_boto3
-             self._boto_config = Config
-         except ImportError:
-             raise ImportError("ibm-cos-sdk package not found, please install it with `pip install ibm-cos-sdk`")
-
-         self.bucket = bucket
-         self.ibm_api_key_id = ibm_api_key_id
-         self.ibm_service_instance_id = ibm_service_instance_id
-         self.s3_endpoint_url = s3_endpoint_url
-
-     def load_data(self, extra_info: Optional[dict] = None) -> List[Document]:
-         """Loads data from the specified S3 bucket."""
-         ibm_s3 = self._ibm_boto3.resource(
-             "s3",
-             ibm_api_key_id=self.ibm_api_key_id,
-             ibm_service_instance_id=self.ibm_service_instance_id,
-             config=self._boto_config(signature_version="oauth"),
-             endpoint_url=self.s3_endpoint_url,
-         )
-
-         bucket = ibm_s3.Bucket(self.bucket)
-
-         with tempfile.TemporaryDirectory() as temp_dir:
-             for obj in bucket.objects.filter(Prefix=""):
-                 file_path = f"{temp_dir}/{obj.key}"
-                 os.makedirs(os.path.dirname(file_path), exist_ok=True)
-                 ibm_s3.meta.client.download_file(self.bucket, obj.key, file_path)
-
-             s3_source = re.sub(r"^(https?)://", "", self.s3_endpoint_url)
-
-             return DirectoryLoader(input_dir=temp_dir).load_data(extra_info={"source": f"{s3_source}/{self.bucket}"})
beekeeper/document_loaders/watson_discovery.py
@@ -1,121 +0,0 @@
- from datetime import datetime
- from logging import getLogger
- from typing import List, Optional
-
- from beekeeper.core.document import Document
- from beekeeper.core.document_loaders import BaseLoader
-
- logger = getLogger(__name__)
-
-
- class WatsonDiscoveryLoader(BaseLoader):
-     """Provides functionality to load documents from IBM Watson Discovery.
-
-     See https://cloud.ibm.com/docs/discovery-data?topic=discovery-data-getting-started for more info.
-
-     Args:
-         url (str): Watson Discovery instance url.
-         api_key (str): Watson Discovery API key.
-         project_id (str): Watson Discovery project_id.
-         version (str, optional): Watson Discovery API version. Defaults to ``2023-03-31``.
-         batch_size (int, optional): Batch size for bulk operations. Defaults to ``50``.
-         created_date (str, optional): Load documents created after the date. Expected format ``YYYY-MM-DD``. Defaults to ``datetime.today()``.
-         pre_additional_data_field (str, optional): Additional data field to be added to the beginning of the Document content. Defaults to ``None``.
-
-     **Example**
-
-     .. code-block:: python
-
-         from beekeeper.document_loaders import WatsonDiscoveryLoader
-
-         doc_loader = WatsonDiscoveryLoader(url="your_url",
-                                            api_key="your_api_key",
-                                            project_id="your_project_id")
-     """
-
-     def __init__(self,
-                  url: str,
-                  api_key: str,
-                  project_id: str,
-                  version: str = "2023-03-31",
-                  batch_size: int = 50,
-                  created_date: str = datetime.today().strftime("%Y-%m-%d"),
-                  pre_additional_data_field: str = None
-                  ) -> None:
-         try:
-             from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
-             from ibm_watson import DiscoveryV2
-
-         except ImportError:
-             raise ImportError("ibm-watson package not found, please install it with `pip install ibm-watson`")
-
-         self.project_id = project_id
-         self.batch_size = batch_size
-         self.created_date = created_date
-         self.pre_additional_data_field = pre_additional_data_field
-
-         try:
-             authenticator = IAMAuthenticator(api_key)
-             self._client = DiscoveryV2(authenticator=authenticator,
-                                        version=version)
-
-             self._client.set_service_url(url)
-         except Exception as e:
-             logger.error(f"Error connecting to IBM Watson Discovery: {e}")
-             raise
-
-     def load_data(self, extra_info: Optional[dict] = None) -> List[Document]:
-         """Loads documents from the Watson Discovery.
-
-         **Example**
-
-         .. code-block:: python
-
-             docs = doc_loader.load_data()
-         """
-         from ibm_watson.discovery_v2 import QueryLargePassages
-         last_batch_size = self.batch_size
-         offset_len = 0
-         documents = []
-         return_fields = ["extracted_metadata.filename", "extracted_metadata.file_type", "text"]
-
-         if self.pre_additional_data_field:
-             return_fields.append(self.pre_additional_data_field)
-
-         while last_batch_size == self.batch_size:
-             results = self._client.query(
-                 project_id=self.project_id,
-                 count=self.batch_size,
-                 offset=offset_len,
-                 return_=return_fields,
-                 filter="extracted_metadata.publicationdate>={}".format(self.created_date),
-                 passages=QueryLargePassages(enabled=False)).get_result()
-
-             last_batch_size = len(results["results"])
-             offset_len = offset_len + last_batch_size
-
-             # Make sure all retrieved document 'text' exist
-             results_documents = [doc for doc in results["results"] if "text" in doc]
-
-             if self.pre_additional_data_field:
-                 for i, doc in enumerate(results_documents):
-                     doc["text"].insert(0, self._get_nested_value(doc, self.pre_additional_data_field))
-
-             documents.extend([Document(doc_id=doc["document_id"],
-                                        text="\n".join(doc["text"]),
-                                        metadata={"collection_id": doc["result_metadata"]["collection_id"]} | doc[
-                                            "extracted_metadata"])
-                               for doc in results_documents])
-
-         return documents
-
-     @staticmethod
-     def _get_nested_value(d, key_path, separator: Optional[str] = "."):
-         """Accesses a nested value in a dictionary using a string key path."""
-         keys = key_path.split(separator)  # Split the key_path using the separator
-         nested_value = d
-
-         for key in keys:
-             nested_value = nested_value[key]  # Traverse the dictionary by each key
-
-         return nested_value
beekeeper/embeddings/__init__.py
@@ -1,7 +0,0 @@
- from beekeeper.embeddings.huggingface import HuggingFaceEmbedding
- from beekeeper.embeddings.watsonx import WatsonxEmbedding
-
- __all__ = [
-     "HuggingFaceEmbedding",
-     "WatsonxEmbedding"
- ]
beekeeper/embeddings/huggingface.py
@@ -1,66 +0,0 @@
- from typing import Any, List, Literal
-
- from pydantic.v1 import BaseModel, PrivateAttr
-
- from beekeeper.core.document import Document
- from beekeeper.core.embeddings import BaseEmbedding, Embedding
-
-
- class HuggingFaceEmbedding(BaseModel, BaseEmbedding):
-     """HuggingFace sentence_transformers embedding models.
-
-     Args:
-         model_name (str): Hugging Face model to be used. Defaults to ``sentence-transformers/all-MiniLM-L6-v2``.
-         device (str, optional): Device to run the model on. Currently supports "cpu" and "cuda". Defaults to ``cpu``.
-
-     **Example**
-
-     .. code-block:: python
-
-         from beekeeper.embeddings import HuggingFaceEmbedding
-
-         embedding = HuggingFaceEmbedding()
-     """
-
-     model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
-     device: Literal["cpu", "cuda"] = "cpu"
-
-     _client: Any = PrivateAttr()
-
-     def __init__(self, **kwargs: Any) -> None:
-         super().__init__(**kwargs)
-         from sentence_transformers import SentenceTransformer
-
-         self._client = SentenceTransformer(self.model_name, device=self.device)
-
-     def get_query_embedding(self, query: str) -> Embedding:
-         """Compute embedding for a text.
-
-         Args:
-             query (str): Input query to compute embedding.
-
-         **Example**
-
-         .. code-block:: python
-
-             embedded_query = embedding.get_query_embedding("Beekeeper is a data framework to load any data in one line of code and connect with AI applications.")
-         """
-         return self.get_texts_embedding([query])[0]
-
-     def get_texts_embedding(self, texts: List[str]) -> List[Embedding]:
-         """Compute embeddings for list of texts.
-
-         Args:
-             texts (List[str]): List of text to compute embeddings.
-         """
-         return self._client.encode(texts).tolist()
-
-     def get_documents_embedding(self, documents: List[Document]) -> List[Embedding]:
-         """Compute embeddings for a list of documents.
-
-         Args:
-             documents (List[Document]): List of `Document` objects to compute embeddings.
-         """
-         texts = [document.get_content() for document in documents]
-
-         return self.get_texts_embedding(texts)
beekeeper/embeddings/watsonx.py
@@ -1,100 +0,0 @@
- from typing import Any, List, Optional
-
- from pydantic.v1 import BaseModel, PrivateAttr
-
- from beekeeper.core.document import Document
- from beekeeper.core.embeddings import BaseEmbedding, Embedding
-
-
- class WatsonxEmbedding(BaseModel, BaseEmbedding):
-     """IBM watsonx embedding models.
-
-     Note:
-         One of these parameters is required: ``project_id`` or ``space_id``. Not both.
-
-     See https://cloud.ibm.com/apidocs/watsonx-ai#endpoint-url for the watsonx.ai API endpoints.
-
-     Args:
-         model_name (str): IBM watsonx.ai model to be used. Defaults to ``ibm/slate-30m-english-rtrvr``.
-         api_key (str): watsonx API key.
-         url (str): watsonx instance url.
-         truncate_input_tokens (str): Maximum number of input tokens accepted. Defaults to ``512``
-         project_id (str, optional): watsonx project_id.
-         space_id (str, optional): watsonx space_id.
-
-     **Example**
-
-     .. code-block:: python
-
-         from beekeeper.embeddings import WatsonxEmbedding
-
-         watsonx_embedding = WatsonxEmbedding(api_key="your_api_key",
-                                              url="your_instance_url",
-                                              project_id="your_project_id")
-     """
-
-     model_name: str = "ibm/slate-30m-english-rtrvr"
-     api_key: str
-     url: str
-     truncate_input_tokens: int = 512
-     project_id: Optional[str] = None
-     space_id: Optional[str] = None
-
-     _client: Any = PrivateAttr()
-
-     def __init__(self, **kwargs: Any) -> None:
-         super().__init__(**kwargs)
-         try:
-             from ibm_watsonx_ai import Credentials
-             from ibm_watsonx_ai.foundation_models import Embeddings as WatsonxEmbeddings
-
-         except ImportError:
-             raise ImportError("ibm-watsonx-ai package not found, please install it with `pip install ibm-watsonx-ai`")
-
-         if (not (self.project_id or self.space_id)) or (self.project_id and self.space_id):
-             raise ValueError("Must provide one of these parameters [`project_id`, `space_id`], not both.")
-
-         kwargs_params = {
-             "model_id": self.model_name,
-             "params": {"truncate_input_tokens": self.truncate_input_tokens, "return_options": {"input_text": False}},
-             "credentials": Credentials(api_key=self.api_key, url=self.url)
-         }
-
-         if self.project_id:
-             kwargs_params["project_id"] = self.project_id
-         else:
-             kwargs_params["space_id"] = self.space_id
-
-         self._client = WatsonxEmbeddings(**kwargs_params)
-
-     def get_query_embedding(self, query: str) -> Embedding:
-         """Compute embedding for a text.
-
-         Args:
-             query (str): Input query to compute embedding.
-
-         **Example**
-
-         .. code-block:: python
-
-             embedded_query = watsonx_embedding.get_query_embedding("Beekeeper is a data framework to load any data in one line of code and connect with AI applications.")
-         """
-         return self.get_texts_embedding([query])[0]
-
-     def get_texts_embedding(self, texts: List[str]) -> List[Embedding]:
-         """Compute embeddings for list of texts.
-
-         Args:
-             texts (List[str]): List of text to compute embeddings.
-         """
-         return self._client.embed_documents(texts)
-
-     def get_documents_embedding(self, documents: List[Document]) -> List[Embedding]:
-         """Compute embeddings for a list of documents.
-
-         Args:
-             documents (List[Document]): List of `Document` objects to compute embeddings.
-         """
-         texts = [document.get_content() for document in documents]
-
-         return self.get_texts_embedding(texts)
beekeeper/evaluation/__init__.py
@@ -1,5 +0,0 @@
- from beekeeper.evaluation.knowledge_base_coverage import KnowledgeBaseCoverage
-
- __all__ = [
-     "KnowledgeBaseCoverage",
- ]
beekeeper/evaluation/knowledge_base_coverage.py
@@ -1,62 +0,0 @@
- from typing import Dict, List, Literal
-
- import numpy as np
- from pydantic.v1 import BaseModel
-
- from beekeeper.core.embeddings import BaseEmbedding
-
-
- class KnowledgeBaseCoverage(BaseModel):
-     """Measures how much the knowledge base (context) has contributed to the answer’s coverage.
-     Higher value suggests greater proportion of context are in LLM response.
-
-     Args:
-         embed_model (BaseEmbedding):
-         similarity_mode (str, optional): Similarity strategy. Currently supports "cosine", "dot_product" and "euclidean". Defaults to ``cosine``.
-         similarity_threshold (float, optional): Embedding similarity threshold for "passing". Defaults to ``0.8``.
-
-     **Example**
-
-     .. code-block:: python
-
-         from beekeeper.embeddings import HuggingFaceEmbedding
-         from beekeeper.evaluation import KnowledgeBaseCoverage
-
-         embedding = HuggingFaceEmbedding()
-         eval_coverage = KnowledgeBaseCoverage(embed_model=embedding)
-     """
-
-     embed_model: BaseEmbedding
-     similarity_mode: Literal["cosine", "dot_product", "euclidean"] = "cosine"
-     similarity_threshold: float = 0.8
-
-     class Config:
-         arbitrary_types_allowed = True
-
-     def compute_metric(self, contexts: List[str], candidate: str) -> Dict:
-         """
-         Args:
-             contexts (List[str]): List text used as LLM context.
-             candidate (str): LLM response based on given context.
-
-         **Example**
-
-         .. code-block:: python
-
-             context_coverage = eval_coverage.compute_metric(context=[], candidate="<candidate>")
-         """
-         if not contexts or not candidate:
-             raise ValueError("Must provide these parameters [`contexts`, `candidate`]")
-
-         coverage = {"contexts_score": [], "score": 0}
-         candidate_embedding = self.embed_model.get_query_embedding(candidate)
-
-         for context in contexts:
-             context_embedding = self.embed_model.get_query_embedding(context)
-             coverage["contexts_score"].append(
-                 self.embed_model.similarity(candidate_embedding, context_embedding, mode=self.similarity_mode))
-
-         coverage["score"] = np.mean(coverage["contexts_score"])
-         coverage["passing"] = coverage["score"] >= self.similarity_threshold
-
-         return coverage
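Worth noting: the removed docstring example calls `compute_metric(context=[], candidate=...)`, which would not run — the parameter is named `contexts` and an empty list raises a ValueError. A minimal corrected sketch (the context and candidate strings are made up):

    from beekeeper.embeddings import HuggingFaceEmbedding
    from beekeeper.evaluation import KnowledgeBaseCoverage

    embedding = HuggingFaceEmbedding()
    eval_coverage = KnowledgeBaseCoverage(embed_model=embedding)

    # Returns per-context similarity scores, their mean, and a pass/fail flag
    # against similarity_threshold (default 0.8).
    result = eval_coverage.compute_metric(
        contexts=["Honey bees pollinate roughly a third of food crops."],
        candidate="About one third of food crops depend on honey bee pollination.",
    )
    print(result["score"], result["passing"])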
beekeeper/monitor/__init__.py
@@ -1,11 +0,0 @@
- from beekeeper.monitor.watsonx import (
-     CloudPakforDataCredentials,
-     WatsonxExternalPromptMonitoring,
-     WatsonxPromptMonitoring,
- )
-
- __all__ = [
-     "CloudPakforDataCredentials",
-     "WatsonxExternalPromptMonitoring",
-     "WatsonxPromptMonitoring"
- ]