PyPI - bisheng-langchain - Versions diffs - 0.0.1__py3-none-any.whl - Mend

bisheng-langchain 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

bisheng_langchain/__init__.py +0 -0
bisheng_langchain/chains/__init__.py +5 -0
bisheng_langchain/chains/combine_documents/__init__.py +0 -0
bisheng_langchain/chains/combine_documents/stuff.py +56 -0
bisheng_langchain/chains/question_answering/__init__.py +240 -0
bisheng_langchain/chains/retrieval_qa/__init__.py +0 -0
bisheng_langchain/chains/retrieval_qa/base.py +89 -0
bisheng_langchain/chat_models/__init__.py +11 -0
bisheng_langchain/chat_models/host_llm.py +409 -0
bisheng_langchain/chat_models/interface/__init__.py +10 -0
bisheng_langchain/chat_models/interface/minimax.py +123 -0
bisheng_langchain/chat_models/interface/openai.py +68 -0
bisheng_langchain/chat_models/interface/types.py +61 -0
bisheng_langchain/chat_models/interface/utils.py +5 -0
bisheng_langchain/chat_models/interface/wenxin.py +114 -0
bisheng_langchain/chat_models/interface/xunfei.py +233 -0
bisheng_langchain/chat_models/interface/zhipuai.py +81 -0
bisheng_langchain/chat_models/minimax.py +354 -0
bisheng_langchain/chat_models/proxy_llm.py +354 -0
bisheng_langchain/chat_models/wenxin.py +349 -0
bisheng_langchain/chat_models/xunfeiai.py +355 -0
bisheng_langchain/chat_models/zhipuai.py +379 -0
bisheng_langchain/document_loaders/__init__.py +3 -0
bisheng_langchain/document_loaders/elem_html.py +0 -0
bisheng_langchain/document_loaders/elem_image.py +0 -0
bisheng_langchain/document_loaders/elem_pdf.py +655 -0
bisheng_langchain/document_loaders/parsers/__init__.py +5 -0
bisheng_langchain/document_loaders/parsers/image.py +28 -0
bisheng_langchain/document_loaders/parsers/test_image.py +286 -0
bisheng_langchain/embeddings/__init__.py +7 -0
bisheng_langchain/embeddings/host_embedding.py +133 -0
bisheng_langchain/embeddings/interface/__init__.py +3 -0
bisheng_langchain/embeddings/interface/types.py +23 -0
bisheng_langchain/embeddings/interface/wenxin.py +86 -0
bisheng_langchain/embeddings/wenxin.py +139 -0
bisheng_langchain/vectorstores/__init__.py +3 -0
bisheng_langchain/vectorstores/elastic_keywords_search.py +284 -0
bisheng_langchain-0.0.1.dist-info/METADATA +64 -0
bisheng_langchain-0.0.1.dist-info/RECORD +41 -0
bisheng_langchain-0.0.1.dist-info/WHEEL +5 -0
bisheng_langchain-0.0.1.dist-info/top_level.txt +1 -0

bisheng_langchain/vectorstores/elastic_keywords_search.py ADDED Viewed

@@ -0,0 +1,284 @@
+"""Wrapper around Elasticsearch vector database."""
+from __future__ import annotations
+import uuid
+from abc import ABC
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple
+import jieba.analyse
+from langchain.docstore.document import Document
+from langchain.embeddings.base import Embeddings
+from langchain.utils import get_from_dict_or_env
+from langchain.vectorstores.base import VectorStore
+if TYPE_CHECKING:
+    from elasticsearch import Elasticsearch  # noqa: F401
+def _default_text_mapping() -> Dict:
+    return {'properties': {'text': {'type': 'text'}}}
+# ElasticKeywordsSearch is a concrete implementation of the abstract base class
+# VectorStore, which defines a common interface for all vector database
+# implementations. Instead of comparing embeddings, it retrieves documents by
+# keyword matching in Elasticsearch. It also lists ABC as a base class, so
+# subclasses may declare abstract methods of their own. If you plan to subclass
+# ElasticKeywordsSearch, inherit from it and override the methods and attributes
+# you need.
+class ElasticKeywordsSearch(VectorStore, ABC):
+    """Wrapper around Elasticsearch as a vector database.
+    To connect to an Elasticsearch instance that does not require
+    login credentials, pass the Elasticsearch URL and index name along with the
+    Example:
+        .. code-block:: python
+            from langchain import ElasticKeywordsSearch
+            elastic_vector_search = ElasticKeywordsSearch(
+                elasticsearch_url="http://localhost:9200",
+                index_name="test_index",
+            )
+    To connect to an Elasticsearch instance that requires login credentials,
+    including Elastic Cloud, use the Elasticsearch URL format
+    https://username:password@es_host:9243. For example, to connect to Elastic
+    Cloud, create the Elasticsearch URL with the required authentication details and
+    pass it to the ElasticKeywordsSearch constructor as the named parameter
+    elasticsearch_url.
+    You can obtain your Elastic Cloud URL and login credentials by logging in to the
+    Elastic Cloud console at https://cloud.elastic.co, selecting your deployment, and
+    navigating to the "Deployments" page.
+    To obtain your Elastic Cloud password for the default "elastic" user:
+    1. Log in to the Elastic Cloud console at https://cloud.elastic.co
+    2. Go to "Security" > "Users"
+    3. Locate the "elastic" user and click "Edit"
+    4. Click "Reset password"
+    5. Follow the prompts to reset the password
+    The format for Elastic Cloud URLs is
+    https://username:password@cluster_id.region_id.gcp.cloud.es.io:9243.
+    Example:
+        .. code-block:: python
+            from langchain import ElasticKeywordsSearch
+            elastic_host = "cluster_id.region_id.gcp.cloud.es.io"
+            elasticsearch_url = f"https://username:password@{elastic_host}:9243"
+            elastic_keywords_search = ElasticKeywordsSearch(
+                elasticsearch_url=elasticsearch_url,
+                index_name="test_index"
+            )
+    Args:
+        elasticsearch_url (str): The URL for the Elasticsearch instance.
+        index_name (str): The name of the Elasticsearch index for the keywords.
+    Raises:
+        ValueError: If the elasticsearch python package is not installed.
+    """
+    def __init__(
+        self,
+        elasticsearch_url: str,
+        index_name: str,
+        *,
+        ssl_verify: Optional[Dict[str, Any]] = None,
+    ):
+        """Initialize with necessary components."""
+        try:
+            import elasticsearch
+        except ImportError:
+            raise ImportError(
+                'Could not import elasticsearch python package. '
+                'Please install it with `pip install elasticsearch`.')
+        self.index_name = index_name
+        _ssl_verify = ssl_verify or {}
+        try:
+            self.client = elasticsearch.Elasticsearch(elasticsearch_url,
+                                                      **_ssl_verify)
+        except ValueError as e:
+            raise ValueError(
+                f'Your elasticsearch client string is mis-formatted. Got error: {e} '
+            )
+    def add_texts(
+        self,
+        texts: Iterable[str],
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        refresh_indices: bool = True,
+        **kwargs: Any,
+    ) -> List[str]:
+        """Run more texts through the keywords and add to the vectorstore.
+        Args:
+            texts: Iterable of strings to add to the vectorstore.
+            metadatas: Optional list of metadatas associated with the texts.
+            ids: Optional list of unique IDs.
+            refresh_indices: bool to refresh ElasticSearch indices
+        Returns:
+            List of ids from adding the texts into the vectorstore.
+        """
+        try:
+            from elasticsearch.exceptions import NotFoundError
+            from elasticsearch.helpers import bulk
+        except ImportError:
+            raise ImportError(
+                'Could not import elasticsearch python package. '
+                'Please install it with `pip install elasticsearch`.')
+        requests = []
+        ids = ids or [str(uuid.uuid4()) for _ in texts]
+        mapping = _default_text_mapping()
+        # check to see if the index already exists
+        try:
+            self.client.indices.get(index=self.index_name)
+        except NotFoundError:
+            # TODO would be nice to create index before embedding,
+            # just to save expensive steps for last
+            self.create_index(self.client, self.index_name, mapping)
+        for i, text in enumerate(texts):
+            metadata = metadatas[i] if metadatas else {}
+            request = {
+                '_op_type': 'index',
+                '_index': self.index_name,
+                'text': text,
+                'metadata': metadata,
+                '_id': ids[i],
+            }
+            requests.append(request)
+        bulk(self.client, requests)
+        if refresh_indices:
+            self.client.indices.refresh(index=self.index_name)
+        return ids
+    def similarity_search(self,
+                          query: str,
+                          k: int = 4,
+                          query_strategy: str = 'match_phrase',
+                          must_or_should: str = 'should',
+                          **kwargs: Any) -> List[Document]:
+        assert must_or_should in ['must',
+                                  'should'], 'only support must and should.'
+        keywords = jieba.analyse.extract_tags(query, topK=10, withWeight=False)
+        match_query = {'bool': {must_or_should: []}}
+        for key in keywords:
+            match_query['bool'][must_or_should].append(
+                {query_strategy: {
+                    'text': key
+                }})
+        docs_and_scores = self.similarity_search_with_score(match_query, k)
+        documents = [d[0] for d in docs_and_scores]
+        return documents
+    def similarity_search_with_score(
+            self,
+            query: str,
+            k: int = 4,
+            **kwargs: Any) -> List[Tuple[Document, float]]:
+        response = self.client_search(self.client,
+                                      self.index_name,
+                                      query,
+                                      size=k)
+        hits = [hit for hit in response['hits']['hits']]
+        docs_and_scores = [(
+            Document(
+                page_content=hit['_source']['text'],
+                metadata=hit['_source']['metadata'],
+            ),
+            hit['_score'],
+        ) for hit in hits]
+        return docs_and_scores
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        _: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        ids: Optional[List[str]] = None,
+        index_name: Optional[str] = None,
+        refresh_indices: bool = True,
+        **kwargs: Any,
+    ) -> ElasticKeywordsSearch:
+        """Construct ElasticKeywordsSearch wrapper from raw documents.
+        This is a user-friendly interface that:
+            1. Embeds documents.
+            2. Creates a new index for the embeddings in the Elasticsearch instance.
+            3. Adds the documents to the newly created Elasticsearch index.
+        This is intended to be a quick way to get started.
+        Example:
+            .. code-block:: python
+                from langchain import ElasticKeywordsSearch
+                from langchain.embeddings import OpenAIEmbeddings
+                embeddings = OpenAIEmbeddings()
+                elastic_vector_search = ElasticKeywordsSearch.from_texts(
+                    texts,
+                    embeddings,
+                    elasticsearch_url="http://localhost:9200"
+                )
+        """
+        elasticsearch_url = get_from_dict_or_env(kwargs, 'elasticsearch_url',
+                                                 'ELASTICSEARCH_URL')
+        if 'elasticsearch_url' in kwargs:
+            del kwargs['elasticsearch_url']
+        index_name = index_name or uuid.uuid4().hex
+        vectorsearch = cls(elasticsearch_url, index_name, **kwargs)
+        vectorsearch.add_texts(texts,
+                               metadatas=metadatas,
+                               ids=ids,
+                               refresh_indices=refresh_indices)
+        return vectorsearch
+    def create_index(self, client: Any, index_name: str,
+                     mapping: Dict) -> None:
+        version_num = client.info()['version']['number'][0]
+        version_num = int(version_num)
+        if version_num >= 8:
+            client.indices.create(index=index_name, mappings=mapping)
+        else:
+            client.indices.create(index=index_name, body={'mappings': mapping})
+    def client_search(self, client: Any, index_name: str, script_query: Dict,
+                      size: int) -> Any:
+        version_num = client.info()['version']['number'][0]
+        version_num = int(version_num)
+        if version_num >= 8:
+            response = client.search(index=index_name,
+                                     query=script_query,
+                                     size=size)
+        else:
+            response = client.search(index=index_name,
+                                     body={
+                                         'query': script_query,
+                                         'size': size
+                                     })
+        return response
+    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
+        """Delete by vector IDs.
+        Args:
+            ids: List of ids to delete.
+        """
+        if ids is None:
+            raise ValueError('No ids provided to delete.')
+        # TODO: Check if this can be done in bulk
+        for id in ids:
+            self.client.delete(index=self.index_name, id=id)

bisheng_langchain-0.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,64 @@
+Metadata-Version: 2.1
+Name: bisheng-langchain
+Version: 0.0.1
+Summary: bisheng langchain modules
+Home-page: https://github.com/dataelement/bisheng
+Author: DataElem
+Author-email: contact@dataelem.com
+License: Apache 2.0
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
+Requires-Dist: langchain
+Requires-Dist: openai
+Requires-Dist: zhipuai
+Requires-Dist: websocket-client
+Requires-Dist: elasticsearch
+## What is bisheng-langchain?
+bisheng-langchain is an open-source library that extends langchain and is built to power LLM applications.
+bisheng-langchain provides more components to support Chinese LLMs and Chinese-based token environments for prompt engineering and ICL templates.
+The project is a sub-module of [bisheng](https://github.com/dataelement/bisheng).
+## Key features
+- Retrieval enhancement components, like ESIndex, DBIndex, GraphIndex
+- Support for open LLMs and embedding models
+- High-performance QA chains
+- Highly semantic Chinese token processing
+## Quick start
+### Start with Bisheng Platform.
+We provide an open cloud service for easy use. See [free trial](https://bisheng.dataelem.com/).
+### Install bisheng-langchain
+- Install from pip: `pip install bisheng-langchain`
+- [Quick Start Guide](https://m7a7tqsztt.feishu.cn/wiki/CTXNwpqGKiMs5FkKlPJcylfonuD)
+## Documentation
+For guidance on installation, development, deployment, and administration,
+check out [bisheng-langchain Dev Docs](https://dataelem.feishu.cn/wiki/Xaq8wKQjkiYEHNkXuYLc7JkxnPf).
+## Acknowledgments
+bisheng-langchain adopts dependencies from the following:
+- Thanks to [langchain](https://github.com/langchain-ai/langchain) for the main framework.

bisheng_langchain-0.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,41 @@
+bisheng_langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/chains/__init__.py,sha256=uCZGGsjlCJsNkQ3VQH9VpqQ-Ny2Ze3WUmEdy9xsdEVQ,123
+bisheng_langchain/chains/combine_documents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/chains/combine_documents/stuff.py,sha256=z_E_wfhJrAYWcNVRPomPm5fGRDI3hqoC52wcMzgzxVA,2369
+bisheng_langchain/chains/question_answering/__init__.py,sha256=RWbSgTQ0IqZhrXkhaJUKzEXurA9NJE7_6P0zLy0IBjs,8636
+bisheng_langchain/chains/retrieval_qa/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/chains/retrieval_qa/base.py,sha256=1lP3c2A1q7TZkl0G38q7VQ6KjftNaH14zR4x3vHcAvo,3678
+bisheng_langchain/chat_models/__init__.py,sha256=7b3X2_aULq9mSihlds99Q_FfgIo_X4gejsTRWBcP_7k,399
+bisheng_langchain/chat_models/host_llm.py,sha256=B1g8F9hPGuLPv29KPqvO-gcs2Ak-b-kDFL4nHasxCP4,15246
+bisheng_langchain/chat_models/minimax.py,sha256=2ofLC-fpQIwA2MrqJCYj0v73VgkrQMqzv_5KX0xYnfg,13961
+bisheng_langchain/chat_models/proxy_llm.py,sha256=AdvCOPhEPTux7nVx7geXryHdRF9Bcm3pYKfRRMzF89c,13949
+bisheng_langchain/chat_models/wenxin.py,sha256=D9gnVGFovoQBuLvAo9DaVOjrym2elgbVrcBq3KYSd4A,13791
+bisheng_langchain/chat_models/xunfeiai.py,sha256=Guo0oPl-o-LhhzAqnBDgtXYsl-C7M00id3pWj1CYBKM,14013
+bisheng_langchain/chat_models/zhipuai.py,sha256=O4M2u6cIgO-ERKJsjirdAPEfcmDHHXidxvp5DKuZql8,14939
+bisheng_langchain/chat_models/interface/__init__.py,sha256=KwcZMPSxFiXu6joXoZEgq6THxZeDXA8neZcOuLKBpUk,443
+bisheng_langchain/chat_models/interface/minimax.py,sha256=uEh0lI_wa_sfiiJkDTMyiSbjxMjgl392bH828vleogA,4420
+bisheng_langchain/chat_models/interface/openai.py,sha256=6h25vgJWV79tHelJfbKsrbgjNrRq9DYZ30aOxlGIQN4,2078
+bisheng_langchain/chat_models/interface/types.py,sha256=SUCcDTnHeuLEQtTGZW6QCIRDuCTPIbddvV7XEQTI8qI,1134
+bisheng_langchain/chat_models/interface/utils.py,sha256=qww_uYsWDqK7cLuv-KzZmmlg9SZAHOi4R_6I6S4XLIk,65
+bisheng_langchain/chat_models/interface/wenxin.py,sha256=2r5sBjym-fihWgHT8v7LZ0374p3umQtkczLFFSB9Z5s,3998
+bisheng_langchain/chat_models/interface/xunfei.py,sha256=UvOCBzUcXBCSzITCsCJ04_pRcAu31iv-fgcPAiF-UAE,7049
+bisheng_langchain/chat_models/interface/zhipuai.py,sha256=A9yST0a_qCPs5IP67bICL4nhbHaWIzvLiSiuzpCdPGY,2633
+bisheng_langchain/document_loaders/__init__.py,sha256=nxi44U76yvkY7OmwLGRi4Xjz-COPkCdQmmviGAyLADE,81
+bisheng_langchain/document_loaders/elem_html.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/document_loaders/elem_image.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bisheng_langchain/document_loaders/elem_pdf.py,sha256=64kUITkrTVJe9CH6IAVSdDVcn2Ekx2PM-jT0cdClXlo,22716
+bisheng_langchain/document_loaders/parsers/__init__.py,sha256=nou6EOshbI3PQ8OgU3Ni2rjmjsZSZXvBVs-DB9tYsCw,67
+bisheng_langchain/document_loaders/parsers/image.py,sha256=7Vx4dD_WiSTojS4TMIJFxfE8nvze0kwNnwTd6f1cLds,938
+bisheng_langchain/document_loaders/parsers/test_image.py,sha256=EJHozq5oFfLBlLL5Lr6XFkrkvSttPpohprs9OjDzAKM,8685
+bisheng_langchain/embeddings/__init__.py,sha256=QMsnkSfVLW70OdplI6X5N38cKXTPC7yP-x1i2A4crfI,234
+bisheng_langchain/embeddings/host_embedding.py,sha256=_a_ggQoxSRgPv3aKZTXIzjsYAwmtPt3AJZ-oEQgcz_A,4422
+bisheng_langchain/embeddings/wenxin.py,sha256=8kYqWuHydx5Cylb_Lmdti0YLHrOM1Qha3eMuVIPitOk,4828
+bisheng_langchain/embeddings/interface/__init__.py,sha256=GNY3tibpRxpAdAfSvQmXBKo0xKSLke_9y4clofi_WOE,98
+bisheng_langchain/embeddings/interface/types.py,sha256=VdurbtsnjCPdlOjPFcK2Mg6r9bJYYHb3tepvkk-y3nM,461
+bisheng_langchain/embeddings/interface/wenxin.py,sha256=5d9gI4enmfkD80s0FHKiDt33O0mwM8Xc5WTubnMUy8c,3104
+bisheng_langchain/vectorstores/__init__.py,sha256=K3xQouSGl05Q0ehFCKZafip-35NzCrv8SCANvfxDpKE,96
+bisheng_langchain/vectorstores/elastic_keywords_search.py,sha256=MrNrY1YdSmqKVpWt1y6VKD3Daw9ZummWx_iOtfUk-TE,10922
+bisheng_langchain-0.0.1.dist-info/METADATA,sha256=wQ8dM4fN0tClJUtUmrBLnKVn_rag4aCG5mjmfyZtDUA,2036
+bisheng_langchain-0.0.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+bisheng_langchain-0.0.1.dist-info/top_level.txt,sha256=Z6pPNyCo4ihyr9iqGQbH8sJiC4dAUwA_mAyGRQB5_Fs,18
+bisheng_langchain-0.0.1.dist-info/RECORD,,

bisheng_langchain-0.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.37.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

bisheng_langchain-0.0.1.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ bisheng_langchain