pembot 0.0.9__py2.py3-none-any.whl → 0.1.0__py2.py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pembot might be problematic. Click here for more details.
- pembot/TextEmbedder/mongodb_embedder.py +21 -14
- pembot/__init__.py +1 -1
- pembot/config/config.yaml +1 -1
- pembot/query.py +8 -1
- {pembot-0.0.9.dist-info → pembot-0.1.0.dist-info}/METADATA +1 -1
- {pembot-0.0.9.dist-info → pembot-0.1.0.dist-info}/RECORD +8 -8
- {pembot-0.0.9.dist-info → pembot-0.1.0.dist-info}/WHEEL +0 -0
- {pembot-0.0.9.dist-info → pembot-0.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -69,22 +69,27 @@ def search_within_document(
|
|
|
69
69
|
if document_belongs_to_a_type:
|
|
70
70
|
project_dict['type']= 1
|
|
71
71
|
|
|
72
|
+
vectorSearchParams= {
|
|
73
|
+
'queryVector': aggregate_query_embedding,
|
|
74
|
+
'path': 'embedding',
|
|
75
|
+
|
|
76
|
+
#number of nearest neighbors to consider
|
|
77
|
+
'numCandidates': 100,
|
|
78
|
+
'limit': limit,
|
|
79
|
+
'index': index_name,
|
|
80
|
+
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
#filter (if a type or docid filter is given) to search only within the specified documents search space
|
|
84
|
+
if document_name_id:
|
|
85
|
+
vectorSearchParams['filter']= (
|
|
86
|
+
{ "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
|
|
87
|
+
{ 'docId': document_name_id }
|
|
88
|
+
)
|
|
89
|
+
|
|
72
90
|
pipeline = [
|
|
73
91
|
{
|
|
74
|
-
'$vectorSearch':
|
|
75
|
-
'queryVector': aggregate_query_embedding,
|
|
76
|
-
'path': 'embedding',
|
|
77
|
-
|
|
78
|
-
#number of nearest neighbors to consider
|
|
79
|
-
'numCandidates': 100,
|
|
80
|
-
'limit': limit,
|
|
81
|
-
'index': index_name,
|
|
82
|
-
|
|
83
|
-
#filter to search only within the specified document
|
|
84
|
-
'filter':
|
|
85
|
-
{ "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
|
|
86
|
-
{ 'docId': document_name_id }
|
|
87
|
-
}
|
|
92
|
+
'$vectorSearch': vectorSearchParams
|
|
88
93
|
},
|
|
89
94
|
|
|
90
95
|
# to exclude the MongoDB internal _id
|
|
@@ -93,6 +98,8 @@ def search_within_document(
|
|
|
93
98
|
}
|
|
94
99
|
]
|
|
95
100
|
|
|
101
|
+
|
|
102
|
+
|
|
96
103
|
# print("sesraching now:")
|
|
97
104
|
results = list(embeddings_collection.aggregate(pipeline))
|
|
98
105
|
# print("search results: ", results)
|
pembot/__init__.py
CHANGED
pembot/config/config.yaml
CHANGED
pembot/query.py
CHANGED
|
@@ -82,7 +82,13 @@ def multi_embedding_average(llm_client, inference_client, descriptions, model= "
|
|
|
82
82
|
|
|
83
83
|
|
|
84
84
|
|
|
85
|
-
def rag_query_llm(db_client, llm_client, inference_client,
|
|
85
|
+
def rag_query_llm(db_client, llm_client, inference_client,
|
|
86
|
+
user_query: str, document_id: str, required_fields_descriptions: list[str],
|
|
87
|
+
model_name: str = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
|
|
88
|
+
ollama_base_url: str = "http://localhost:11434", no_of_fields= 4,
|
|
89
|
+
embedding_model= "BAAI/bge-en-icl", llm_provider_name: PROVIDER_T= "novita",
|
|
90
|
+
index_name: str= "test_search", embeddings_collection= "doc_chunks",
|
|
91
|
+
document_belongs_to_a_type= "", prompt_prefix= ""):
|
|
86
92
|
"""
|
|
87
93
|
Performs a RAG (Retrieval Augmented Generation) query using a Hugging Face
|
|
88
94
|
embedding model, ChromaDB for retrieval, and a local Ollama model for generation.
|
|
@@ -139,6 +145,7 @@ def rag_query_llm(db_client, llm_client, inference_client, user_query: str, docu
|
|
|
139
145
|
If you don't know the answer, just say that you don't know, don't try to make up an answer.
|
|
140
146
|
|
|
141
147
|
Context:
|
|
148
|
+
{prompt_prefix}
|
|
142
149
|
{context}
|
|
143
150
|
|
|
144
151
|
Question: {user_query}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
|
|
2
2
|
pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
pembot/__init__.py,sha256=
|
|
3
|
+
pembot/__init__.py,sha256=oq-JydA8x2prvUKvfrjXWp-GeFLh_qZPesfmB4ad6HE,211
|
|
4
4
|
pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
|
|
5
5
|
pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
|
|
6
6
|
pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
|
|
7
7
|
pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
|
|
8
|
-
pembot/query.py,sha256=
|
|
8
|
+
pembot/query.py,sha256=0RBnLDBz8pctbsw9GA2UHG-uZPippKYjBBfBBKe2Oec,8705
|
|
9
9
|
pembot/requirements.txt,sha256=vnjZ3uO4Dd8qPFDq1F2qGromJ5jrEBfBeiDwz7rKijk,1360
|
|
10
10
|
pembot/.git/COMMIT_EDITMSG,sha256=iRLy0abINakd4FBRWJuD-QQ_WWbXZusnr_9e3wo4d90,38
|
|
11
11
|
pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
|
|
@@ -124,10 +124,10 @@ pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
|
124
124
|
pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
|
|
125
125
|
pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
126
126
|
pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
|
|
127
|
-
pembot/TextEmbedder/mongodb_embedder.py,sha256=
|
|
127
|
+
pembot/TextEmbedder/mongodb_embedder.py,sha256=i5M56jaC2t3yUinqlXfC70GaTos4kSR_1LGsbljpEhU,10762
|
|
128
128
|
pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
|
|
129
129
|
pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
|
|
130
|
-
pembot/config/config.yaml,sha256
|
|
130
|
+
pembot/config/config.yaml,sha256=-6niASpCQzNynkHSTHWi3MawUWUHpOVuNg0Fhmun30M,156
|
|
131
131
|
pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
|
|
132
132
|
pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
|
|
133
133
|
pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -183,7 +183,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
|
|
|
183
183
|
pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
184
|
pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
|
|
185
185
|
pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
|
|
186
|
-
pembot-0.0.
|
|
187
|
-
pembot-0.0.
|
|
188
|
-
pembot-0.0.
|
|
189
|
-
pembot-0.0.
|
|
186
|
+
pembot-0.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
187
|
+
pembot-0.1.0.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
|
|
188
|
+
pembot-0.1.0.dist-info/METADATA,sha256=3tLhZEY9302ZgIQ2itAMbcM354jWyEcZ_Z3h2FeD1-8,313
|
|
189
|
+
pembot-0.1.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|