pembot 0.0.9__py2.py3-none-any.whl → 0.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

@@ -69,22 +69,27 @@ def search_within_document(
69
69
  if document_belongs_to_a_type:
70
70
  project_dict['type']= 1
71
71
 
72
+ vectorSearchParams= {
73
+ 'queryVector': aggregate_query_embedding,
74
+ 'path': 'embedding',
75
+
76
+ #number of nearest neighbors to consider
77
+ 'numCandidates': 100,
78
+ 'limit': limit,
79
+ 'index': index_name,
80
+
81
+ }
82
+
83
+ # Optional filter, attached only when a document id is given: restrict the search space to the given document type if one is set, otherwise to the single document with that docId
84
+ if document_name_id:
85
+ vectorSearchParams['filter']= (
86
+ { "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
87
+ { 'docId': document_name_id }
88
+ )
89
+
72
90
  pipeline = [
73
91
  {
74
- '$vectorSearch': {
75
- 'queryVector': aggregate_query_embedding,
76
- 'path': 'embedding',
77
-
78
- #number of nearest neighbors to consider
79
- 'numCandidates': 100,
80
- 'limit': limit,
81
- 'index': index_name,
82
-
83
- #filter to search only within the specified document
84
- 'filter':
85
- { "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
86
- { 'docId': document_name_id }
87
- }
92
+ '$vectorSearch': vectorSearchParams
88
93
  },
89
94
 
90
95
  # to exclude the MongoDB internal _id
@@ -93,6 +98,8 @@ def search_within_document(
93
98
  }
94
99
  ]
95
100
 
101
+
102
+
96
103
  # print("searching now:")
97
104
  results = list(embeddings_collection.aggregate(pipeline))
98
105
  # print("search results: ", results)
pembot/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to useful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.0.9'
4
+ __version__ = '0.1.0'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
pembot/config/config.yaml CHANGED
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.0.9
5
+ version: 0.1.0
pembot/query.py CHANGED
@@ -82,7 +82,13 @@ def multi_embedding_average(llm_client, inference_client, descriptions, model= "
82
82
 
83
83
 
84
84
 
85
- def rag_query_llm(db_client, llm_client, inference_client, user_query: str, document_id: str, required_fields_descriptions: list[str], model_name: str = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", ollama_base_url: str = "http://localhost:11434", no_of_fields= 4, embedding_model= "BAAI/bge-en-icl", llm_provider_name: PROVIDER_T= "novita", index_name: str= "test_search", embeddings_collection= "doc_chunks", document_belongs_to_a_type= ""):
85
+ def rag_query_llm(db_client, llm_client, inference_client,
86
+ user_query: str, document_id: str, required_fields_descriptions: list[str],
87
+ model_name: str = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
88
+ ollama_base_url: str = "http://localhost:11434", no_of_fields= 4,
89
+ embedding_model= "BAAI/bge-en-icl", llm_provider_name: PROVIDER_T= "novita",
90
+ index_name: str= "test_search", embeddings_collection= "doc_chunks",
91
+ document_belongs_to_a_type= "", prompt_prefix= ""):
86
92
  """
87
93
  Performs a RAG (Retrieval Augmented Generation) query using a Hugging Face
88
94
  embedding model, ChromaDB for retrieval, and a local Ollama model for generation.
@@ -139,6 +145,7 @@ def rag_query_llm(db_client, llm_client, inference_client, user_query: str, docu
139
145
  If you don't know the answer, just say that you don't know, don't try to make up an answer.
140
146
 
141
147
  Context:
148
+ {prompt_prefix}
142
149
  {context}
143
150
 
144
151
  Question: {user_query}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.0.9
3
+ Version: 0.1.0
4
4
  Summary: A Python Package to convert PEM blog content to useful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
@@ -1,11 +1,11 @@
1
1
  pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
2
2
  pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pembot/__init__.py,sha256=1DmlCS-ZGr0pEr9CQ6lxoYtHSpRMghjac-_MbV29DhU,211
3
+ pembot/__init__.py,sha256=oq-JydA8x2prvUKvfrjXWp-GeFLh_qZPesfmB4ad6HE,211
4
4
  pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
5
5
  pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
6
6
  pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
7
7
  pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
8
- pembot/query.py,sha256=d6K2PyDDGoIOqwn7A_KIBr83w0zjMAHjhmx1S9VlVgg,8642
8
+ pembot/query.py,sha256=0RBnLDBz8pctbsw9GA2UHG-uZPippKYjBBfBBKe2Oec,8705
9
9
  pembot/requirements.txt,sha256=vnjZ3uO4Dd8qPFDq1F2qGromJ5jrEBfBeiDwz7rKijk,1360
10
10
  pembot/.git/COMMIT_EDITMSG,sha256=iRLy0abINakd4FBRWJuD-QQ_WWbXZusnr_9e3wo4d90,38
11
11
  pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
@@ -124,10 +124,10 @@ pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
124
124
  pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
125
125
  pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
126
126
  pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
127
- pembot/TextEmbedder/mongodb_embedder.py,sha256=n-xQwCxSCpGC9jvxHoCVWhBhtm4fVmeg_Gvcn0FDJQ4,10699
127
+ pembot/TextEmbedder/mongodb_embedder.py,sha256=i5M56jaC2t3yUinqlXfC70GaTos4kSR_1LGsbljpEhU,10762
128
128
  pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
129
129
  pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
130
- pembot/config/config.yaml,sha256=9Ysd58bptzE9_od_DQQj1ebzrYsyLrOrlzN52dTA3zQ,156
130
+ pembot/config/config.yaml,sha256=-6niASpCQzNynkHSTHWi3MawUWUHpOVuNg0Fhmun30M,156
131
131
  pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
132
132
  pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
133
133
  pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -183,7 +183,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
183
183
  pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
184
  pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
185
185
  pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
186
- pembot-0.0.9.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
187
- pembot-0.0.9.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
188
- pembot-0.0.9.dist-info/METADATA,sha256=Ba3uu0dcTllzXwQqeTwRIJx3--FKl_eXCCorQAPp2pU,313
189
- pembot-0.0.9.dist-info/RECORD,,
186
+ pembot-0.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
187
+ pembot-0.1.0.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
188
+ pembot-0.1.0.dist-info/METADATA,sha256=3tLhZEY9302ZgIQ2itAMbcM354jWyEcZ_Z3h2FeD1-8,313
189
+ pembot-0.1.0.dist-info/RECORD,,
File without changes