pembot 0.0.7__py2.py3-none-any.whl → 0.0.9__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (33)
  1. pembot/.git/COMMIT_EDITMSG +1 -1
  2. pembot/.git/index +0 -0
  3. pembot/.git/logs/HEAD +2 -0
  4. pembot/.git/logs/refs/heads/main +2 -0
  5. pembot/.git/logs/refs/remotes/origin/main +2 -0
  6. pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64 +0 -0
  7. pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2 +0 -0
  8. pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705 +3 -0
  9. pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49 +0 -0
  10. pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31 +0 -0
  11. pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f +0 -0
  12. pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa +0 -0
  13. pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632 +0 -0
  14. pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88 +0 -0
  15. pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b +1 -0
  16. pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547 +0 -0
  17. pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269 +0 -0
  18. pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd +0 -0
  19. pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c +0 -0
  20. pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1 +0 -0
  21. pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5 +0 -0
  22. pembot/.git/refs/heads/main +1 -1
  23. pembot/.git/refs/remotes/origin/main +1 -1
  24. pembot/TextEmbedder/mongodb_embedder.py +50 -19
  25. pembot/TextEmbedder/mongodb_index_creator.py +29 -24
  26. pembot/__init__.py +1 -1
  27. pembot/config/config.yaml +1 -1
  28. pembot/query.py +5 -4
  29. pembot/requirements.txt +1 -1
  30. {pembot-0.0.7.dist-info → pembot-0.0.9.dist-info}/METADATA +1 -1
  31. {pembot-0.0.7.dist-info → pembot-0.0.9.dist-info}/RECORD +33 -17
  32. {pembot-0.0.7.dist-info → pembot-0.0.9.dist-info}/WHEEL +0 -0
  33. {pembot-0.0.7.dist-info → pembot-0.0.9.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG CHANGED
@@ -1 +1 @@
1
- added model name to convertor
1
+ fixed the type_info array pushing bug
pembot/.git/index CHANGED
Binary file
pembot/.git/logs/HEAD CHANGED
@@ -8,3 +8,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
8
8
  0bdb4169fc0f312b8698f1df17a258fff163aeaa 9528bbccd167e3f4ad583a1ae9fac98a52620e27 cyto <silverstone965@gmail.com> 1750947488 +0530 commit: handled local llm nonexistent error properly for choice of just passing None as llm_client;
9
9
  9528bbccd167e3f4ad583a1ae9fac98a52620e27 ef0503a60244391590b16042019032e91d7cc30d cyto <silverstone965@gmail.com> 1751872559 +0530 commit: added a model_name_parameter to change models quicky
10
10
  ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
11
+ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
12
+ 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
pembot/.git/logs/refs/heads/main CHANGED
@@ -8,3 +8,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
8
8
  0bdb4169fc0f312b8698f1df17a258fff163aeaa 9528bbccd167e3f4ad583a1ae9fac98a52620e27 cyto <silverstone965@gmail.com> 1750947488 +0530 commit: handled local llm nonexistent error properly for choice of just passing None as llm_client;
9
9
  9528bbccd167e3f4ad583a1ae9fac98a52620e27 ef0503a60244391590b16042019032e91d7cc30d cyto <silverstone965@gmail.com> 1751872559 +0530 commit: added a model_name_parameter to change models quicky
10
10
  ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
11
+ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
12
+ 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
pembot/.git/logs/refs/remotes/origin/main CHANGED
@@ -7,3 +7,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
7
7
  0bdb4169fc0f312b8698f1df17a258fff163aeaa 9528bbccd167e3f4ad583a1ae9fac98a52620e27 cyto <silverstone965@gmail.com> 1750947502 +0530 update by push
8
8
  9528bbccd167e3f4ad583a1ae9fac98a52620e27 ef0503a60244391590b16042019032e91d7cc30d cyto <silverstone965@gmail.com> 1751872581 +0530 update by push
9
9
  ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896713 +0530 update by push
10
+ af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081801 +0530 update by push
11
+ 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136119 +0530 update by push
@@ -0,0 +1,3 @@
1
+ x��An� E��)��E�B`���z� *� !#��%W�����߷�)�
2
+ J��Z��K%�����HZMl���4"�� cz�mXx���
3
+ �Y�IxaVfd�H<����@l]�G]rw� _{�o.{�+OF�<�ػ��A�� ��[�B+�5�V��jwl��P���Eo�t�8��y�.��ꅁ�;��-���CX�D���K�M������c��Vύo�m����rBስل���������jv�
@@ -0,0 +1 @@
1
+ x��݊� @�^�s�P������2�M fR���<��~pश,�A�; �HC*:�h]��8T��� )��,ņ�V��X���Q���XE�
pembot/.git/refs/heads/main CHANGED
@@ -1 +1 @@
1
- af80ddb5890f062e364ea8ade2d602df4e12de8c
1
+ a898d2c3947d30d8be64bd2bbcef68f956d5456b
pembot/.git/refs/remotes/origin/main CHANGED
@@ -1 +1 @@
1
- af80ddb5890f062e364ea8ade2d602df4e12de8c
1
+ a898d2c3947d30d8be64bd2bbcef68f956d5456b
pembot/TextEmbedder/mongodb_embedder.py CHANGED
@@ -29,6 +29,7 @@ def search_within_document(
29
29
  limit: int = 5,
30
30
  index_name: str = "test_search",
31
31
  embeddings_collection_name: str= "doc_chunks",
32
+ document_belongs_to_a_type = "",
32
33
  ):
33
34
  """
34
35
  Performs a vector similarity search within the chunks of a specific document
@@ -42,6 +43,7 @@ def search_within_document(
42
43
  index_name: The name of your MongoDB Atlas Vector Search index.
43
44
  You MUST have a vector search index created on the 'embedding' field
44
45
  of the 'embeddings_collection' collection for this to work efficiently.
46
+ document_belongs_to_a_type: When search spaces intersect for different docIds, such that docId is an array field,
45
47
 
46
48
  Returns:
47
49
  A list of dictionaries, where each dictionary represents a matching chunk
@@ -50,10 +52,23 @@ def search_within_document(
50
52
  embeddings_collection = db_client[embeddings_collection_name]
51
53
 
52
54
  print(f"Searching within document (docId: {document_name_id})...")
55
+ # print(f" filter (slug: {document_belongs_to_a_type})...")
53
56
 
54
57
  # MongoDB Atlas Vector Search aggregation pipeline
55
58
  # The 'path' should point to the field containing the embeddings.
56
59
  # The 'filter' stage is crucial for searching within a specific document.
60
+ #
61
+ project_dict= {
62
+ '_id': 0,
63
+ 'docId': 1,
64
+ 'chunk_number': 1,
65
+ 'chunk_text': 1,
66
+ 'score': { '$meta': 'vectorSearchScore' } # Get the similarity score
67
+ }
68
+
69
+ if document_belongs_to_a_type:
70
+ project_dict['type']= 1
71
+
57
72
  pipeline = [
58
73
  {
59
74
  '$vectorSearch': {
@@ -66,25 +81,21 @@ def search_within_document(
66
81
  'index': index_name,
67
82
 
68
83
  #filter to search only within the specified document
69
- 'filter': {
70
- 'docId': document_name_id
71
- }
84
+ 'filter':
85
+ { "type": {"$in": [document_belongs_to_a_type ]} } if document_belongs_to_a_type else
86
+ { 'docId': document_name_id }
72
87
  }
73
88
  },
74
89
 
75
90
  # to exclude the MongoDB internal _id
76
91
  {
77
- '$project': {
78
- '_id': 0,
79
- 'docId': 1,
80
- 'chunk_number': 1,
81
- 'chunk_text': 1,
82
- 'score': { '$meta': 'vectorSearchScore' } # Get the similarity score
83
- }
92
+ '$project': project_dict
84
93
  }
85
94
  ]
86
95
 
96
+ # print("sesraching now:")
87
97
  results = list(embeddings_collection.aggregate(pipeline))
98
+ # print("search results: ", results)
88
99
 
89
100
  if not results:
90
101
  print(f"No relevant chunks found for document '{document_name_id}' with the given query.")
@@ -100,15 +111,18 @@ def search_within_document(
100
111
 
101
112
 
102
113
 
103
- def process_document_and_embed(db_client,
114
+ def process_document_and_embed(
115
+ db_client,
104
116
  llm_client,
105
117
  inference_client,
106
118
  file_path: Path,
107
119
  chunk_size: int,
108
- embedding_model: str = 'nomic-embed-text:v1.5',
120
+ embedding_model: str = 'BAAI/bge-en-icl',
109
121
  embeddings_collection_name= "doc_chunks",
110
122
  use_custom_id: str | None = None,
111
- use_custom_input: str | None = None
123
+ use_custom_input: str | None = None,
124
+ document_belongs_to_a_type= "",
125
+ type_info= []
112
126
  ) -> list[dict]:
113
127
  """
114
128
  Processes an input document by chunking its text, generating embeddings using
@@ -228,13 +242,30 @@ def process_document_and_embed(db_client,
228
242
  'chunk_text': chunk,
229
243
  'embedding': embedding,
230
244
  'chunk_id_global': chunk_id_global,
231
- 'chunk_id_doc_specific': chunk_id_doc_specific
245
+ 'chunk_id_doc_specific': chunk_id_doc_specific,
232
246
  }
233
- embeddings_collection.update_one(
234
- {'docId': document_name_id, 'chunk_number': i + 1},
235
- {'$set': doc_set},
236
- upsert=True
237
- )
247
+
248
+
249
+ # TBD: this is NOT pushing array, this is creating a "$push" field with type: "" object
250
+
251
+ if len(type_info) > 0:
252
+ embeddings_collection.update_one(
253
+ {'docId': document_name_id, 'chunk_number': i + 1},
254
+ {
255
+ '$set': doc_set,
256
+ '$addToSet': {
257
+ "type": { '$each': type_info }
258
+ }
259
+ },
260
+ upsert=True
261
+ )
262
+ else:
263
+
264
+ embeddings_collection.update_one(
265
+ {'docId': document_name_id, 'chunk_number': i + 1},
266
+ {'$set': doc_set},
267
+ upsert=True
268
+ )
238
269
  print(f"Successfully stored chunk {i+1} for '{file_path.name}' in MongoDB.")
239
270
  res.append({**doc_set, "docId": document_name_id, "chunk_number": i + 1})
240
271
 
pembot/TextEmbedder/mongodb_index_creator.py CHANGED
@@ -4,7 +4,7 @@ from pymongo.operations import SearchIndexModel
4
4
  import time
5
5
  import os
6
6
 
7
- def create_vector_index(collection: Collection, index_name: str, num_dimensions: int = 768):
7
+ def create_vector_index(collection: Collection, index_name: str, num_dimensions: int = 768, document_belongs_to_a_type= ""):
8
8
  """
9
9
  Creates a MongoDB Atlas Vector Search index if it does not already exist.
10
10
 
@@ -13,14 +13,14 @@ def create_vector_index(collection: Collection, index_name: str, num_dimensions:
13
13
  index_name: The desired name for the vector search index.
14
14
  num_dimensions: The number of dimensions for the embedding vectors.
15
15
  """
16
-
16
+
17
17
  # 1. Check if the index already exists
18
18
  existing_indexes = list(collection.list_search_indexes())
19
-
19
+
20
20
  for index in existing_indexes:
21
21
  if index.get('name') == index_name:
22
22
  print(f"Search index '{index_name}' already exists. Skipping creation.")
23
-
23
+
24
24
  # Optional: You can also check if the existing index is "READY"
25
25
  if index.get('status') == 'READY':
26
26
  print(f"Index '{index_name}' is already ready for querying.")
@@ -33,20 +33,27 @@ def create_vector_index(collection: Collection, index_name: str, num_dimensions:
33
33
  # 2. If the index does not exist, proceed to create it
34
34
  print(f"Search index '{index_name}' does not exist. Creating it now...")
35
35
 
36
+ fields_arr= [
37
+ {
38
+ "type": "vector",
39
+ "path": "embedding",
40
+ "similarity": "dotProduct", # Or "cosine", "euclidean"
41
+ "numDimensions": num_dimensions,
42
+ "quantization": "scalar" # Or "none"
43
+ },
44
+ {
45
+ "type": "filter",
46
+ "path": "docId"
47
+ }
48
+ ]
49
+
50
+ if document_belongs_to_a_type:
51
+ fields_arr.append({
52
+ "type": "filter",
53
+ "path": "type"
54
+ })
36
55
  search_index_model = SearchIndexModel(definition={
37
- "fields": [
38
- {
39
- "type": "vector",
40
- "path": "embedding",
41
- "similarity": "dotProduct", # Or "cosine", "euclidean"
42
- "numDimensions": num_dimensions,
43
- "quantization": "scalar" # Or "none"
44
- },
45
- {
46
- "type": "filter",
47
- "path": "docId"
48
- }
49
- ]
56
+ "fields": fields_arr
50
57
  },
51
58
  name=index_name,
52
59
  type="vectorSearch"
@@ -70,7 +77,7 @@ def _wait_for_index_ready(collection: Collection, index_name: str):
70
77
  Helper function to poll the index status until it's ready.
71
78
  """
72
79
  print("Polling to check if the index is ready. This may take some time (up to a few minutes for large indexes).")
73
-
80
+
74
81
  start_time = time.time()
75
82
  timeout = 300 # 5 minutes timeout, adjust as needed
76
83
 
@@ -89,7 +96,7 @@ def _wait_for_index_ready(collection: Collection, index_name: str):
89
96
  print(f"Index '{index_name}' status: {current_status}. Waiting...")
90
97
  except Exception as e:
91
98
  print(f"Error while polling index status: {e}. Retrying...")
92
-
99
+
93
100
  if time.time() - start_time > timeout:
94
101
  status= indices[0].get('status') if indices else 'N/A'
95
102
  print(f"Timeout: Index '{index_name}' did not become ready within {timeout} seconds. Current status: {status}")
@@ -99,9 +106,9 @@ def _wait_for_index_ready(collection: Collection, index_name: str):
99
106
 
100
107
  # --- Example Usage ---
101
108
  if __name__ == "__main__":
102
-
109
+
103
110
  # Replace with your database and collection names
104
- DATABASE_NAME = "pembot"
111
+ DATABASE_NAME = "pembot"
105
112
  COLLECTION_NAME = "doc_chunks"
106
113
  VECTOR_INDEX_NAME = "test_search"
107
114
 
@@ -119,7 +126,7 @@ if __name__ == "__main__":
119
126
 
120
127
  # Call the function to create the index, with existence check
121
128
  create_vector_index(collection, VECTOR_INDEX_NAME, num_dimensions=EMBEDDING_DIMENSIONS)
122
-
129
+
123
130
  # Test calling it again to see the "already exists" message
124
131
  create_vector_index(collection, VECTOR_INDEX_NAME, num_dimensions=EMBEDDING_DIMENSIONS)
125
132
 
@@ -129,5 +136,3 @@ if __name__ == "__main__":
129
136
  if 'mongo_client' in locals() and mongo_client:
130
137
  mongo_client.close()
131
138
  print("MongoDB connection closed.")
132
-
133
-
pembot/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.0.7'
4
+ __version__ = '0.0.9'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
pembot/config/config.yaml CHANGED
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.0.7
5
+ version: 0.0.9
pembot/query.py CHANGED
@@ -68,7 +68,8 @@ def multi_embedding_average(llm_client, inference_client, descriptions, model= "
68
68
  except Exception as e:
69
69
  print(f"Error generating embedding for description '{desc}': {e}")
70
70
  # Decide how to handle errors: skip, raise, or use a placeholder
71
- continue
71
+ # continue
72
+ raise e
72
73
  time.sleep(1)
73
74
 
74
75
  if not description_embeddings:
@@ -81,7 +82,7 @@ def multi_embedding_average(llm_client, inference_client, descriptions, model= "
81
82
 
82
83
 
83
84
 
84
- def rag_query_llm(db_client, llm_client, inference_client, user_query: str, document_id: str, required_fields_descriptions: list[str], model_name: str = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", ollama_base_url: str = "http://localhost:11434", no_of_fields= 4, embedding_model= "BAAI/bge-en-icl", llm_provider_name: PROVIDER_T= "novita", index_name: str= "test_search", embeddings_collection= "doc_chunks"):
85
+ def rag_query_llm(db_client, llm_client, inference_client, user_query: str, document_id: str, required_fields_descriptions: list[str], model_name: str = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", ollama_base_url: str = "http://localhost:11434", no_of_fields= 4, embedding_model= "BAAI/bge-en-icl", llm_provider_name: PROVIDER_T= "novita", index_name: str= "test_search", embeddings_collection= "doc_chunks", document_belongs_to_a_type= ""):
85
86
  """
86
87
  Performs a RAG (Retrieval Augmented Generation) query using a Hugging Face
87
88
  embedding model, ChromaDB for retrieval, and a local Ollama model for generation.
@@ -119,10 +120,10 @@ def rag_query_llm(db_client, llm_client, inference_client, user_query: str, docu
119
120
  aggregate_query_embedding= multi_embedding_average(llm_client, inference_client, required_fields_descriptions, model= embedding_model, embed_locally= embed_locally)
120
121
  print("Aggregate query embedding generated. length: ", len(aggregate_query_embedding))
121
122
 
122
- create_vector_index(db_client[embeddings_collection], index_name, num_dimensions= len(aggregate_query_embedding))
123
+ create_vector_index(db_client[embeddings_collection], index_name, num_dimensions= len(aggregate_query_embedding), document_belongs_to_a_type= document_belongs_to_a_type)
123
124
 
124
125
  # check the order of args
125
- relevant_chunks= search_within_document(db_client, aggregate_query_embedding, document_id, limit= no_of_fields, index_name= index_name, embeddings_collection_name= embeddings_collection)
126
+ relevant_chunks= search_within_document(db_client, aggregate_query_embedding, document_id, limit= no_of_fields, index_name= index_name, embeddings_collection_name= embeddings_collection, document_belongs_to_a_type= document_belongs_to_a_type)
126
127
  relevant_chunks= list(map(lambda x: x['chunk_text'], relevant_chunks))
127
128
 
128
129
  if not relevant_chunks:
pembot/requirements.txt CHANGED
@@ -40,7 +40,7 @@ pandas==2.3.0
40
40
  pathlib==1.0.1
41
41
  pdfminer.six==20250506
42
42
  pdfplumber==0.11.7
43
- pembot==0.0.6
43
+ pembot==0.0.8
44
44
  pillow==11.3.0
45
45
  pyasn1==0.6.1
46
46
  pyasn1_modules==0.4.2
{pembot-0.0.7.dist-info → pembot-0.0.9.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.0.7
3
+ Version: 0.0.9
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
{pembot-0.0.7.dist-info → pembot-0.0.9.dist-info}/RECORD CHANGED
@@ -1,17 +1,17 @@
1
1
  pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
2
2
  pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pembot/__init__.py,sha256=AQsB73MqtY0M4iaYZzGve48zpUhohq6R9DjdFwsbbKo,211
3
+ pembot/__init__.py,sha256=1DmlCS-ZGr0pEr9CQ6lxoYtHSpRMghjac-_MbV29DhU,211
4
4
  pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
5
5
  pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
6
6
  pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
7
7
  pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
8
- pembot/query.py,sha256=D1RPRoImDWCafbshT2NpO4ymVj2RySm8j5FJ5bRzYWw,8476
9
- pembot/requirements.txt,sha256=6OV_n5JVco2lLA8Wq38tJX1bYgo_UU0R9RKgs4d2wfc,1360
10
- pembot/.git/COMMIT_EDITMSG,sha256=H9feTx6U3VWbFycy9cq077mD4oxuv2gz4G3EUOdQmV4,30
8
+ pembot/query.py,sha256=d6K2PyDDGoIOqwn7A_KIBr83w0zjMAHjhmx1S9VlVgg,8642
9
+ pembot/requirements.txt,sha256=vnjZ3uO4Dd8qPFDq1F2qGromJ5jrEBfBeiDwz7rKijk,1360
10
+ pembot/.git/COMMIT_EDITMSG,sha256=iRLy0abINakd4FBRWJuD-QQ_WWbXZusnr_9e3wo4d90,38
11
11
  pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
12
12
  pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
13
13
  pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
14
- pembot/.git/index,sha256=EEe4lVsgYW5zuGbFVZ8a4t7AqjDlPTqU1JGDynBe2Sc,1814
14
+ pembot/.git/index,sha256=CdDhKv_jjjaFu1MNjZTrZZ4GF_av7KNYh750IqRkGWg,1814
15
15
  pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
16
16
  pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
17
17
  pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
@@ -28,26 +28,34 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
28
28
  pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
29
29
  pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
30
30
  pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
31
- pembot/.git/logs/HEAD,sha256=zUB4DZpCGTMM1FnKY1jQ98WAMwY8twSD8qaFz3Q-K-4,2521
32
- pembot/.git/logs/refs/heads/main,sha256=zUB4DZpCGTMM1FnKY1jQ98WAMwY8twSD8qaFz3Q-K-4,2521
31
+ pembot/.git/logs/HEAD,sha256=TTHA6MsudjF1a9H4QECSP3XPvB-k2AOsA8mTVCEis_o,2980
32
+ pembot/.git/logs/refs/heads/main,sha256=TTHA6MsudjF1a9H4QECSP3XPvB-k2AOsA8mTVCEis_o,2980
33
33
  pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
34
- pembot/.git/logs/refs/remotes/origin/main,sha256=t9RDc56CIhCL27FolwvJyBJ6LK8uGVFkzXTZmWqHukw,1314
34
+ pembot/.git/logs/refs/remotes/origin/main,sha256=uxD0NKkje26omv-w0V09LJA81e-ccHAA-FY148tg93g,1606
35
+ pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
36
+ pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
35
37
  pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
36
38
  pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa,sha256=hsOHhX0Yajg27Y7B9lo-WjDXzW1KNMg2CBr93G116EY,387
37
39
  pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1,sha256=GKt_CAJNOQXwGnoFLuiNpkd0s_hP_UDLKd59VRknYy0,330
38
40
  pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c,sha256=Uk1dStvEBica-t38qHsZZ_4mxvi6b6VA9PaKE4KSunQ,90
41
+ pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05QL0Do-raB4AtK5QjvKLFBNc0RZqNga9o,244
42
+ pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
43
+ pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
39
44
  pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
40
45
  pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYpJ6HsfY2LQbXQTMwlT1IPWRSEiY2uDwyE,392
41
46
  pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
42
47
  pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
43
48
  pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
44
49
  pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515,sha256=A9MNZO3QZ6ghGd1MyfmJ6H3dBTpF4HZcRosVxWytx8E,4077
50
+ pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f,sha256=omF4gmE9IQFZR8t6ybAKfnW02tdn9ZaVWKRhv_o1V4c,2083
45
51
  pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f,sha256=ygVUpaLo7cxUdIgjFlaBh2BkllV6BIYYkzLIxsPKjWE,4111
46
52
  pembot/.git/objects/3e/cf23eb95123287531d708a21d4ba88d92ccabb,sha256=Jlg3XIzIjk3N5ZKolXbz_betMybJ2t2TVuOARg2ruQU,4943
47
53
  pembot/.git/objects/3f/78215d7e17da726fb352fd92b3c117db9b63ba,sha256=J8r5hqTEgAwlH5sDjr9tp1ipqpvs4BAVQY5rkiKqDCw,4080
48
54
  pembot/.git/objects/3f/e072cf3cb6a9f30c3e9936e3ddf622e80270d0,sha256=Z-UoKi2MYe0qGTtBxAr5cnIOHKkhoEXMgalevFUz9lA,2992
49
55
  pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0,sha256=TLuVmtSH9K33qB-WHMxKDUihHCrwdTtCKtjBs-rAnJ4,56
50
56
  pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D4JgHkjjqcDQCwuS17w60JRkVr25ZFlcI,117
57
+ pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
58
+ pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
51
59
  pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
52
60
  pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
53
61
  pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
@@ -62,11 +70,15 @@ pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEX
62
70
  pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
63
71
  pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
64
72
  pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
73
+ pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88,sha256=FLAmmgvYuEAx1-ZBU30rvDzP0ppXWRSVrzPWVnArIb0,203
65
74
  pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6,sha256=xf8oZ5IBMTxfkH7MFfukV7ZIu0Apd-78eJTdlI7GBv0,90
66
75
  pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27,sha256=jwJdRviwjGJIyMpE_BM6mr7B9ofGEsI5ZToJo5nmlao,263
67
76
  pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456,sha256=xIETiieOoilleucGg7vXOgjZ-v5PI0t34fDJjDD665A,4204
77
+ pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b,sha256=lh2LurucwRdL6WP8ChgmjXrK2lR7HASIXzt4iHFrTf4,178
78
+ pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547,sha256=kaU3Z4k6ptIwO8ktcjs2-kshb0bzM4y8Uur-a27_jnk,56
68
79
  pembot/.git/objects/ab/139d2cd4798dd8e2c565b80440b1a44b376126,sha256=v1UO-WINmigZNYD74kyIv310Kq5k4SNL-gQ2DYlw9xk,6258
69
80
  pembot/.git/objects/ab/c6b15265171457b41e2cfdaf3b8c3994a59eb7,sha256=ivRCkHzUZHXB16wn2ojARknUrwBkoUsV_18QT3Jbs-k,205
81
+ pembot/.git/objects/ab/f77db148e3fb3b26913af14ae43130396f3269,sha256=rJJenBYvGWdXvwNgHWqIJCF3q2oxeA3eT19eRxlNmJA,3346
70
82
  pembot/.git/objects/ac/9c9018c62fa30dc142665c1b5a375f4e056880,sha256=P_8LPBV0v4D17Akj4f5Cr2dhgNFUsh4o7DLK78CfNPo,349
71
83
  pembot/.git/objects/af/80ddb5890f062e364ea8ade2d602df4e12de8c,sha256=QELzH3NdMCFohFEcf5oAAu_e54VFr-LhTyPbXY7GjSk,169
72
84
  pembot/.git/objects/b1/1173d9b68db117437ccb9551461152e1e8a77d,sha256=6cl8NMNQ9b5fBh97GPEQNssOVrh-EQLJfhqSBbNb_vU,205
@@ -79,8 +91,10 @@ pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFt
79
91
  pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
80
92
  pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
81
93
  pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
94
+ pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
82
95
  pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
83
96
  pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
97
+ pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c,sha256=oAb2b2VwhPXykdK_ZV8MEFwfy-ZPd2Nja2gAv20U7hc,115
84
98
  pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e,sha256=irJ-z8kPZmg85B0f4TQz73yJoCMWMWsIR3Pi5wx1Dlk,4034
85
99
  pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc,sha256=r4zY-__F4gSfjE7onRTrcxvv8umXKuPuFzd95AiQ0cs,392
86
100
  pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d,sha256=qWZpM65kQPSxlVHAtyzH5L-j3rL-b9Jw-A7YBm4NMlI,249
@@ -88,6 +102,7 @@ pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58,sha256=lXbMvL_xl8P
88
102
  pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238,sha256=ltEINFUpQP86CkE4nAT1Afegz3ytY3Nlx1P6ibTFEbo,305
89
103
  pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8,sha256=fFYq_ODekFhF9SwBL9GP_fGDsNavXVVOuI6kmnHlkiY,5140
90
104
  pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d,sha256=mrF9jZHY2oJm8tkd8nQdMgUPbrZfENOFaR3mvbwi1dg,187
105
+ pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1,sha256=2cORujYAURTvGACujtNJvP1f-s7rt-UkrtPBuKfDR-g,419
91
106
  pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552,sha256=Ugf-wTcOlwZXmxmbnjEc3iOK3dDRntTVONOJsrOjl3E,205
92
107
  pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8,sha256=Y5WfCEpk121Cy9gaFfSY4ZkUz54qu45osRZdTy9kZ8c,393
93
108
  pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729,sha256=MrRy-fBSXZcp-yJM3e-tH3wCdUS-VFX6rW_mKTa-0_Y,419
@@ -98,20 +113,21 @@ pembot/.git/objects/fa/9c9a62ec1203a5868b033ded428c2382c4e1b6,sha256=is9gmIhAL-Q
98
113
  pembot/.git/objects/fb/6c90c9ce5e0cdfbe074a3f060afc66f62eefde,sha256=Bmrnc27anjqoAL6akhBM4MEO9BreF5olubroBwVVUvs,56
99
114
  pembot/.git/objects/fc/988aab7e2d46396dc595ad24345e8e77dda0e4,sha256=8982HA0S9bCm0JQsrgWzIkRNIiGfaDRUUrXsnuVjE6A,196
100
115
  pembot/.git/objects/fc/e56f1e09d09a05b9babf796fb40bece176f3a2,sha256=g-IVuI_8YBn048qkyyY6Vpn8zfP8UCJxUs0F5bDD6qM,1114
116
+ pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5,sha256=cWWr70MLaXk82O6EIPpYlGUd5P30IUEELrbrh2MrUXE,115
101
117
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
102
118
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
103
119
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
104
- pembot/.git/refs/heads/main,sha256=DymD5B54ONj5DkUMd4HYO5m52NWPWMez_QV6RdNCG0g,41
120
+ pembot/.git/refs/heads/main,sha256=UNQiShSvoG1AvbUvWeJLlN5V3ZHJSDarZ4vFLDI-Xh8,41
105
121
  pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
106
- pembot/.git/refs/remotes/origin/main,sha256=DymD5B54ONj5DkUMd4HYO5m52NWPWMez_QV6RdNCG0g,41
122
+ pembot/.git/refs/remotes/origin/main,sha256=UNQiShSvoG1AvbUvWeJLlN5V3ZHJSDarZ4vFLDI-Xh8,41
107
123
  pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
124
  pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
109
125
  pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
126
  pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
111
- pembot/TextEmbedder/mongodb_embedder.py,sha256=pD8mP-uC_o0COPdOrCTMpoC5PdF8hXlqARHvTr2T-VI,9642
112
- pembot/TextEmbedder/mongodb_index_creator.py,sha256=ejpsF_y1zY6Z0nux02vjODiDPnxx-YA_xy2PmT94zZ4,5306
127
+ pembot/TextEmbedder/mongodb_embedder.py,sha256=n-xQwCxSCpGC9jvxHoCVWhBhtm4fVmeg_Gvcn0FDJQ4,10699
128
+ pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
113
129
  pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
114
- pembot/config/config.yaml,sha256=xqo_Zq2dKEw98tzXDvJqOiJBluFCvT5JNhXpqkIqW0Y,156
130
+ pembot/config/config.yaml,sha256=9Ysd58bptzE9_od_DQQj1ebzrYsyLrOrlzN52dTA3zQ,156
115
131
  pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
116
132
  pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
117
133
  pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -167,7 +183,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
167
183
  pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
184
  pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
169
185
  pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
170
- pembot-0.0.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
171
- pembot-0.0.7.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
172
- pembot-0.0.7.dist-info/METADATA,sha256=7kfZ28VYYaCy0mWNNLTP_pP6Bi9c-PtzbEwdbyNdLzI,313
173
- pembot-0.0.7.dist-info/RECORD,,
186
+ pembot-0.0.9.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
187
+ pembot-0.0.9.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
188
+ pembot-0.0.9.dist-info/METADATA,sha256=Ba3uu0dcTllzXwQqeTwRIJx3--FKl_eXCCorQAPp2pU,313
189
+ pembot-0.0.9.dist-info/RECORD,,
File without changes