pembot 0.1.0__py2.py3-none-any.whl → 0.1.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pembot might be problematic. Click here for more details.

Files changed (39)
  1. pembot/.git/COMMIT_EDITMSG +1 -1
  2. pembot/.git/index +0 -0
  3. pembot/.git/logs/HEAD +3 -0
  4. pembot/.git/logs/refs/heads/main +3 -0
  5. pembot/.git/logs/refs/remotes/origin/main +3 -0
  6. pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee +0 -0
  7. pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200 +0 -0
  8. pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba +0 -0
  9. pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c +0 -0
  10. pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4 +0 -0
  11. pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e +0 -0
  12. pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7 +0 -0
  13. pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d +0 -0
  14. pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd +0 -0
  15. pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5 +1 -0
  16. pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a +0 -0
  17. pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a +0 -0
  18. pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643 +0 -0
  19. pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444 +0 -0
  20. pembot/.git/objects/99/89463f57f1f2931e5973bd543c80f18b0204bc +0 -0
  21. pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511 +0 -0
  22. pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a +1 -0
  23. pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37 +0 -0
  24. pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834 +0 -0
  25. pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e +0 -0
  26. pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d +0 -0
  27. pembot/.git/refs/heads/main +1 -1
  28. pembot/.git/refs/remotes/origin/main +1 -1
  29. pembot/TextEmbedder/mongodb_embedder.py +3 -0
  30. pembot/__init__.py +1 -1
  31. pembot/config/config.yaml +1 -1
  32. pembot/pyrightconfig.json +8 -0
  33. pembot/query.py +279 -1
  34. pembot/requirements.txt +9 -1
  35. pembot/search.py +190 -0
  36. {pembot-0.1.0.dist-info → pembot-0.1.2.dist-info}/METADATA +1 -1
  37. {pembot-0.1.0.dist-info → pembot-0.1.2.dist-info}/RECORD +39 -16
  38. {pembot-0.1.0.dist-info → pembot-0.1.2.dist-info}/WHEEL +0 -0
  39. {pembot-0.1.0.dist-info → pembot-0.1.2.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG CHANGED
@@ -1 +1 @@
1
- fixed the type_info array pushing bug
1
+ minor oopsie
pembot/.git/index CHANGED
Binary file
pembot/.git/logs/HEAD CHANGED
@@ -10,3 +10,6 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
10
10
  ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
11
11
  af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
12
12
  0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
13
+ a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
14
+ 784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
15
+ f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
pembot/.git/logs/refs/heads/main CHANGED
@@ -10,3 +10,6 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
10
10
  ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
11
11
  af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
12
12
  0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
13
+ a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
14
+ 784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
15
+ f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859643 +0530 commit: minor oopsie
pembot/.git/logs/refs/remotes/origin/main CHANGED
@@ -9,3 +9,6 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
9
9
  ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896713 +0530 update by push
10
10
  af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081801 +0530 update by push
11
11
  0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136119 +0530 update by push
12
+ a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
13
+ 784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
14
+ f214d4d56726e2928479c5948bd88e038cf70b2e 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a cyto <silverstone965@gmail.com> 1752859659 +0530 update by push
pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5 ADDED
Binary file (zlib-compressed Git loose object; contents are not displayable as text)
pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a ADDED
Binary file (zlib-compressed Git loose object; contents are not displayable as text)
pembot/.git/refs/heads/main CHANGED
@@ -1 +1 @@
1
- a898d2c3947d30d8be64bd2bbcef68f956d5456b
1
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a
pembot/.git/refs/remotes/origin/main CHANGED
@@ -1 +1 @@
1
- a898d2c3947d30d8be64bd2bbcef68f956d5456b
1
+ 8fc00bf69f4ad3e50c13acc4a0988b6c0fe72b5a
pembot/TextEmbedder/mongodb_embedder.py CHANGED
@@ -49,6 +49,9 @@ def search_within_document(
49
49
  A list of dictionaries, where each dictionary represents a matching chunk
50
50
  from the specified document, including its text, docId, and score.
51
51
  """
52
+ if limit < 1:
53
+ return []
54
+
52
55
  embeddings_collection = db_client[embeddings_collection_name]
53
56
 
54
57
  print(f"Searching within document (docId: {document_name_id})...")
pembot/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  """
2
2
  A Python Package to convert PEM blog content to usseful information by leveraging LLMs
3
3
  """
4
- __version__ = '0.1.0'
4
+ __version__ = '0.1.2'
5
5
  from .main import save_to_json_file, make_query
6
6
  __all__ = ["save_to_json_file", "make_query"]
pembot/config/config.yaml CHANGED
@@ -2,4 +2,4 @@ OUTPUT_DIR: /home/cyto/dev/pembotdir
2
2
  PAGE_DELIMITER: ___________________________ NEXT PAGE ___________________________
3
3
  app:
4
4
  name: pembot
5
- version: 0.1.0
5
+ version: 0.1.2
pembot/pyrightconfig.json ADDED
@@ -0,0 +1,8 @@
1
+ {
2
+ "venvPath": "..", // If your venv is a direct child like ./venv
3
+ "venv": "venvpem" // The name of your venv folder
4
+ // Or if you want to be explicit with the full path
5
+ // "pythonVersion": "3.9", // Or your specific version
6
+ // "pythonPlatform": "Linux", // Or "Windows", "Darwin"
7
+ // "pythonPath": "/path/to/your/project/.venv/bin/python" // Absolute path
8
+ }
pembot/query.py CHANGED
@@ -3,14 +3,34 @@ from huggingface_hub.inference._generated.types.chat_completion import ChatCompl
3
3
  from huggingface_hub.inference._providers import PROVIDER_T
4
4
  import ollama
5
5
  import re
6
+ from smolagents import InferenceClientModel, ToolCallingAgent, ActionStep, TaskStep
7
+ from smolagents.default_tools import VisitWebpageTool
8
+ from pymongo import MongoClient
9
+ from typing import Callable, Dict, Any, Optional, List
10
+ import uuid
11
+ from datetime import datetime
12
+ from smolagents.monitoring import Timing
6
13
 
7
- from pydantic_core.core_schema import ErrorType
14
+
15
+ from pembot.search import brave_search_tool
8
16
  from pembot.TextEmbedder.mongodb_embedder import search_within_document
9
17
  import numpy as np
10
18
  from huggingface_hub import InferenceClient
11
19
  from google import genai
12
20
  from google.genai import types
13
21
  import time
22
+ from datetime import timezone
23
+
24
+ init_timing= {
25
+ "start_time": 0.0,
26
+ "end_time": 0.0,
27
+ "duration": 0.0,
28
+ }
29
+
30
+ mongodb_uri= environ['MONGODB_SCHEMER']
31
+ mc = MongoClient(mongodb_uri)
32
+ db = mc["schemerdb"]
33
+ collection = db["chat_history"] # Collection name
14
34
 
15
35
  from pembot.TextEmbedder.mongodb_index_creator import create_vector_index
16
36
 
@@ -214,5 +234,263 @@ def remove_bs(text):
214
234
  return ""
215
235
 
216
236
 
237
+ def smolquery(message: str, external_tools: list[Callable] = [], chat_id: str | None = None, allow_web_search= True) -> Dict[str, Any]:
238
+ """
239
+ Run agent with chat history support.
240
+
241
+ Args:
242
+ message: User's message
243
+ external_tools: List of external tools to use
244
+ chat_id: Optional chat ID for continuing conversation
245
+ allow_web_search: Boolean to decide whether to include brave tool to fetch search results
246
+ and the Visiting Web Page Tool in the agent's toolbox
247
+
248
+ Returns:
249
+ Dictionary containing response and chat_id
250
+ """
251
+ alltools = []
252
+
253
+ if allow_web_search:
254
+ alltools.extend([
255
+ brave_search_tool,
256
+ VisitWebpageTool(),
257
+ ])
258
+
259
+ alltools.extend(external_tools)
260
+
261
+ model = InferenceClientModel(
262
+ token= environ["HF_TOKEN"],
263
+ # model_id= "HuggingFaceTB/SmolLM3-3B"
264
+ model_id= "deepseek-ai/DeepSeek-R1-0528"
265
+ )
266
+
267
+ agent = ToolCallingAgent(tools=alltools, model=model, add_base_tools=False)
268
+
269
+ # Handle chat history
270
+ if chat_id:
271
+ # Load existing conversation
272
+ chat_doc = collection.find_one({"_id": chat_id})
273
+ if chat_doc:
274
+ # Restore agent memory from database
275
+ restore_agent_memory(agent, chat_doc["messages"])
276
+ else:
277
+ # Chat ID provided but not found, create new one
278
+ chat_id = str(uuid.uuid4())
279
+ else:
280
+ # Create new chat
281
+ chat_id = str(uuid.uuid4())
282
+
283
+ # Run the agent
284
+ response = agent.run(message, reset= False)
285
+
286
+ # Extract the final answer from the response
287
+ final_answer = extract_final_answer(response)
288
+
289
+ # Save conversation to database
290
+ save_chat_history(chat_id, agent, message, final_answer)
291
+
292
+ return {
293
+ "response": final_answer,
294
+ "chat_id": chat_id
295
+ }
296
+
297
+ def extract_final_answer(response: Any) -> str:
298
+ """
299
+ Extract the final answer from various response types.
300
+
301
+ Args:
302
+ response: Response from agent.run()
303
+
304
+ Returns:
305
+ Final answer as string
306
+ """
307
+ # Handle RunResult object
308
+ if hasattr(response, 'final_answer'):
309
+ return str(response.final_answer)
310
+
311
+ # Handle direct string response
312
+ if isinstance(response, str):
313
+ return response
314
+
315
+ # Handle generator response
316
+ if hasattr(response, '__iter__') and not isinstance(response, (str, bytes)):
317
+ final_step = None
318
+ for step in response:
319
+ final_step = step
320
+ # Look for FinalAnswerStep
321
+ if hasattr(step, 'final_answer'):
322
+ return str(step.final_answer)
323
+
324
+ # If no final answer found, return last step as string
325
+ if final_step is not None:
326
+ return str(final_step)
327
+
328
+ # Fallback to string conversion
329
+ return str(response)
330
+
331
+ def restore_agent_memory(agent: ToolCallingAgent, messages: List[Dict[str, Any]]) -> None:
332
+ """
333
+ Restore agent memory from stored messages.
334
+
335
+ Args:
336
+ agent: The agent instance
337
+ messages: List of stored messages
338
+ """
339
+ for msg in messages:
340
+ if msg["type"] == "task":
341
+ # Add task step
342
+ task_step = TaskStep(
343
+ task=msg["content"],
344
+ task_images=msg.get("images", [])
345
+ )
346
+ agent.memory.steps.append(task_step)
347
+ elif msg["type"] == "action":
348
+ # Add action step with only the required parameters
349
+ # ActionStep objects are typically created during execution
350
+ # and contain read-only information, so we create a minimal one
351
+ action_saved_timing= msg.get("timing", init_timing)
352
+ action_step = ActionStep(
353
+ observations= msg.get("observations", ""),
354
+ step_number=msg["step_number"],
355
+ observations_images=msg.get("observations_images", []),
356
+ timing=Timing(
357
+ start_time= action_saved_timing.get("start_time", 0.0),
358
+ end_time= action_saved_timing.get("end_time", 0.0)
359
+ )
360
+ )
361
+ agent.memory.steps.append(action_step)
362
+
363
+ def save_chat_history(chat_id: str, agent: ToolCallingAgent, user_message: str, agent_response: str) -> None:
364
+ """
365
+ Save conversation history to MongoDB.
366
+
367
+ Args:
368
+ chat_id: Chat session ID
369
+ agent: Agent instance with memory
370
+ user_message: Latest user message
371
+ agent_response: Agent's response
372
+ """
373
+ # Convert agent memory to serializable format
374
+ messages = []
375
+
376
+ for step in agent.memory.steps:
377
+ if isinstance(step, TaskStep):
378
+ messages.append({
379
+ "type": "task",
380
+ "content": step.task,
381
+ "images": step.task_images if hasattr(step, 'task_images') else [],
382
+ "timestamp": datetime.now(timezone.utc)
383
+ })
384
+ elif isinstance(step, ActionStep):
385
+ msg = {
386
+ "type": "action",
387
+ "step_number": step.step_number,
388
+ "observations_images": step.observations_images if hasattr(step, 'observations_images') else [],
389
+ "timing": step.timing.dict() if hasattr(step, 'timing') else init_timing,
390
+ "timestamp": datetime.now(timezone.utc)
391
+ }
392
+
393
+ # Store any additional attributes that might be accessible
394
+ # Note: ActionStep attributes are typically read-only
395
+ if hasattr(step, 'observations') and step.observations:
396
+ msg["observations"] = str(step.observations)
397
+ if hasattr(step, 'error') and step.error:
398
+ msg["error"] = str(step.error)
399
+
400
+ messages.append(msg)
401
+
402
+ # Add the latest response
403
+ messages.append({
404
+ "type": "response",
405
+ "content": agent_response,
406
+ "timestamp": datetime.now(timezone.utc)
407
+ })
408
+
409
+ # Update or insert chat document
410
+ collection.update_one(
411
+ {"_id": chat_id},
412
+ {
413
+ "$set": {
414
+ "messages": messages,
415
+ "last_updated": datetime.now(timezone.utc)
416
+ }
417
+ },
418
+ upsert=True
419
+ )
420
+
421
+ def get_chat_history(chat_id: str) -> Optional[List[Dict[str, Any]]]:
422
+ """
423
+ Retrieve chat history by ID.
424
+
425
+ Args:
426
+ chat_id: Chat session ID
427
+
428
+ Returns:
429
+ List of messages or None if not found
430
+ """
431
+ chat_doc = collection.find_one({"_id": chat_id})
432
+ return chat_doc["messages"] if chat_doc else None
433
+
434
+ def delete_chat_history(chat_id: str) -> bool:
435
+ """
436
+ Delete chat history by ID.
437
+
438
+ Args:
439
+ chat_id: Chat session ID
440
+
441
+ Returns:
442
+ True if deleted, False if not found
443
+ """
444
+ result = collection.delete_one({"_id": chat_id})
445
+ return result.deleted_count > 0
446
+
447
+ def list_chat_sessions() -> List[Dict[str, Any]]:
448
+ """
449
+ List all chat sessions with basic info.
450
+
451
+ Returns:
452
+ List of chat sessions with ID and last updated time
453
+ """
454
+ sessions = []
455
+ for doc in collection.find({}, {"_id": 1, "last_updated": 1, "messages": {"$slice": 1}}):
456
+ first_message = doc["messages"][0] if doc["messages"] else {}
457
+ sessions.append({
458
+ "chat_id": doc["_id"],
459
+ "last_updated": doc.get("last_updated"),
460
+ "first_message": first_message.get("content", "")[:100] + "..." if len(first_message.get("content", "")) > 100 else first_message.get("content", "")
461
+ })
462
+ return sessions
463
+
464
+
465
+ # # First message - creates new chat
466
+ # result1 = smolquery("Hello, what's the weather like?", [])
467
+ # print(f"Response: {result1['response']}")
468
+ # print(f"Chat ID: {result1['chat_id']}")
469
+
470
+ # # Second message - continues the conversation
471
+ # result2 = smolquery("Thanks, now tell me about Python programming", [], chat_id=result1['chat_id'])
472
+ # print(f"Response: {result2['response']}")
473
+ # print(f"Chat ID: {result2['chat_id']}") # Should be the same as result1['chat_id']
474
+
475
+ # # Retrieve chat history
476
+ # history = get_chat_history(result1['chat_id'])
477
+ # print(f"Chat history length: {len(history) if history else 0}")
478
+
479
+
480
+
217
481
  if __name__ == "__main__":
218
482
  print("hemlo worls")
483
+
484
+ # result1 = smolquery("Did i tell you to do something regarding stocks before? What do you conclude?", allow_web_search= False, chat_id= "a52ab59e-d6d0-4089-a963-61e8876244e0")
485
+ result1 = smolquery("How has NIFTY 50 been doing past 3 months?")
486
+ print(f"Response: {result1['response']}")
487
+ print(f"Chat ID: {result1['chat_id']}")
488
+
489
+ # # Second message - continues the conversation
490
+ result2 = smolquery("now tell me about other indices in the same country", chat_id=result1['chat_id'])
491
+ print(f"Response: {result2['response']}")
492
+ print(f"Chat ID: {result2['chat_id']}") # Should be the same as result1['chat_id']
493
+
494
+ # # Retrieve chat history
495
+ history = get_chat_history(result1['chat_id'])
496
+ print(f"Chat history length: {len(history) if history else 0}")
pembot/requirements.txt CHANGED
@@ -2,6 +2,7 @@ aiofiles==24.1.0
2
2
  annotated-types==0.7.0
3
3
  anyio==4.9.0
4
4
  audioop-lts==0.2.1
5
+ beautifulsoup4==4.13.4
5
6
  cachetools==5.5.2
6
7
  certifi==2025.6.15
7
8
  cffi==1.17.1
@@ -9,6 +10,7 @@ charset-normalizer==3.4.2
9
10
  click==8.2.1
10
11
  cryptography==45.0.5
11
12
  dnspython==2.7.0
13
+ duckduckgo_search==8.1.1
12
14
  et_xmlfile==2.0.0
13
15
  fastapi==0.115.14
14
16
  ffmpy==0.6.0
@@ -27,7 +29,9 @@ httpx==0.28.1
27
29
  huggingface-hub==0.33.2
28
30
  idna==3.10
29
31
  Jinja2==3.1.6
32
+ lxml==6.0.0
30
33
  markdown-it-py==3.0.0
34
+ markdownify==1.1.0
31
35
  MarkupSafe==3.0.2
32
36
  mdurl==0.1.2
33
37
  msgpack==1.1.1
@@ -40,8 +44,9 @@ pandas==2.3.0
40
44
  pathlib==1.0.1
41
45
  pdfminer.six==20250506
42
46
  pdfplumber==0.11.7
43
- pembot==0.0.8
47
+ pembot==0.1.2
44
48
  pillow==11.3.0
49
+ primp==0.15.0
45
50
  pyasn1==0.6.1
46
51
  pyasn1_modules==0.4.2
47
52
  pycparser==2.22
@@ -55,6 +60,7 @@ pynvim==0.5.2
55
60
  pypdfium2==4.30.1
56
61
  pytesseract==0.3.13
57
62
  python-dateutil==2.9.0.post0
63
+ python-dotenv==1.1.1
58
64
  python-multipart==0.0.20
59
65
  pytz==2025.2
60
66
  PyYAML==6.0.2
@@ -66,7 +72,9 @@ safehttpx==0.1.6
66
72
  semantic-version==2.10.0
67
73
  shellingham==1.5.4
68
74
  six==1.17.0
75
+ smolagents==1.20.0
69
76
  sniffio==1.3.1
77
+ soupsieve==2.7
70
78
  starlette==0.46.2
71
79
  tenacity==8.5.0
72
80
  tomlkit==0.13.3
pembot/search.py ADDED
@@ -0,0 +1,190 @@
1
+ import os
2
+ import requests
3
+ import json
4
+ from bs4 import BeautifulSoup
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+ import logging
7
+ from smolagents import tool
8
+
9
+ # Configure logging for better visibility
10
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
+
12
+ def _fetch_and_extract_text(url: str, timeout: int = 10) -> dict:
13
+ """
14
+ Fetches the content of a URL and extracts all visible text (excluding HTML/CSS).
15
+
16
+ Args:
17
+ url (str): The URL to fetch.
18
+ timeout (int): Timeout in seconds for the HTTP request.
19
+
20
+ Returns:
21
+ dict: A dictionary containing the URL and its extracted text, or an error.
22
+ """
23
+ try:
24
+ logging.info(f"Attempting to fetch and parse URL: {url}")
25
+ response = requests.get(url, timeout=timeout)
26
+ response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
27
+
28
+ soup = BeautifulSoup(response.text, 'html.parser')
29
+
30
+ # Remove script and style tags
31
+ for script_or_style in soup(['script', 'style']):
32
+ script_or_style.extract()
33
+
34
+ # Get text, strip whitespace, and handle line breaks
35
+ text = soup.get_text(separator='\n', strip=True)
36
+
37
+ return {
38
+ "url": url,
39
+ "extracted_text": text
40
+ }
41
+ except requests.exceptions.Timeout:
42
+ logging.warning(f"Timeout fetching URL: {url}")
43
+ return {"url": url, "error": f"Timeout after {timeout} seconds"}
44
+ except requests.exceptions.RequestException as e:
45
+ logging.error(f"Error fetching URL {url}: {e}")
46
+ return {"url": url, "error": f"Failed to fetch: {e}"}
47
+ except Exception as e:
48
+ logging.error(f"An unexpected error occurred while parsing {url}: {e}")
49
+ return {"url": url, "error": f"Error parsing content: {e}"}
50
+
51
+
52
+ @tool
53
+ def brave_search_tool(query: str, num_results: int = 5, fetch_full_text: bool = False, full_text_timeout: int = 10) -> str:
54
+ """
55
+ Performs a web search using the Brave Search API and returns the results.
56
+ Optionally fetches and extracts text from the top search results.
57
+
58
+ Args:
59
+ query (str): The search query.
60
+ num_results (int): The maximum number of search results to return from Brave Search.
61
+ Defaults to 5.
62
+ fetch_full_text (bool): If True, attempts to fetch and extract text from the URLs
63
+ of the top results. Defaults to False.
64
+ full_text_timeout (int): Timeout in seconds for fetching each full text.
65
+ Defaults to 10 seconds.
66
+
67
+ Returns:
68
+ str: A JSON string of the search results, optionally including extracted text,
69
+ or an error message.
70
+ JSON output is of the form:
71
+ {"title", "url", "snippet", "full_text"}
72
+ """
73
+ api_key = os.getenv("BRAVE_API_KEY")
74
+ if not api_key:
75
+ return json.dumps({"error": "Brave Search API key not found. Please set the BRAVE_API_KEY environment variable."})
76
+
77
+ url = "https://api.search.brave.com/res/v1/web/search"
78
+ headers = {
79
+ "Accept": "application/json",
80
+ "X-Subscription-Token": api_key
81
+ }
82
+ params = {
83
+ "q": query,
84
+ "count": num_results,
85
+ "offset": 0,
86
+ "country": "us",
87
+ "search_lang": "en"
88
+ }
89
+
90
+ try:
91
+ logging.info(f"Initiating Brave Search for query: '{query}' with {num_results} results.")
92
+ response = requests.get(url, headers=headers, params=params, timeout=15) # Brave API call timeout
93
+ response.raise_for_status()
94
+
95
+ data = response.json()
96
+ raw_web_results = []
97
+ if 'web' in data and 'results' in data['web']:
98
+ raw_web_results = data['web']['results']
99
+ else:
100
+ logging.warning("No web results found in Brave Search response.")
101
+ return json.dumps({"error": "No web results found in Brave Search response.", "raw_response": data})
102
+
103
+ formatted_results = []
104
+ urls_to_fetch = []
105
+
106
+ for result in raw_web_results:
107
+ formatted_item = {
108
+ "title": result.get("title"),
109
+ "url": result.get("url"),
110
+ "snippet": result.get("description")
111
+ }
112
+ formatted_results.append(formatted_item)
113
+ if fetch_full_text and result.get("url"):
114
+ urls_to_fetch.append(result["url"])
115
+
116
+ if fetch_full_text and urls_to_fetch:
117
+ logging.info(f"Fetching full text for {len(urls_to_fetch)} URLs with {full_text_timeout}s timeout per URL.")
118
+ # Use ThreadPoolExecutor to fetch URLs concurrently for efficiency
119
+ with ThreadPoolExecutor(max_workers=min(len(urls_to_fetch), 5)) as executor: # Limit concurrent fetches
120
+ future_to_url = {executor.submit(_fetch_and_extract_text, url, full_text_timeout): url for url in urls_to_fetch}
121
+ for future in as_completed(future_to_url):
122
+ url = future_to_url[future]
123
+ try:
124
+ extracted_data = future.result()
125
+ # Find the corresponding result in formatted_results and add extracted text
126
+ for item in formatted_results:
127
+ if item["url"] == url:
128
+ item["full_text"] = extracted_data.get("extracted_text")
129
+ if "error" in extracted_data:
130
+ item["full_text_error"] = extracted_data["error"]
131
+ break
132
+ except Exception as exc:
133
+ logging.error(f"URL {url} generated an exception during full text fetch: {exc}")
134
+ for item in formatted_results:
135
+ if item["url"] == url:
136
+ item["full_text_error"] = f"Failed to get full text due to internal error: {exc}"
137
+ break
138
+
139
+ return json.dumps(formatted_results, indent=2)
140
+
141
+ except requests.exceptions.HTTPError as http_err:
142
+ logging.error(f"HTTP error occurred during Brave Search: {http_err} - Status: {response.status_code}")
143
+ return json.dumps({"error": f"HTTP error occurred with Brave Search: {http_err}", "status_code": response.status_code, "response_text": response.text})
144
+ except requests.exceptions.ConnectionError as conn_err:
145
+ logging.error(f"Connection error occurred during Brave Search: {conn_err}")
146
+ return json.dumps({"error": f"Connection error occurred with Brave Search: {conn_err}"})
147
+ except requests.exceptions.Timeout as timeout_err:
148
+ logging.error(f"Timeout error occurred during Brave Search API call: {timeout_err}")
149
+ return json.dumps({"error": f"Timeout error occurred with Brave Search API: {timeout_err}"})
150
+ except requests.exceptions.RequestException as req_err:
151
+ logging.error(f"An unexpected request error occurred during Brave Search: {req_err}")
152
+ return json.dumps({"error": f"An unexpected request error occurred with Brave Search: {req_err}"})
153
+ except json.JSONDecodeError:
154
+ logging.error("Failed to decode JSON response from Brave Search API.")
155
+ return json.dumps({"error": "Failed to decode JSON response from Brave Search API."})
156
+ except Exception as e:
157
+ logging.error(f"An unexpected error occurred in brave_search_tool: {e}", exc_info=True)
158
+ return json.dumps({"error": f"An unexpected error occurred in brave_search_tool: {e}"})
159
+
160
+ # Example usage (for testing the tool function independently)
161
+ if __name__ == "__main__":
162
+ # For testing, you might temporarily set the API key here or ensure it's in your env
163
+ # os.environ["BRAVE_API_KEY"] = "YOUR_BRAVE_API_KEY" # REMOVE IN PRODUCTION
164
+ # If not set, the tool will return an error about missing key
165
+
166
+ print("--- Testing Brave Search Tool with Full Text Fetch ---")
167
+ search_query = "Impact of AI on job market latest research"
168
+
169
+ # Test 1: Basic search (no full text)
170
+ print("\n--- Test 1: Basic Search ---")
171
+ results_basic = brave_search_tool(search_query, num_results=2, fetch_full_text=False)
172
+ print(results_basic)
173
+
174
+ # Test 2: Search with full text fetching
175
+ print("\n--- Test 2: Search with Full Text Fetch (num_results=2, timeout=5s) ---")
176
+ results_full_text = brave_search_tool(search_query, num_results=2, fetch_full_text=True, full_text_timeout=5)
177
+ print(results_full_text)
178
+
179
+ print("\n--- Test 3: Search with Full Text Fetch (num_results=1, very short timeout) ---")
180
+ results_short_timeout = brave_search_tool("impact of climate change on agriculture", num_results=1, fetch_full_text=True, full_text_timeout=1)
181
+ print(results_short_timeout)
182
+
183
+ print("\n--- Testing with missing API Key (example of error handling) ---")
184
+ original_key = os.getenv("BRAVE_API_KEY")
185
+ if original_key:
186
+ del os.environ["BRAVE_API_KEY"] # Temporarily unset for test
187
+ missing_key_results = brave_search_tool("test")
188
+ print(missing_key_results)
189
+ if original_key:
190
+ os.environ["BRAVE_API_KEY"] = original_key # Restore for other parts of the script
{pembot-0.1.0.dist-info → pembot-0.1.2.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pembot
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: A Python Package to convert PEM blog content to usseful information by leveraging LLMs
5
5
  Author-email: cyto <aryan_sidhwani@protonmail.com>
6
6
  License-Expression: MIT
{pembot-0.1.0.dist-info → pembot-0.1.2.dist-info}/RECORD CHANGED
@@ -1,17 +1,19 @@
1
1
  pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
2
2
  pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pembot/__init__.py,sha256=oq-JydA8x2prvUKvfrjXWp-GeFLh_qZPesfmB4ad6HE,211
3
+ pembot/__init__.py,sha256=JS8ONln7V9MIxi_JrRdEW9Kc9ygOmpCCRKEYC9TnTjA,211
4
4
  pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
5
5
  pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
6
6
  pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
7
7
  pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
8
- pembot/query.py,sha256=0RBnLDBz8pctbsw9GA2UHG-uZPippKYjBBfBBKe2Oec,8705
9
- pembot/requirements.txt,sha256=vnjZ3uO4Dd8qPFDq1F2qGromJ5jrEBfBeiDwz7rKijk,1360
10
- pembot/.git/COMMIT_EDITMSG,sha256=iRLy0abINakd4FBRWJuD-QQ_WWbXZusnr_9e3wo4d90,38
8
+ pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
9
+ pembot/query.py,sha256=zgfIJsSMDatFPl0Fw3MhK7fO8uBB0Yj4rxEAExqGyGA,18054
10
+ pembot/requirements.txt,sha256=BGGuhM9oXJGN6oueIu3AYmBMUXeo-BoEQFJHHyO1BCc,1508
11
+ pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
12
+ pembot/.git/COMMIT_EDITMSG,sha256=pTsMiZ9dt9Of1JgR5858BXwxO8jn7P0MpLw0pJE7dqc,13
11
13
  pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
12
14
  pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
13
15
  pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
14
- pembot/.git/index,sha256=CdDhKv_jjjaFu1MNjZTrZZ4GF_av7KNYh750IqRkGWg,1814
16
+ pembot/.git/index,sha256=70Iy37BW4GBRzaodLY4qm1hjjcBml0r1cdcFyNX_G_I,1974
15
17
  pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
16
18
  pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
17
19
  pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
@@ -28,10 +30,10 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
28
30
  pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
29
31
  pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
30
32
  pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
31
- pembot/.git/logs/HEAD,sha256=TTHA6MsudjF1a9H4QECSP3XPvB-k2AOsA8mTVCEis_o,2980
32
- pembot/.git/logs/refs/heads/main,sha256=TTHA6MsudjF1a9H4QECSP3XPvB-k2AOsA8mTVCEis_o,2980
33
+ pembot/.git/logs/HEAD,sha256=OrEUcE6427s9cKnTnrPMYZLrJcs4v-PSww3Zq_Tc060,3790
34
+ pembot/.git/logs/refs/heads/main,sha256=OrEUcE6427s9cKnTnrPMYZLrJcs4v-PSww3Zq_Tc060,3790
33
35
  pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
34
- pembot/.git/logs/refs/remotes/origin/main,sha256=uxD0NKkje26omv-w0V09LJA81e-ccHAA-FY148tg93g,1606
36
+ pembot/.git/logs/refs/remotes/origin/main,sha256=iJ6dB86rQ2-iqzRSdgRdjkze4t1IGz0MTxou3cwLYE4,2044
35
37
  pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
36
38
  pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
37
39
  pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
@@ -39,6 +41,8 @@ pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa,sha256=hsOHhX0Yajg
39
41
  pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1,sha256=GKt_CAJNOQXwGnoFLuiNpkd0s_hP_UDLKd59VRknYy0,330
40
42
  pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c,sha256=Uk1dStvEBica-t38qHsZZ_4mxvi6b6VA9PaKE4KSunQ,90
41
43
  pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05QL0Do-raB4AtK5QjvKLFBNc0RZqNga9o,244
44
+ pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee,sha256=fj4c6vIKYMYSj5DEdXd6fcYGcanqaPGRD_9haJy35ns,56
45
+ pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1bYsF2tWArhzkldnfgLFELLK2CH_2XNcU,203
42
46
  pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
43
47
  pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
44
48
  pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
@@ -46,6 +50,7 @@ pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYp
46
50
  pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
47
51
  pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
48
52
  pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
53
+ pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba,sha256=XaF3EsJ1wSIWtgBtgKsZkwiMK0NM8acFy9nnqE9_d0s,3085
49
54
  pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515,sha256=A9MNZO3QZ6ghGd1MyfmJ6H3dBTpF4HZcRosVxWytx8E,4077
50
55
  pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f,sha256=omF4gmE9IQFZR8t6ybAKfnW02tdn9ZaVWKRhv_o1V4c,2083
51
56
  pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f,sha256=ygVUpaLo7cxUdIgjFlaBh2BkllV6BIYYkzLIxsPKjWE,4111
@@ -56,23 +61,35 @@ pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0,sha256=TLuVmtSH9K3
56
61
  pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D4JgHkjjqcDQCwuS17w60JRkVr25ZFlcI,117
57
62
  pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
58
63
  pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
64
+ pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c,sha256=gVL6GHxMRFhlOnyUCO1dSxnsBlMd4Jx90eNZFrv32UQ,6490
65
+ pembot/.git/objects/4b/c4370a037feed828cca0915ebb0bb94b24a9d4,sha256=jt9lsSz8c3dw9PyfEEtkReCC_8YLXSKuc6ykSJCKZPM,487
59
66
  pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
60
67
  pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
61
68
  pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
69
+ pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e,sha256=3IOcUn5myiozgeId1iWJZX-r7cS65xXnzQCEjrc-1ZA,168
70
+ pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7,sha256=BnHoA5JBo5NY2ReemhwmZ-dOdx6CwXWY1TQsc-FSM5o,242
62
71
  pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba,sha256=KZvfnjxuriY54uWZQOM-GLovAvHs1k8_KwhpjNA5lW4,128
63
72
  pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d,sha256=sYkhBkrSPQ8klX2gPrXJUZVt2a0iaF7KC7NFGBuxgeY,4360
64
73
  pembot/.git/objects/64/00040794955d17c9a1fe1aaaea59f2c4822177,sha256=-tFnLFQvYrtkodOVhPK3WUhN8Qg9sQ8VfcEHG9MXdAg,421
65
74
  pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9,sha256=dJRTCmT9rLygONcQ7MPETl9AImF3Iy5tB_KUeCvKyKY,2651
75
+ pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d,sha256=ZamWua6G5BGjBYZYeG8dN3nHhwz_kqFfoYyO2wtuRV0,417
66
76
  pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331,sha256=PFb9LUDMnUCnuJcXUa5W1ea__fdP17kNyWrnqvnOpjs,240
67
77
  pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5,sha256=kbKUb6fwwhRO73B4EZmol55JBvckqE3GNZ9PqHRB2ag,3995
78
+ pembot/.git/objects/75/321fbcd2be44a548400fbacbf5bcb71e3810fd,sha256=7AXaYVgItbw3xQiEqeRyO5qdIedIxDoI9hTDn8CBRxM,56
79
+ pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5,sha256=6ut1I6cMnpRs6EK2CZZv50W25yNc0Ha6nC_cj9tSQjI,249
68
80
  pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef,sha256=X59k-p9VNLBpmJlL53qIz8mntLeCSpnjw-rq9u9z_6I,90
69
81
  pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEXF8In553gHxlEsB47Vg2qFXHiJqepWEqg,5167
70
82
  pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
71
83
  pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
72
84
  pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
85
+ pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
86
+ pembot/.git/objects/8f/c00bf69f4ad3e50c13acc4a0988b6c0fe72b5a,sha256=uJVaujaQWN_NwzK9P0SM7cYp3I6GQFXdlYBPrnqVhcg,159
73
87
  pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88,sha256=FLAmmgvYuEAx1-ZBU30rvDzP0ppXWRSVrzPWVnArIb0,203
88
+ pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643,sha256=WPgmr5bXli5s8rNdiUQM4IB4o_xyJe6nuI3TG4e5aYs,487
74
89
  pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6,sha256=xf8oZ5IBMTxfkH7MFfukV7ZIu0Apd-78eJTdlI7GBv0,90
75
90
  pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27,sha256=jwJdRviwjGJIyMpE_BM6mr7B9ofGEsI5ZToJo5nmlao,263
91
+ pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444,sha256=PZW83V2jXf8-zDoTJqG1jNsCMURU8NUK8ljBD458MK0,3949
92
+ pembot/.git/objects/99/89463f57f1f2931e5973bd543c80f18b0204bc,sha256=RoShFyJfmRnXYXP_y2c_e9KE_Otawr_T1mC22BqDNDI,6472
76
93
  pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456,sha256=xIETiieOoilleucGg7vXOgjZ-v5PI0t34fDJjDD665A,4204
77
94
  pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b,sha256=lh2LurucwRdL6WP8ChgmjXrK2lR7HASIXzt4iHFrTf4,178
78
95
  pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547,sha256=kaU3Z4k6ptIwO8ktcjs2-kshb0bzM4y8Uur-a27_jnk,56
@@ -91,10 +108,12 @@ pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFt
91
108
  pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
92
109
  pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
93
110
  pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
111
+ pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
94
112
  pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
95
113
  pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
96
114
  pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
97
115
  pembot/.git/objects/e0/da740b542afc451c45b9b4be6c0c7a3c79b06c,sha256=oAb2b2VwhPXykdK_ZV8MEFwfy-ZPd2Nja2gAv20U7hc,115
116
+ pembot/.git/objects/e3/da98f3722c2d0c937db0872836fc4491e4487a,sha256=DNdNDoMdjDexgwLErwUZDQCpvq4-QkFHtbVRXW_jKTk,168
98
117
  pembot/.git/objects/e5/3070f2b07f45d031444b09b1b38658f3caf29e,sha256=irJ-z8kPZmg85B0f4TQz73yJoCMWMWsIR3Pi5wx1Dlk,4034
99
118
  pembot/.git/objects/e7/911a702079a6144997ea4e70f59abbe59ec2bc,sha256=r4zY-__F4gSfjE7onRTrcxvv8umXKuPuFzd95AiQ0cs,392
100
119
  pembot/.git/objects/e9/1172752e9a421ae463112d2b0506b37498c98d,sha256=qWZpM65kQPSxlVHAtyzH5L-j3rL-b9Jw-A7YBm4NMlI,249
@@ -102,8 +121,11 @@ pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58,sha256=lXbMvL_xl8P
102
121
  pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238,sha256=ltEINFUpQP86CkE4nAT1Afegz3ytY3Nlx1P6ibTFEbo,305
103
122
  pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8,sha256=fFYq_ODekFhF9SwBL9GP_fGDsNavXVVOuI6kmnHlkiY,5140
104
123
  pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d,sha256=mrF9jZHY2oJm8tkd8nQdMgUPbrZfENOFaR3mvbwi1dg,187
124
+ pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37,sha256=09VHFwSnSxOExWa3sKBzWAfSMyx5INI9KK2mstW8-xs,203
125
+ pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834,sha256=HvfHm5CnbujMp-uu0aucVzpaWCy2aKBzaXknvlYCnjg,3937
105
126
  pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1,sha256=2cORujYAURTvGACujtNJvP1f-s7rt-UkrtPBuKfDR-g,419
106
127
  pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552,sha256=Ugf-wTcOlwZXmxmbnjEc3iOK3dDRntTVONOJsrOjl3E,205
128
+ pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e,sha256=e-Ff8_3sWUqd5ciQJLbXohN-E2ShUWKUiYZCHMitMus,290
107
129
  pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8,sha256=Y5WfCEpk121Cy9gaFfSY4ZkUz54qu45osRZdTy9kZ8c,393
108
130
  pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729,sha256=MrRy-fBSXZcp-yJM3e-tH3wCdUS-VFX6rW_mKTa-0_Y,419
109
131
  pembot/.git/objects/f8/6fbd490878cb0d3c35cc4443672d1309171bf1,sha256=hBVqthGLEEX2NmdD51kjiIeTd5CP5MU8it41zHlW3m0,419
@@ -114,20 +136,21 @@ pembot/.git/objects/fb/6c90c9ce5e0cdfbe074a3f060afc66f62eefde,sha256=Bmrnc27anjq
114
136
  pembot/.git/objects/fc/988aab7e2d46396dc595ad24345e8e77dda0e4,sha256=8982HA0S9bCm0JQsrgWzIkRNIiGfaDRUUrXsnuVjE6A,196
115
137
  pembot/.git/objects/fc/e56f1e09d09a05b9babf796fb40bece176f3a2,sha256=g-IVuI_8YBn048qkyyY6Vpn8zfP8UCJxUs0F5bDD6qM,1114
116
138
  pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5,sha256=cWWr70MLaXk82O6EIPpYlGUd5P30IUEELrbrh2MrUXE,115
139
+ pembot/.git/objects/fe/cc5d8154b1e77e4c6beb23ce9cbe8fea55d34d,sha256=0it_Z3Lk5Mj06l0fJTVXs4RP4-kw_kFFJqFApwtKDfU,115
117
140
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
118
141
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
119
142
  pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
120
- pembot/.git/refs/heads/main,sha256=UNQiShSvoG1AvbUvWeJLlN5V3ZHJSDarZ4vFLDI-Xh8,41
143
+ pembot/.git/refs/heads/main,sha256=XQJQDbhwKcxH2eABZeHWgpv9Yedy7ytKBo0tRWo03ZM,41
121
144
  pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
122
- pembot/.git/refs/remotes/origin/main,sha256=UNQiShSvoG1AvbUvWeJLlN5V3ZHJSDarZ4vFLDI-Xh8,41
145
+ pembot/.git/refs/remotes/origin/main,sha256=XQJQDbhwKcxH2eABZeHWgpv9Yedy7ytKBo0tRWo03ZM,41
123
146
  pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
147
  pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
125
148
  pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
126
149
  pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
127
- pembot/TextEmbedder/mongodb_embedder.py,sha256=i5M56jaC2t3yUinqlXfC70GaTos4kSR_1LGsbljpEhU,10762
150
+ pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
128
151
  pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
129
152
  pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
130
- pembot/config/config.yaml,sha256=-6niASpCQzNynkHSTHWi3MawUWUHpOVuNg0Fhmun30M,156
153
+ pembot/config/config.yaml,sha256=uLE_cngQbhDN3pwqRaZC60yhXP5dSFUYIWQKZ0qkhFM,156
131
154
  pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
132
155
  pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
133
156
  pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -183,7 +206,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
183
206
  pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
184
207
  pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
185
208
  pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
186
- pembot-0.1.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
187
- pembot-0.1.0.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
188
- pembot-0.1.0.dist-info/METADATA,sha256=3tLhZEY9302ZgIQ2itAMbcM354jWyEcZ_Z3h2FeD1-8,313
189
- pembot-0.1.0.dist-info/RECORD,,
209
+ pembot-0.1.2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
210
+ pembot-0.1.2.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
211
+ pembot-0.1.2.dist-info/METADATA,sha256=2Eaou5uE_IQB3jUmUnvxXuY_ifpKbo9ZhZpdVjj2DMk,313
212
+ pembot-0.1.2.dist-info/RECORD,,
File without changes