pembot 0.1.0__py2.py3-none-any.whl → 0.1.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pembot might be problematic. Click here for more details.
- pembot/.git/COMMIT_EDITMSG +1 -1
- pembot/.git/index +0 -0
- pembot/.git/logs/HEAD +2 -0
- pembot/.git/logs/refs/heads/main +2 -0
- pembot/.git/logs/refs/remotes/origin/main +2 -0
- pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee +0 -0
- pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200 +0 -0
- pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba +0 -0
- pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c +0 -0
- pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e +0 -0
- pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7 +0 -0
- pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d +0 -0
- pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5 +1 -0
- pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a +0 -0
- pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643 +0 -0
- pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444 +0 -0
- pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511 +0 -0
- pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37 +0 -0
- pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834 +0 -0
- pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e +0 -0
- pembot/.git/refs/heads/main +1 -1
- pembot/.git/refs/remotes/origin/main +1 -1
- pembot/TextEmbedder/mongodb_embedder.py +3 -0
- pembot/__init__.py +1 -1
- pembot/config/config.yaml +1 -1
- pembot/pyrightconfig.json +8 -0
- pembot/query.py +279 -1
- pembot/requirements.txt +1 -1
- pembot/search.py +190 -0
- {pembot-0.1.0.dist-info → pembot-0.1.1.dist-info}/METADATA +1 -1
- {pembot-0.1.0.dist-info → pembot-0.1.1.dist-info}/RECORD +33 -16
- {pembot-0.1.0.dist-info → pembot-0.1.1.dist-info}/WHEEL +0 -0
- {pembot-0.1.0.dist-info → pembot-0.1.1.dist-info}/licenses/LICENSE +0 -0
pembot/.git/COMMIT_EDITMSG
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
pembot/.git/index
CHANGED
|
Binary file
|
pembot/.git/logs/HEAD
CHANGED
|
@@ -10,3 +10,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
|
|
|
10
10
|
ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
|
|
11
11
|
af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
|
|
12
12
|
0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
|
|
13
|
+
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
|
+
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
pembot/.git/logs/refs/heads/main
CHANGED
|
@@ -10,3 +10,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
|
|
|
10
10
|
ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896700 +0530 commit: added model name to convertor
|
|
11
11
|
af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081786 +0530 commit: upgraded to 0.0.8; added ability to classify the document chunks into one more filterable parameter 'type'; other query related improvements..
|
|
12
12
|
0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136060 +0530 commit: fixed the type_info array pushing bug
|
|
13
|
+
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236415 +0530 commit: added prompt prefixing option to prime the llm with some more data; added the option for no-filter global search in a collection in search_within_document(s)
|
|
14
|
+
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858244 +0530 commit: added smolagent functionality with tool extension capability to query.py and added a caveat in search_within_documents by which you can just return no context if you set limit= 0, effectively doing a Non-contextual prompt
|
|
@@ -9,3 +9,5 @@ eb75e1c49f1e5b79dca17ccdbec8067756523238 0bdb4169fc0f312b8698f1df17a258fff163aea
|
|
|
9
9
|
ef0503a60244391590b16042019032e91d7cc30d af80ddb5890f062e364ea8ade2d602df4e12de8c cyto <silverstone965@gmail.com> 1751896713 +0530 update by push
|
|
10
10
|
af80ddb5890f062e364ea8ade2d602df4e12de8c 0d28f73897db0c9a9351ee9e64d2a0fe27db2705 cyto <silverstone965@gmail.com> 1752081801 +0530 update by push
|
|
11
11
|
0d28f73897db0c9a9351ee9e64d2a0fe27db2705 a898d2c3947d30d8be64bd2bbcef68f956d5456b cyto <silverstone965@gmail.com> 1752136119 +0530 update by push
|
|
12
|
+
a898d2c3947d30d8be64bd2bbcef68f956d5456b 784aa28d912b66e07748483efe0326c70d7541a5 cyto <silverstone965@gmail.com> 1752236436 +0530 update by push
|
|
13
|
+
784aa28d912b66e07748483efe0326c70d7541a5 f214d4d56726e2928479c5948bd88e038cf70b2e cyto <silverstone965@gmail.com> 1752858280 +0530 update by push
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
x�PIn� �:��U�*P��7�r�$H�#��p�����+��~�m9�PaR��"��~vFe�5Z��"o�Z6�O=f�۱P���h7�Ɉ�M��)��qY,y����I!���n\�~W��#�*G�LF��5a�ϖ���I�A�c/��khX���#{��Vȇ��W��Ђ�%$��Ę�3�
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
pembot/.git/refs/heads/main
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
f214d4d56726e2928479c5948bd88e038cf70b2e
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
f214d4d56726e2928479c5948bd88e038cf70b2e
|
|
@@ -49,6 +49,9 @@ def search_within_document(
|
|
|
49
49
|
A list of dictionaries, where each dictionary represents a matching chunk
|
|
50
50
|
from the specified document, including its text, docId, and score.
|
|
51
51
|
"""
|
|
52
|
+
if limit < 1:
|
|
53
|
+
return []
|
|
54
|
+
|
|
52
55
|
embeddings_collection = db_client[embeddings_collection_name]
|
|
53
56
|
|
|
54
57
|
print(f"Searching within document (docId: {document_name_id})...")
|
pembot/__init__.py
CHANGED
pembot/config/config.yaml
CHANGED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
{
|
|
2
|
+
"venvPath": "..", // If your venv is a direct child like ./venv
|
|
3
|
+
"venv": "venvpem" // The name of your venv folder
|
|
4
|
+
// Or if you want to be explicit with the full path
|
|
5
|
+
// "pythonVersion": "3.9", // Or your specific version
|
|
6
|
+
// "pythonPlatform": "Linux", // Or "Windows", "Darwin"
|
|
7
|
+
// "pythonPath": "/path/to/your/project/.venv/bin/python" // Absolute path
|
|
8
|
+
}
|
pembot/query.py
CHANGED
|
@@ -3,14 +3,34 @@ from huggingface_hub.inference._generated.types.chat_completion import ChatCompl
|
|
|
3
3
|
from huggingface_hub.inference._providers import PROVIDER_T
|
|
4
4
|
import ollama
|
|
5
5
|
import re
|
|
6
|
+
from smolagents import InferenceClientModel, ToolCallingAgent, ActionStep, TaskStep
|
|
7
|
+
from smolagents.default_tools import FinalAnswerTool, UserInputTool, VisitWebpageTool
|
|
8
|
+
from pymongo import MongoClient
|
|
9
|
+
from typing import Callable, Dict, Any, Optional, List
|
|
10
|
+
import uuid
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from smolagents.monitoring import Timing
|
|
6
13
|
|
|
7
|
-
|
|
14
|
+
|
|
15
|
+
from search import brave_search_tool
|
|
8
16
|
from pembot.TextEmbedder.mongodb_embedder import search_within_document
|
|
9
17
|
import numpy as np
|
|
10
18
|
from huggingface_hub import InferenceClient
|
|
11
19
|
from google import genai
|
|
12
20
|
from google.genai import types
|
|
13
21
|
import time
|
|
22
|
+
from datetime import timezone
|
|
23
|
+
|
|
24
|
+
init_timing= {
|
|
25
|
+
"start_time": 0.0,
|
|
26
|
+
"end_time": 0.0,
|
|
27
|
+
"duration": 0.0,
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
mongodb_uri= environ['MONGODB_SCHEMER']
|
|
31
|
+
mc = MongoClient(mongodb_uri)
|
|
32
|
+
db = mc["schemerdb"]
|
|
33
|
+
collection = db["chat_history"] # Collection name
|
|
14
34
|
|
|
15
35
|
from pembot.TextEmbedder.mongodb_index_creator import create_vector_index
|
|
16
36
|
|
|
@@ -214,5 +234,263 @@ def remove_bs(text):
|
|
|
214
234
|
return ""
|
|
215
235
|
|
|
216
236
|
|
|
237
|
+
def smolquery(message: str, external_tools: list[Callable] = [], chat_id: str | None = None, allow_web_search= True) -> Dict[str, Any]:
|
|
238
|
+
"""
|
|
239
|
+
Run agent with chat history support.
|
|
240
|
+
|
|
241
|
+
Args:
|
|
242
|
+
message: User's message
|
|
243
|
+
external_tools: List of external tools to use
|
|
244
|
+
chat_id: Optional chat ID for continuing conversation
|
|
245
|
+
allow_web_search: Boolean to decide whether to include brave tool to fetch search results
|
|
246
|
+
and the Visiting Web Page Tool in the agent's toolbox
|
|
247
|
+
|
|
248
|
+
Returns:
|
|
249
|
+
Dictionary containing response and chat_id
|
|
250
|
+
"""
|
|
251
|
+
alltools = []
|
|
252
|
+
|
|
253
|
+
if allow_web_search:
|
|
254
|
+
alltools.extend([
|
|
255
|
+
brave_search_tool,
|
|
256
|
+
VisitWebpageTool(),
|
|
257
|
+
])
|
|
258
|
+
|
|
259
|
+
alltools.extend(external_tools)
|
|
260
|
+
|
|
261
|
+
model = InferenceClientModel(
|
|
262
|
+
token= environ["HF_TOKEN"],
|
|
263
|
+
# model_id= "HuggingFaceTB/SmolLM3-3B"
|
|
264
|
+
model_id= "deepseek-ai/DeepSeek-R1-0528"
|
|
265
|
+
)
|
|
266
|
+
|
|
267
|
+
agent = ToolCallingAgent(tools=alltools, model=model, add_base_tools=False)
|
|
268
|
+
|
|
269
|
+
# Handle chat history
|
|
270
|
+
if chat_id:
|
|
271
|
+
# Load existing conversation
|
|
272
|
+
chat_doc = collection.find_one({"_id": chat_id})
|
|
273
|
+
if chat_doc:
|
|
274
|
+
# Restore agent memory from database
|
|
275
|
+
restore_agent_memory(agent, chat_doc["messages"])
|
|
276
|
+
else:
|
|
277
|
+
# Chat ID provided but not found, create new one
|
|
278
|
+
chat_id = str(uuid.uuid4())
|
|
279
|
+
else:
|
|
280
|
+
# Create new chat
|
|
281
|
+
chat_id = str(uuid.uuid4())
|
|
282
|
+
|
|
283
|
+
# Run the agent
|
|
284
|
+
response = agent.run(message, reset= False)
|
|
285
|
+
|
|
286
|
+
# Extract the final answer from the response
|
|
287
|
+
final_answer = extract_final_answer(response)
|
|
288
|
+
|
|
289
|
+
# Save conversation to database
|
|
290
|
+
save_chat_history(chat_id, agent, message, final_answer)
|
|
291
|
+
|
|
292
|
+
return {
|
|
293
|
+
"response": final_answer,
|
|
294
|
+
"chat_id": chat_id
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
def extract_final_answer(response: Any) -> str:
|
|
298
|
+
"""
|
|
299
|
+
Extract the final answer from various response types.
|
|
300
|
+
|
|
301
|
+
Args:
|
|
302
|
+
response: Response from agent.run()
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
Final answer as string
|
|
306
|
+
"""
|
|
307
|
+
# Handle RunResult object
|
|
308
|
+
if hasattr(response, 'final_answer'):
|
|
309
|
+
return str(response.final_answer)
|
|
310
|
+
|
|
311
|
+
# Handle direct string response
|
|
312
|
+
if isinstance(response, str):
|
|
313
|
+
return response
|
|
314
|
+
|
|
315
|
+
# Handle generator response
|
|
316
|
+
if hasattr(response, '__iter__') and not isinstance(response, (str, bytes)):
|
|
317
|
+
final_step = None
|
|
318
|
+
for step in response:
|
|
319
|
+
final_step = step
|
|
320
|
+
# Look for FinalAnswerStep
|
|
321
|
+
if hasattr(step, 'final_answer'):
|
|
322
|
+
return str(step.final_answer)
|
|
323
|
+
|
|
324
|
+
# If no final answer found, return last step as string
|
|
325
|
+
if final_step is not None:
|
|
326
|
+
return str(final_step)
|
|
327
|
+
|
|
328
|
+
# Fallback to string conversion
|
|
329
|
+
return str(response)
|
|
330
|
+
|
|
331
|
+
def restore_agent_memory(agent: ToolCallingAgent, messages: List[Dict[str, Any]]) -> None:
|
|
332
|
+
"""
|
|
333
|
+
Restore agent memory from stored messages.
|
|
334
|
+
|
|
335
|
+
Args:
|
|
336
|
+
agent: The agent instance
|
|
337
|
+
messages: List of stored messages
|
|
338
|
+
"""
|
|
339
|
+
for msg in messages:
|
|
340
|
+
if msg["type"] == "task":
|
|
341
|
+
# Add task step
|
|
342
|
+
task_step = TaskStep(
|
|
343
|
+
task=msg["content"],
|
|
344
|
+
task_images=msg.get("images", [])
|
|
345
|
+
)
|
|
346
|
+
agent.memory.steps.append(task_step)
|
|
347
|
+
elif msg["type"] == "action":
|
|
348
|
+
# Add action step with only the required parameters
|
|
349
|
+
# ActionStep objects are typically created during execution
|
|
350
|
+
# and contain read-only information, so we create a minimal one
|
|
351
|
+
action_saved_timing= msg.get("timing", init_timing)
|
|
352
|
+
action_step = ActionStep(
|
|
353
|
+
observations= msg.get("observations", ""),
|
|
354
|
+
step_number=msg["step_number"],
|
|
355
|
+
observations_images=msg.get("observations_images", []),
|
|
356
|
+
timing=Timing(
|
|
357
|
+
start_time= action_saved_timing.get("start_time", 0.0),
|
|
358
|
+
end_time= action_saved_timing.get("end_time", 0.0)
|
|
359
|
+
)
|
|
360
|
+
)
|
|
361
|
+
agent.memory.steps.append(action_step)
|
|
362
|
+
|
|
363
|
+
def save_chat_history(chat_id: str, agent: ToolCallingAgent, user_message: str, agent_response: str) -> None:
|
|
364
|
+
"""
|
|
365
|
+
Save conversation history to MongoDB.
|
|
366
|
+
|
|
367
|
+
Args:
|
|
368
|
+
chat_id: Chat session ID
|
|
369
|
+
agent: Agent instance with memory
|
|
370
|
+
user_message: Latest user message
|
|
371
|
+
agent_response: Agent's response
|
|
372
|
+
"""
|
|
373
|
+
# Convert agent memory to serializable format
|
|
374
|
+
messages = []
|
|
375
|
+
|
|
376
|
+
for step in agent.memory.steps:
|
|
377
|
+
if isinstance(step, TaskStep):
|
|
378
|
+
messages.append({
|
|
379
|
+
"type": "task",
|
|
380
|
+
"content": step.task,
|
|
381
|
+
"images": step.task_images if hasattr(step, 'task_images') else [],
|
|
382
|
+
"timestamp": datetime.now(timezone.utc)
|
|
383
|
+
})
|
|
384
|
+
elif isinstance(step, ActionStep):
|
|
385
|
+
msg = {
|
|
386
|
+
"type": "action",
|
|
387
|
+
"step_number": step.step_number,
|
|
388
|
+
"observations_images": step.observations_images if hasattr(step, 'observations_images') else [],
|
|
389
|
+
"timing": step.timing.dict() if hasattr(step, 'timing') else init_timing,
|
|
390
|
+
"timestamp": datetime.now(timezone.utc)
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
# Store any additional attributes that might be accessible
|
|
394
|
+
# Note: ActionStep attributes are typically read-only
|
|
395
|
+
if hasattr(step, 'observations') and step.observations:
|
|
396
|
+
msg["observations"] = str(step.observations)
|
|
397
|
+
if hasattr(step, 'error') and step.error:
|
|
398
|
+
msg["error"] = str(step.error)
|
|
399
|
+
|
|
400
|
+
messages.append(msg)
|
|
401
|
+
|
|
402
|
+
# Add the latest response
|
|
403
|
+
messages.append({
|
|
404
|
+
"type": "response",
|
|
405
|
+
"content": agent_response,
|
|
406
|
+
"timestamp": datetime.now(timezone.utc)
|
|
407
|
+
})
|
|
408
|
+
|
|
409
|
+
# Update or insert chat document
|
|
410
|
+
collection.update_one(
|
|
411
|
+
{"_id": chat_id},
|
|
412
|
+
{
|
|
413
|
+
"$set": {
|
|
414
|
+
"messages": messages,
|
|
415
|
+
"last_updated": datetime.now(timezone.utc)
|
|
416
|
+
}
|
|
417
|
+
},
|
|
418
|
+
upsert=True
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
def get_chat_history(chat_id: str) -> Optional[List[Dict[str, Any]]]:
|
|
422
|
+
"""
|
|
423
|
+
Retrieve chat history by ID.
|
|
424
|
+
|
|
425
|
+
Args:
|
|
426
|
+
chat_id: Chat session ID
|
|
427
|
+
|
|
428
|
+
Returns:
|
|
429
|
+
List of messages or None if not found
|
|
430
|
+
"""
|
|
431
|
+
chat_doc = collection.find_one({"_id": chat_id})
|
|
432
|
+
return chat_doc["messages"] if chat_doc else None
|
|
433
|
+
|
|
434
|
+
def delete_chat_history(chat_id: str) -> bool:
|
|
435
|
+
"""
|
|
436
|
+
Delete chat history by ID.
|
|
437
|
+
|
|
438
|
+
Args:
|
|
439
|
+
chat_id: Chat session ID
|
|
440
|
+
|
|
441
|
+
Returns:
|
|
442
|
+
True if deleted, False if not found
|
|
443
|
+
"""
|
|
444
|
+
result = collection.delete_one({"_id": chat_id})
|
|
445
|
+
return result.deleted_count > 0
|
|
446
|
+
|
|
447
|
+
def list_chat_sessions() -> List[Dict[str, Any]]:
|
|
448
|
+
"""
|
|
449
|
+
List all chat sessions with basic info.
|
|
450
|
+
|
|
451
|
+
Returns:
|
|
452
|
+
List of chat sessions with ID and last updated time
|
|
453
|
+
"""
|
|
454
|
+
sessions = []
|
|
455
|
+
for doc in collection.find({}, {"_id": 1, "last_updated": 1, "messages": {"$slice": 1}}):
|
|
456
|
+
first_message = doc["messages"][0] if doc["messages"] else {}
|
|
457
|
+
sessions.append({
|
|
458
|
+
"chat_id": doc["_id"],
|
|
459
|
+
"last_updated": doc.get("last_updated"),
|
|
460
|
+
"first_message": first_message.get("content", "")[:100] + "..." if len(first_message.get("content", "")) > 100 else first_message.get("content", "")
|
|
461
|
+
})
|
|
462
|
+
return sessions
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
# # First message - creates new chat
|
|
466
|
+
# result1 = smolquery("Hello, what's the weather like?", [])
|
|
467
|
+
# print(f"Response: {result1['response']}")
|
|
468
|
+
# print(f"Chat ID: {result1['chat_id']}")
|
|
469
|
+
|
|
470
|
+
# # Second message - continues the conversation
|
|
471
|
+
# result2 = smolquery("Thanks, now tell me about Python programming", [], chat_id=result1['chat_id'])
|
|
472
|
+
# print(f"Response: {result2['response']}")
|
|
473
|
+
# print(f"Chat ID: {result2['chat_id']}") # Should be the same as result1['chat_id']
|
|
474
|
+
|
|
475
|
+
# # Retrieve chat history
|
|
476
|
+
# history = get_chat_history(result1['chat_id'])
|
|
477
|
+
# print(f"Chat history length: {len(history) if history else 0}")
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
|
|
217
481
|
if __name__ == "__main__":
|
|
218
482
|
print("hemlo worls")
|
|
483
|
+
|
|
484
|
+
# result1 = smolquery("Did i tell you to do something regarding stocks before? What do you conclude?", allow_web_search= False, chat_id= "a52ab59e-d6d0-4089-a963-61e8876244e0")
|
|
485
|
+
result1 = smolquery("How has NIFTY 50 been doing past 3 months?")
|
|
486
|
+
print(f"Response: {result1['response']}")
|
|
487
|
+
print(f"Chat ID: {result1['chat_id']}")
|
|
488
|
+
|
|
489
|
+
# # Second message - continues the conversation
|
|
490
|
+
result2 = smolquery("now tell me about other indices in the same country", chat_id=result1['chat_id'])
|
|
491
|
+
print(f"Response: {result2['response']}")
|
|
492
|
+
print(f"Chat ID: {result2['chat_id']}") # Should be the same as result1['chat_id']
|
|
493
|
+
|
|
494
|
+
# # Retrieve chat history
|
|
495
|
+
history = get_chat_history(result1['chat_id'])
|
|
496
|
+
print(f"Chat history length: {len(history) if history else 0}")
|
pembot/requirements.txt
CHANGED
pembot/search.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import requests
|
|
3
|
+
import json
|
|
4
|
+
from bs4 import BeautifulSoup
|
|
5
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
|
+
import logging
|
|
7
|
+
from smolagents import tool
|
|
8
|
+
|
|
9
|
+
# Configure logging for better visibility
|
|
10
|
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
11
|
+
|
|
12
|
+
def _fetch_and_extract_text(url: str, timeout: int = 10) -> dict:
|
|
13
|
+
"""
|
|
14
|
+
Fetches the content of a URL and extracts all visible text (excluding HTML/CSS).
|
|
15
|
+
|
|
16
|
+
Args:
|
|
17
|
+
url (str): The URL to fetch.
|
|
18
|
+
timeout (int): Timeout in seconds for the HTTP request.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
dict: A dictionary containing the URL and its extracted text, or an error.
|
|
22
|
+
"""
|
|
23
|
+
try:
|
|
24
|
+
logging.info(f"Attempting to fetch and parse URL: {url}")
|
|
25
|
+
response = requests.get(url, timeout=timeout)
|
|
26
|
+
response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
|
|
27
|
+
|
|
28
|
+
soup = BeautifulSoup(response.text, 'html.parser')
|
|
29
|
+
|
|
30
|
+
# Remove script and style tags
|
|
31
|
+
for script_or_style in soup(['script', 'style']):
|
|
32
|
+
script_or_style.extract()
|
|
33
|
+
|
|
34
|
+
# Get text, strip whitespace, and handle line breaks
|
|
35
|
+
text = soup.get_text(separator='\n', strip=True)
|
|
36
|
+
|
|
37
|
+
return {
|
|
38
|
+
"url": url,
|
|
39
|
+
"extracted_text": text
|
|
40
|
+
}
|
|
41
|
+
except requests.exceptions.Timeout:
|
|
42
|
+
logging.warning(f"Timeout fetching URL: {url}")
|
|
43
|
+
return {"url": url, "error": f"Timeout after {timeout} seconds"}
|
|
44
|
+
except requests.exceptions.RequestException as e:
|
|
45
|
+
logging.error(f"Error fetching URL {url}: {e}")
|
|
46
|
+
return {"url": url, "error": f"Failed to fetch: {e}"}
|
|
47
|
+
except Exception as e:
|
|
48
|
+
logging.error(f"An unexpected error occurred while parsing {url}: {e}")
|
|
49
|
+
return {"url": url, "error": f"Error parsing content: {e}"}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@tool
|
|
53
|
+
def brave_search_tool(query: str, num_results: int = 5, fetch_full_text: bool = False, full_text_timeout: int = 10) -> str:
|
|
54
|
+
"""
|
|
55
|
+
Performs a web search using the Brave Search API and returns the results.
|
|
56
|
+
Optionally fetches and extracts text from the top search results.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
query (str): The search query.
|
|
60
|
+
num_results (int): The maximum number of search results to return from Brave Search.
|
|
61
|
+
Defaults to 5.
|
|
62
|
+
fetch_full_text (bool): If True, attempts to fetch and extract text from the URLs
|
|
63
|
+
of the top results. Defaults to False.
|
|
64
|
+
full_text_timeout (int): Timeout in seconds for fetching each full text.
|
|
65
|
+
Defaults to 10 seconds.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
str: A JSON string of the search results, optionally including extracted text,
|
|
69
|
+
or an error message.
|
|
70
|
+
JSON output is of the form:
|
|
71
|
+
{"title", "url", "snippet", "full_text"}
|
|
72
|
+
"""
|
|
73
|
+
api_key = os.getenv("BRAVE_API_KEY")
|
|
74
|
+
if not api_key:
|
|
75
|
+
return json.dumps({"error": "Brave Search API key not found. Please set the BRAVE_API_KEY environment variable."})
|
|
76
|
+
|
|
77
|
+
url = "https://api.search.brave.com/res/v1/web/search"
|
|
78
|
+
headers = {
|
|
79
|
+
"Accept": "application/json",
|
|
80
|
+
"X-Subscription-Token": api_key
|
|
81
|
+
}
|
|
82
|
+
params = {
|
|
83
|
+
"q": query,
|
|
84
|
+
"count": num_results,
|
|
85
|
+
"offset": 0,
|
|
86
|
+
"country": "us",
|
|
87
|
+
"search_lang": "en"
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
logging.info(f"Initiating Brave Search for query: '{query}' with {num_results} results.")
|
|
92
|
+
response = requests.get(url, headers=headers, params=params, timeout=15) # Brave API call timeout
|
|
93
|
+
response.raise_for_status()
|
|
94
|
+
|
|
95
|
+
data = response.json()
|
|
96
|
+
raw_web_results = []
|
|
97
|
+
if 'web' in data and 'results' in data['web']:
|
|
98
|
+
raw_web_results = data['web']['results']
|
|
99
|
+
else:
|
|
100
|
+
logging.warning("No web results found in Brave Search response.")
|
|
101
|
+
return json.dumps({"error": "No web results found in Brave Search response.", "raw_response": data})
|
|
102
|
+
|
|
103
|
+
formatted_results = []
|
|
104
|
+
urls_to_fetch = []
|
|
105
|
+
|
|
106
|
+
for result in raw_web_results:
|
|
107
|
+
formatted_item = {
|
|
108
|
+
"title": result.get("title"),
|
|
109
|
+
"url": result.get("url"),
|
|
110
|
+
"snippet": result.get("description")
|
|
111
|
+
}
|
|
112
|
+
formatted_results.append(formatted_item)
|
|
113
|
+
if fetch_full_text and result.get("url"):
|
|
114
|
+
urls_to_fetch.append(result["url"])
|
|
115
|
+
|
|
116
|
+
if fetch_full_text and urls_to_fetch:
|
|
117
|
+
logging.info(f"Fetching full text for {len(urls_to_fetch)} URLs with {full_text_timeout}s timeout per URL.")
|
|
118
|
+
# Use ThreadPoolExecutor to fetch URLs concurrently for efficiency
|
|
119
|
+
with ThreadPoolExecutor(max_workers=min(len(urls_to_fetch), 5)) as executor: # Limit concurrent fetches
|
|
120
|
+
future_to_url = {executor.submit(_fetch_and_extract_text, url, full_text_timeout): url for url in urls_to_fetch}
|
|
121
|
+
for future in as_completed(future_to_url):
|
|
122
|
+
url = future_to_url[future]
|
|
123
|
+
try:
|
|
124
|
+
extracted_data = future.result()
|
|
125
|
+
# Find the corresponding result in formatted_results and add extracted text
|
|
126
|
+
for item in formatted_results:
|
|
127
|
+
if item["url"] == url:
|
|
128
|
+
item["full_text"] = extracted_data.get("extracted_text")
|
|
129
|
+
if "error" in extracted_data:
|
|
130
|
+
item["full_text_error"] = extracted_data["error"]
|
|
131
|
+
break
|
|
132
|
+
except Exception as exc:
|
|
133
|
+
logging.error(f"URL {url} generated an exception during full text fetch: {exc}")
|
|
134
|
+
for item in formatted_results:
|
|
135
|
+
if item["url"] == url:
|
|
136
|
+
item["full_text_error"] = f"Failed to get full text due to internal error: {exc}"
|
|
137
|
+
break
|
|
138
|
+
|
|
139
|
+
return json.dumps(formatted_results, indent=2)
|
|
140
|
+
|
|
141
|
+
except requests.exceptions.HTTPError as http_err:
|
|
142
|
+
logging.error(f"HTTP error occurred during Brave Search: {http_err} - Status: {response.status_code}")
|
|
143
|
+
return json.dumps({"error": f"HTTP error occurred with Brave Search: {http_err}", "status_code": response.status_code, "response_text": response.text})
|
|
144
|
+
except requests.exceptions.ConnectionError as conn_err:
|
|
145
|
+
logging.error(f"Connection error occurred during Brave Search: {conn_err}")
|
|
146
|
+
return json.dumps({"error": f"Connection error occurred with Brave Search: {conn_err}"})
|
|
147
|
+
except requests.exceptions.Timeout as timeout_err:
|
|
148
|
+
logging.error(f"Timeout error occurred during Brave Search API call: {timeout_err}")
|
|
149
|
+
return json.dumps({"error": f"Timeout error occurred with Brave Search API: {timeout_err}"})
|
|
150
|
+
except requests.exceptions.RequestException as req_err:
|
|
151
|
+
logging.error(f"An unexpected request error occurred during Brave Search: {req_err}")
|
|
152
|
+
return json.dumps({"error": f"An unexpected request error occurred with Brave Search: {req_err}"})
|
|
153
|
+
except json.JSONDecodeError:
|
|
154
|
+
logging.error("Failed to decode JSON response from Brave Search API.")
|
|
155
|
+
return json.dumps({"error": "Failed to decode JSON response from Brave Search API."})
|
|
156
|
+
except Exception as e:
|
|
157
|
+
logging.error(f"An unexpected error occurred in brave_search_tool: {e}", exc_info=True)
|
|
158
|
+
return json.dumps({"error": f"An unexpected error occurred in brave_search_tool: {e}"})
|
|
159
|
+
|
|
160
|
+
# Example usage (for testing the tool function independently)
|
|
161
|
+
if __name__ == "__main__":
|
|
162
|
+
# For testing, you might temporarily set the API key here or ensure it's in your env
|
|
163
|
+
# os.environ["BRAVE_API_KEY"] = "YOUR_BRAVE_API_KEY" # REMOVE IN PRODUCTION
|
|
164
|
+
# If not set, the tool will return an error about missing key
|
|
165
|
+
|
|
166
|
+
print("--- Testing Brave Search Tool with Full Text Fetch ---")
|
|
167
|
+
search_query = "Impact of AI on job market latest research"
|
|
168
|
+
|
|
169
|
+
# Test 1: Basic search (no full text)
|
|
170
|
+
print("\n--- Test 1: Basic Search ---")
|
|
171
|
+
results_basic = brave_search_tool(search_query, num_results=2, fetch_full_text=False)
|
|
172
|
+
print(results_basic)
|
|
173
|
+
|
|
174
|
+
# Test 2: Search with full text fetching
|
|
175
|
+
print("\n--- Test 2: Search with Full Text Fetch (num_results=2, timeout=5s) ---")
|
|
176
|
+
results_full_text = brave_search_tool(search_query, num_results=2, fetch_full_text=True, full_text_timeout=5)
|
|
177
|
+
print(results_full_text)
|
|
178
|
+
|
|
179
|
+
print("\n--- Test 3: Search with Full Text Fetch (num_results=1, very short timeout) ---")
|
|
180
|
+
results_short_timeout = brave_search_tool("impact of climate change on agriculture", num_results=1, fetch_full_text=True, full_text_timeout=1)
|
|
181
|
+
print(results_short_timeout)
|
|
182
|
+
|
|
183
|
+
print("\n--- Testing with missing API Key (example of error handling) ---")
|
|
184
|
+
original_key = os.getenv("BRAVE_API_KEY")
|
|
185
|
+
if original_key:
|
|
186
|
+
del os.environ["BRAVE_API_KEY"] # Temporarily unset for test
|
|
187
|
+
missing_key_results = brave_search_tool("test")
|
|
188
|
+
print(missing_key_results)
|
|
189
|
+
if original_key:
|
|
190
|
+
os.environ["BRAVE_API_KEY"] = original_key # Restore for other parts of the script
|
|
@@ -1,17 +1,19 @@
|
|
|
1
1
|
pembot/.gitignore,sha256=_7FTsZokJ_pzEyyPjOsGw5x5Xx3gUBFaafs7UlPsv9E,98
|
|
2
2
|
pembot/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
3
|
-
pembot/__init__.py,sha256=
|
|
3
|
+
pembot/__init__.py,sha256=COex4004l8eKvUqKnQZnBRPIfL7zpiB2jfAGt4wG-7U,211
|
|
4
4
|
pembot/gartner.py,sha256=3ALknQ5mSXIimmwCa3JFDzB_EW2hHEcQO1T2odyBquk,5408
|
|
5
5
|
pembot/main.py,sha256=lZLIV8XPonvNoY4LVS-5fct1y9URMXWoSGJUKMw3Yg8,9667
|
|
6
6
|
pembot/output_structure_local.py,sha256=YfpHzfTNeLMSsB_CjAamha9D6Iz7E1IC-tW9xPCMWFc,3000
|
|
7
7
|
pembot/pem.py,sha256=mv6iGcN1peSY7z2dtCQ_BKj31EFBNfczBhps_d-0XDo,6377
|
|
8
|
-
pembot/
|
|
9
|
-
pembot/
|
|
10
|
-
pembot
|
|
8
|
+
pembot/pyrightconfig.json,sha256=j2O2tc8Z-Zu7hEnhN9neoKk6-iLkAlp4qOmAxFyHB7Y,368
|
|
9
|
+
pembot/query.py,sha256=G9DDg8kbmbTNS3IpU5_IyUdHAlCzpdZfletCcriSyZQ,18079
|
|
10
|
+
pembot/requirements.txt,sha256=fbPd8A0hzQR3VH5ygBQJ5xQa0hNJsqsRRESPadfgje4,1360
|
|
11
|
+
pembot/search.py,sha256=IW0F8QjE-HSYP47v5P9EqfnzKgFEf5CGxeICtHDDrkE,9137
|
|
12
|
+
pembot/.git/COMMIT_EDITMSG,sha256=DMeQ0P82ePwVToqD2zesvIk6Q-7CURTNfdluklt857c,222
|
|
11
13
|
pembot/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
|
|
12
14
|
pembot/.git/config,sha256=ZFl9d2GyxirgRXRsv8iULIieKxwGC9P6SAjB_AmTkmQ,271
|
|
13
15
|
pembot/.git/description,sha256=hatsFj1DoX6pz3eIMIvKFGbxsKjRzJLibpv2PaQGKu4,73
|
|
14
|
-
pembot/.git/index,sha256=
|
|
16
|
+
pembot/.git/index,sha256=zHF7i6pqKKeztQvRVfN4ItAtcdUlNoCvEl12f3Efdi4,1974
|
|
15
17
|
pembot/.git/packed-refs,sha256=7DECsr7q7vJ6Gw6a2gS3dE4v-YzbxGiWYoSWM43DgsQ,112
|
|
16
18
|
pembot/.git/hooks/applypatch-msg.sample,sha256=AiNJeguLAzqlijpSG4YphpOGz3qw4vEBlj0yiqYhk_c,478
|
|
17
19
|
pembot/.git/hooks/commit-msg.sample,sha256=H3TV6SkpebVz69WXQdRsuT_zkazdCD00C5Q3B1PZJDc,896
|
|
@@ -28,10 +30,10 @@ pembot/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO
|
|
|
28
30
|
pembot/.git/hooks/sendemail-validate.sample,sha256=ROv8kj3FRmvACWAvDs8Ge5xlRZq_6IaN3Em3jmztepI,2308
|
|
29
31
|
pembot/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
|
|
30
32
|
pembot/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
|
|
31
|
-
pembot/.git/logs/HEAD,sha256=
|
|
32
|
-
pembot/.git/logs/refs/heads/main,sha256=
|
|
33
|
+
pembot/.git/logs/HEAD,sha256=inb9c7SnW9i9voOMnHWUBEQ4NVzULBypWNe7K54kmF4,3638
|
|
34
|
+
pembot/.git/logs/refs/heads/main,sha256=inb9c7SnW9i9voOMnHWUBEQ4NVzULBypWNe7K54kmF4,3638
|
|
33
35
|
pembot/.git/logs/refs/remotes/origin/HEAD,sha256=OrkNquczPPh6fEGtutFKva_-_JhAdwnvXpCCPC4N6jk,194
|
|
34
|
-
pembot/.git/logs/refs/remotes/origin/main,sha256=
|
|
36
|
+
pembot/.git/logs/refs/remotes/origin/main,sha256=Cm8Q6zQQR27eezAnQRB9h2jLDkrMhhSgzqXALd-zn74,1898
|
|
35
37
|
pembot/.git/objects/01/5f71967c525963c827d7fe5415ae2c040c4a64,sha256=-qlT-5utWcwFnO3ADkH2SA2LBsdcph6wE2iePxJxkHs,170
|
|
36
38
|
pembot/.git/objects/09/ee34d94dffd4c286df1d6d528b2e98e2a6cce2,sha256=FSXPGn6UBhR7s1Ug-afzCYLfGy8dE3Umn8dBKaahkDM,203
|
|
37
39
|
pembot/.git/objects/0a/fb3a98cdc55b1434b44534ec2bf22c56cfa26c,sha256=Xxw20vI57zuhERWopDAZpQw6rAOhFtUr05lzpGyCTTE,120
|
|
@@ -39,6 +41,8 @@ pembot/.git/objects/0b/db4169fc0f312b8698f1df17a258fff163aeaa,sha256=hsOHhX0Yajg
|
|
|
39
41
|
pembot/.git/objects/0c/8d9b2690545bf1906b05cd9f18b783b3eb74f1,sha256=GKt_CAJNOQXwGnoFLuiNpkd0s_hP_UDLKd59VRknYy0,330
|
|
40
42
|
pembot/.git/objects/0c/ab66ffbaf50ef60dd41f3498595ebd2526b33c,sha256=Uk1dStvEBica-t38qHsZZ_4mxvi6b6VA9PaKE4KSunQ,90
|
|
41
43
|
pembot/.git/objects/0d/28f73897db0c9a9351ee9e64d2a0fe27db2705,sha256=hqMFSXWo_05QL0Do-raB4AtK5QjvKLFBNc0RZqNga9o,244
|
|
44
|
+
pembot/.git/objects/0e/120123bfadfd594220963f3bbca54056bab6ee,sha256=fj4c6vIKYMYSj5DEdXd6fcYGcanqaPGRD_9haJy35ns,56
|
|
45
|
+
pembot/.git/objects/0f/ccea3d0db4864a854f8b2c13b9f76b3601d200,sha256=Fq6qF_9lqg1bYsF2tWArhzkldnfgLFELLK2CH_2XNcU,203
|
|
42
46
|
pembot/.git/objects/10/9d1ca0463ea42bbbc435bcb43a90711211cf49,sha256=vR33_Raw-LpnaXGQc1MhSk_ZgEROO2Xa9n97YmA3gtQ,56
|
|
43
47
|
pembot/.git/objects/14/22c2d0cf79fd928ff7e2d77f96ad5b40cc2a31,sha256=2INSnjkW4KTAcfO2aLYVzjnpT89NXxx8TBJj4iU9e3Y,170
|
|
44
48
|
pembot/.git/objects/18/28e18ab80aa64d334b26428708140e280cbc63,sha256=PTF8WLVhzxBDTZhwU_PBHrkQBbijHbKvttSr0XVTOcU,3936
|
|
@@ -46,6 +50,7 @@ pembot/.git/objects/19/f61df7dbd562d04f561288677bbf2f18f5dff7,sha256=zg8IdUSnMYp
|
|
|
46
50
|
pembot/.git/objects/1f/83a471c8119f7794d98c049170a5d7d07a4b71,sha256=XnMaYQUA8iT1fiOIvlBav331Ry7pNBOBqI3wB3Y1VM0,90
|
|
47
51
|
pembot/.git/objects/28/db0ab48059acccd7d257aa02e52e9b6b83a4a5,sha256=S6PrWSQlkifYxKIgFdU0PZD0uLebS6uAP2LAUwp5yOI,91
|
|
48
52
|
pembot/.git/objects/35/97e518a8658280be9f377f78edf1dfa1f23814,sha256=gfc5bFLVZpwNQb1Ox2VosDYAjw0Lc5ZLjmvNA8gWcmg,2546
|
|
53
|
+
pembot/.git/objects/37/175696b3ca7a5d17379f03fb61a1023d50aeba,sha256=XaF3EsJ1wSIWtgBtgKsZkwiMK0NM8acFy9nnqE9_d0s,3085
|
|
49
54
|
pembot/.git/objects/3d/07d3b29ff53d95de3898fb786d61732f210515,sha256=A9MNZO3QZ6ghGd1MyfmJ6H3dBTpF4HZcRosVxWytx8E,4077
|
|
50
55
|
pembot/.git/objects/3d/e536f9c1fd05a23c2dec66423ed610afb0cf5f,sha256=omF4gmE9IQFZR8t6ybAKfnW02tdn9ZaVWKRhv_o1V4c,2083
|
|
51
56
|
pembot/.git/objects/3e/23850624fcf5f111d6ea88ddd64adf924cf82f,sha256=ygVUpaLo7cxUdIgjFlaBh2BkllV6BIYYkzLIxsPKjWE,4111
|
|
@@ -56,23 +61,31 @@ pembot/.git/objects/41/ae8fa8f8baa2daee5ec0aa21ae17922ae051a0,sha256=TLuVmtSH9K3
|
|
|
56
61
|
pembot/.git/objects/41/cbeb6bcb4c6fa9ef9be571082d95ecb4ea0ee3,sha256=waMrzjG_o5D4JgHkjjqcDQCwuS17w60JRkVr25ZFlcI,117
|
|
57
62
|
pembot/.git/objects/42/f03e1b66aa56bbb36a1c3a8dea9e1e727faffa,sha256=n4W2gcagesjI1rStKNxQ98q5UOHlfwFJGUADFeYldoE,418
|
|
58
63
|
pembot/.git/objects/44/86da0f89c566c3bae8abf435d37aeca87f1632,sha256=S2hY860Ep-0c7gQcbgrH6ioG7-Hw9a3BwYHcCkwy1Hg,3884
|
|
64
|
+
pembot/.git/objects/44/9dea1ca63d6f1e47d119b36576acc94822a37c,sha256=gVL6GHxMRFhlOnyUCO1dSxnsBlMd4Jx90eNZFrv32UQ,6490
|
|
59
65
|
pembot/.git/objects/4d/a03134f70896f72053fbdc0cd4f4c76d4ac1d8,sha256=GBhAvxM1omIt-PN6mNXYlIJMN5nx2AUE0ZOf68El5pc,117
|
|
60
66
|
pembot/.git/objects/50/39b29fda67743a044993436df6a4a1db7b8888,sha256=NYNmYtOq8IMmH32GaQSOBpTRTTm6jEJfY3vytVpzfKM,115
|
|
61
67
|
pembot/.git/objects/51/9e780574933d7627a083222bd10dd74f430904,sha256=3e3Iu2-waVySghbLYXmwhDPpfhV4PF82suvjcYkSVog,3604
|
|
68
|
+
pembot/.git/objects/59/69ac8b9d6b44a601385c3ed8c710a69d05216e,sha256=3IOcUn5myiozgeId1iWJZX-r7cS65xXnzQCEjrc-1ZA,168
|
|
69
|
+
pembot/.git/objects/5c/4f01d3ce9e243bbb8a693f97e5c7d13a857cb7,sha256=BnHoA5JBo5NY2ReemhwmZ-dOdx6CwXWY1TQsc-FSM5o,242
|
|
62
70
|
pembot/.git/objects/61/46a371b9c1bd9f51af273f11f986cfd1bedeba,sha256=KZvfnjxuriY54uWZQOM-GLovAvHs1k8_KwhpjNA5lW4,128
|
|
63
71
|
pembot/.git/objects/63/1700a51c8fa97b543991f5f61bfcd1e7e1327d,sha256=sYkhBkrSPQ8klX2gPrXJUZVt2a0iaF7KC7NFGBuxgeY,4360
|
|
64
72
|
pembot/.git/objects/64/00040794955d17c9a1fe1aaaea59f2c4822177,sha256=-tFnLFQvYrtkodOVhPK3WUhN8Qg9sQ8VfcEHG9MXdAg,421
|
|
65
73
|
pembot/.git/objects/6d/7a865a23b1cb4182f67907820104ced48b11c9,sha256=dJRTCmT9rLygONcQ7MPETl9AImF3Iy5tB_KUeCvKyKY,2651
|
|
74
|
+
pembot/.git/objects/71/014c7a6c8c98449a26ef966485a6cf30a1974d,sha256=ZamWua6G5BGjBYZYeG8dN3nHhwz_kqFfoYyO2wtuRV0,417
|
|
66
75
|
pembot/.git/objects/72/f047cda92abcd1ddc857f6461de605f8668331,sha256=PFb9LUDMnUCnuJcXUa5W1ea__fdP17kNyWrnqvnOpjs,240
|
|
67
76
|
pembot/.git/objects/73/2e98f08bc806c331b06847fc8c743f545499e5,sha256=kbKUb6fwwhRO73B4EZmol55JBvckqE3GNZ9PqHRB2ag,3995
|
|
77
|
+
pembot/.git/objects/78/4aa28d912b66e07748483efe0326c70d7541a5,sha256=6ut1I6cMnpRs6EK2CZZv50W25yNc0Ha6nC_cj9tSQjI,249
|
|
68
78
|
pembot/.git/objects/7a/7d28b0313a3d9d509823faaae31949af8610ef,sha256=X59k-p9VNLBpmJlL53qIz8mntLeCSpnjw-rq9u9z_6I,90
|
|
69
79
|
pembot/.git/objects/7e/0907822f7d316ebe0be07e1f6918bef412c80b,sha256=lFc55Bu-vEXF8In553gHxlEsB47Vg2qFXHiJqepWEqg,5167
|
|
70
80
|
pembot/.git/objects/86/cdaec229f1fbebf43042266b03878944669f25,sha256=eTvQhUeYXP8E181oTOcBydcgmImr62IizaH_Jbcbg8g,4077
|
|
71
81
|
pembot/.git/objects/87/d6df5217a4a374f8c1211a05f9bd657f72c9a7,sha256=OGq5-x1lFa94vTX7WYO6o4TGvCZwAvZ6LXm6N3dpiKM,3881
|
|
72
82
|
pembot/.git/objects/8b/5be2af9b16f290549193859c214cd9072212e8,sha256=DhGeGisCdFZ0TcRKp5angRpaseI87TQDt5FtGZInstk,117
|
|
83
|
+
pembot/.git/objects/8d/adc1d7891c79de24ba2c7c38b4c830bf61870a,sha256=QJaAleJXlBhybaUcSeKB7nC9OJg9gjP_xc071Wyq8BM,115
|
|
73
84
|
pembot/.git/objects/90/f067b86364ab243a7e3bc75f936319ba9eac88,sha256=FLAmmgvYuEAx1-ZBU30rvDzP0ppXWRSVrzPWVnArIb0,203
|
|
85
|
+
pembot/.git/objects/93/652290aac46c69b1b4dd83062b6cfe648dd643,sha256=WPgmr5bXli5s8rNdiUQM4IB4o_xyJe6nuI3TG4e5aYs,487
|
|
74
86
|
pembot/.git/objects/93/8f29d9b4b1ae86e39dddf9e3d115a82ddfc9b6,sha256=xf8oZ5IBMTxfkH7MFfukV7ZIu0Apd-78eJTdlI7GBv0,90
|
|
75
87
|
pembot/.git/objects/95/28bbccd167e3f4ad583a1ae9fac98a52620e27,sha256=jwJdRviwjGJIyMpE_BM6mr7B9ofGEsI5ZToJo5nmlao,263
|
|
88
|
+
pembot/.git/objects/97/b7aaa5c8b45f5471c9d39893cd4c893da6f444,sha256=PZW83V2jXf8-zDoTJqG1jNsCMURU8NUK8ljBD458MK0,3949
|
|
76
89
|
pembot/.git/objects/9b/123713e30fc9e225f9ac8ff5b02f8f8cf86456,sha256=xIETiieOoilleucGg7vXOgjZ-v5PI0t34fDJjDD665A,4204
|
|
77
90
|
pembot/.git/objects/a8/98d2c3947d30d8be64bd2bbcef68f956d5456b,sha256=lh2LurucwRdL6WP8ChgmjXrK2lR7HASIXzt4iHFrTf4,178
|
|
78
91
|
pembot/.git/objects/a9/d5e349fa091647742b09eb3d05bac8f58fe547,sha256=kaU3Z4k6ptIwO8ktcjs2-kshb0bzM4y8Uur-a27_jnk,56
|
|
@@ -91,6 +104,7 @@ pembot/.git/objects/bf/32a7e6872e5dc4025ee3df3c921ec7ade0855f,sha256=lwL9ickzIFt
|
|
|
91
104
|
pembot/.git/objects/bf/518686b06069d2a8abd3689908b7e1a6e16b05,sha256=w-HgdJdX2_ZdiIptJv8BcWdeDEyhl42WEk8P72X8YKU,421
|
|
92
105
|
pembot/.git/objects/c0/793458db6e1bee7f79f1a504fb8ff4963f8ed3,sha256=b8lo_OrMeGgirc9yY_OFjv5xVpG6FBpZnBf7jbtlmyw,421
|
|
93
106
|
pembot/.git/objects/c2/443060c07101948487cfa93cc39e082e9e0f5f,sha256=d9rjB8sgBOUQ-HQ8yu5I-c5Dqr_q2z0OOCXSufjDAak,3998
|
|
107
|
+
pembot/.git/objects/c2/ce19d575a8cccf6886862c4fa6afefba142511,sha256=kxbbFUJ1TpEVIrqgiLzepP5Z1k_kF3FjCHvJ04yCBvs,3370
|
|
94
108
|
pembot/.git/objects/ce/a4ffc1cf5eab61a2a0abd8f6dc941b580b69fd,sha256=yKUe_ZHD0UynTIrDRhuVqjDjKYDfZkWplqXjeSOD_bk,3894
|
|
95
109
|
pembot/.git/objects/d0/937f7d832266337289d5ec09459f931a46fcf7,sha256=_RZ7Z2EZp1OOF_XZhY6e1tzWwhI8Fa5R9aaF_W8APBA,56
|
|
96
110
|
pembot/.git/objects/e0/9162dbd64d85bb5ed740aa99faefa73f293d78,sha256=I5fpz3BQ2maFPTSu43T1uvYMuLiep1C3K6CsX8UMNPI,196
|
|
@@ -102,8 +116,11 @@ pembot/.git/objects/ea/0af89e61a882c5afc2a8c281b2d96f174bfe58,sha256=lXbMvL_xl8P
|
|
|
102
116
|
pembot/.git/objects/eb/75e1c49f1e5b79dca17ccdbec8067756523238,sha256=ltEINFUpQP86CkE4nAT1Afegz3ytY3Nlx1P6ibTFEbo,305
|
|
103
117
|
pembot/.git/objects/ee/a73c7f24094ed83b014f7cfce46e10f817bec8,sha256=fFYq_ODekFhF9SwBL9GP_fGDsNavXVVOuI6kmnHlkiY,5140
|
|
104
118
|
pembot/.git/objects/ef/0503a60244391590b16042019032e91d7cc30d,sha256=mrF9jZHY2oJm8tkd8nQdMgUPbrZfENOFaR3mvbwi1dg,187
|
|
119
|
+
pembot/.git/objects/ef/3488a3c636d73d82ad138e70a92453249b7f37,sha256=09VHFwSnSxOExWa3sKBzWAfSMyx5INI9KK2mstW8-xs,203
|
|
120
|
+
pembot/.git/objects/f1/2d2ef8948cbe4b24279bee282f934cf5a1b834,sha256=HvfHm5CnbujMp-uu0aucVzpaWCy2aKBzaXknvlYCnjg,3937
|
|
105
121
|
pembot/.git/objects/f1/3181b12cf4d539e635bf94ad6e950d68cedaf1,sha256=2cORujYAURTvGACujtNJvP1f-s7rt-UkrtPBuKfDR-g,419
|
|
106
122
|
pembot/.git/objects/f1/655afa1c5636c8d58969e3194bb770aefbc552,sha256=Ugf-wTcOlwZXmxmbnjEc3iOK3dDRntTVONOJsrOjl3E,205
|
|
123
|
+
pembot/.git/objects/f2/14d4d56726e2928479c5948bd88e038cf70b2e,sha256=e-Ff8_3sWUqd5ciQJLbXohN-E2ShUWKUiYZCHMitMus,290
|
|
107
124
|
pembot/.git/objects/f4/e991088a63def67a30a2b8bbdb4d58514abab8,sha256=Y5WfCEpk121Cy9gaFfSY4ZkUz54qu45osRZdTy9kZ8c,393
|
|
108
125
|
pembot/.git/objects/f6/b1d54483ce20fbcb252a8a93a5eff7bec88729,sha256=MrRy-fBSXZcp-yJM3e-tH3wCdUS-VFX6rW_mKTa-0_Y,419
|
|
109
126
|
pembot/.git/objects/f8/6fbd490878cb0d3c35cc4443672d1309171bf1,sha256=hBVqthGLEEX2NmdD51kjiIeTd5CP5MU8it41zHlW3m0,419
|
|
@@ -117,17 +134,17 @@ pembot/.git/objects/fd/abd48f3e947a9f420003446dd118c5295346a5,sha256=cWWr70MLaXk
|
|
|
117
134
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.idx,sha256=CNzx_lz6v4PulPxRW2t9nz-ifvplpSFPhMA2M9WNUrA,3424
|
|
118
135
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.pack,sha256=dk3Sqrd0L-tNVLRy3uJdTYJNkw8v59mE1hV8zrCFNzc,41355
|
|
119
136
|
pembot/.git/objects/pack/pack-d5469edc8c36e3bb1de5e0070e4d5b1eae935dd4.rev,sha256=7U3tpTWQ3dn5dwQo_KWMWxF31cKaDnCk2AzTO7Cx4Bg,388
|
|
120
|
-
pembot/.git/refs/heads/main,sha256=
|
|
137
|
+
pembot/.git/refs/heads/main,sha256=83ymz-l-IiRZslv1Mh-pd8KJwJJSu8vfEXnXfKbWzLI,41
|
|
121
138
|
pembot/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
|
|
122
|
-
pembot/.git/refs/remotes/origin/main,sha256=
|
|
139
|
+
pembot/.git/refs/remotes/origin/main,sha256=83ymz-l-IiRZslv1Mh-pd8KJwJJSu8vfEXnXfKbWzLI,41
|
|
123
140
|
pembot/AnyToText/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
124
141
|
pembot/AnyToText/convertor.py,sha256=gqvhwFssUsAeirfO4n0Ztwga1hn8zHbdG96sMTjYrpE,17188
|
|
125
142
|
pembot/TextEmbedder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
126
143
|
pembot/TextEmbedder/gemini_embedder.py,sha256=P679-2mmQESlYKML1vcrwx_-CSgWJgIQk7NL4F7BLQE,677
|
|
127
|
-
pembot/TextEmbedder/mongodb_embedder.py,sha256
|
|
144
|
+
pembot/TextEmbedder/mongodb_embedder.py,sha256=-xIr-zrAGzCmgNeojuX6qYj2t019EVO1I6g-Hwq0FL8,10799
|
|
128
145
|
pembot/TextEmbedder/mongodb_index_creator.py,sha256=kopqdVYJii_wExVrXGZjMfqWZ2dD42b3PeNWo71weHI,5354
|
|
129
146
|
pembot/TextEmbedder/vector_query.py,sha256=Kh1uhx9CatB-oQlQtnW-1I2Qz7MGHI20n2h_8peAChM,1986
|
|
130
|
-
pembot/config/config.yaml,sha256
|
|
147
|
+
pembot/config/config.yaml,sha256=n4AGOgweeokHFX6kutizWLDIli7IE-6abtTQZG5SKF0,156
|
|
131
148
|
pembot/pdf2markdown/LICENSE,sha256=1JTJhQjUYDqJzFJhNtitm7mHyE71PRHgetIqRRWg6Pk,1068
|
|
132
149
|
pembot/pdf2markdown/README.md,sha256=jitM1pwI69oa0N4mXv5-SY1ka9Sz3jsRNCDdpW-50kY,4545
|
|
133
150
|
pembot/pdf2markdown/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -183,7 +200,7 @@ pembot/pdf2markdown/config/config.yaml,sha256=w75W2Eg4-tu8rRk_23PqxWDh0010kRKLmP
|
|
|
183
200
|
pembot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
184
201
|
pembot/utils/inference_client.py,sha256=jeURmY2P5heVlH1dCV0XSgiX3U2qYGEmrnUv0KFpdww,5380
|
|
185
202
|
pembot/utils/string_tools.py,sha256=gtRa5rBR0Q7GspTu2WtCnvhJQLFjPfWLvhmyiPkyStU,1883
|
|
186
|
-
pembot-0.1.
|
|
187
|
-
pembot-0.1.
|
|
188
|
-
pembot-0.1.
|
|
189
|
-
pembot-0.1.
|
|
203
|
+
pembot-0.1.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
204
|
+
pembot-0.1.1.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
|
|
205
|
+
pembot-0.1.1.dist-info/METADATA,sha256=dEXQTUivLp55_Malvl4tDwlXLwz43LV6vWocpOgFtv0,313
|
|
206
|
+
pembot-0.1.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|