lionagi 0.0.208__py3-none-any.whl → 0.0.210__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. lionagi/__init__.py +4 -6
  2. lionagi/api_service/base_endpoint.py +65 -0
  3. lionagi/api_service/base_rate_limiter.py +121 -0
  4. lionagi/api_service/base_service.py +146 -0
  5. lionagi/api_service/chat_completion.py +6 -0
  6. lionagi/api_service/embeddings.py +6 -0
  7. lionagi/api_service/payload_package.py +47 -0
  8. lionagi/api_service/status_tracker.py +29 -0
  9. lionagi/core/__init__.py +5 -9
  10. lionagi/core/branch.py +1191 -0
  11. lionagi/core/flow.py +423 -0
  12. lionagi/core/{instruction_set/instruction_set.py → instruction_set.py} +3 -3
  13. lionagi/core/session.py +872 -0
  14. lionagi/schema/__init__.py +5 -8
  15. lionagi/schema/base_schema.py +821 -0
  16. lionagi/{_services → services}/base_service.py +4 -4
  17. lionagi/{_services → services}/oai.py +4 -4
  18. lionagi/structures/graph.py +1 -1
  19. lionagi/structures/relationship.py +1 -1
  20. lionagi/structures/structure.py +1 -1
  21. lionagi/tools/tool_manager.py +0 -163
  22. lionagi/tools/tool_util.py +2 -1
  23. lionagi/utils/__init__.py +7 -14
  24. lionagi/utils/api_util.py +63 -2
  25. lionagi/utils/core_utils.py +338 -0
  26. lionagi/utils/sys_util.py +3 -3
  27. lionagi/version.py +1 -1
  28. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/METADATA +28 -29
  29. lionagi-0.0.210.dist-info/RECORD +56 -0
  30. lionagi/_services/anthropic.py +0 -79
  31. lionagi/_services/anyscale.py +0 -0
  32. lionagi/_services/azure.py +0 -1
  33. lionagi/_services/bedrock.py +0 -0
  34. lionagi/_services/everlyai.py +0 -0
  35. lionagi/_services/gemini.py +0 -0
  36. lionagi/_services/gpt4all.py +0 -0
  37. lionagi/_services/huggingface.py +0 -0
  38. lionagi/_services/litellm.py +0 -33
  39. lionagi/_services/localai.py +0 -0
  40. lionagi/_services/openllm.py +0 -0
  41. lionagi/_services/openrouter.py +0 -44
  42. lionagi/_services/perplexity.py +0 -0
  43. lionagi/_services/predibase.py +0 -0
  44. lionagi/_services/rungpt.py +0 -0
  45. lionagi/_services/vllm.py +0 -0
  46. lionagi/_services/xinference.py +0 -0
  47. lionagi/agents/planner.py +0 -1
  48. lionagi/agents/prompter.py +0 -1
  49. lionagi/agents/scorer.py +0 -1
  50. lionagi/agents/summarizer.py +0 -1
  51. lionagi/agents/validator.py +0 -1
  52. lionagi/bridge/__init__.py +0 -22
  53. lionagi/bridge/langchain.py +0 -195
  54. lionagi/bridge/llama_index.py +0 -266
  55. lionagi/core/branch/__init__.py +0 -0
  56. lionagi/core/branch/branch.py +0 -841
  57. lionagi/core/branch/cluster.py +0 -1
  58. lionagi/core/branch/conversation.py +0 -787
  59. lionagi/core/core_util.py +0 -0
  60. lionagi/core/flow/__init__.py +0 -0
  61. lionagi/core/flow/flow.py +0 -19
  62. lionagi/core/flow/flow_util.py +0 -62
  63. lionagi/core/instruction_set/__init__.py +0 -0
  64. lionagi/core/messages/__init__.py +0 -0
  65. lionagi/core/sessions/__init__.py +0 -0
  66. lionagi/core/sessions/session.py +0 -504
  67. lionagi/datastores/__init__.py +0 -1
  68. lionagi/datastores/chroma.py +0 -1
  69. lionagi/datastores/deeplake.py +0 -1
  70. lionagi/datastores/elasticsearch.py +0 -1
  71. lionagi/datastores/lantern.py +0 -1
  72. lionagi/datastores/pinecone.py +0 -1
  73. lionagi/datastores/postgres.py +0 -1
  74. lionagi/datastores/qdrant.py +0 -1
  75. lionagi/loaders/__init__.py +0 -18
  76. lionagi/loaders/chunker.py +0 -166
  77. lionagi/loaders/load_util.py +0 -240
  78. lionagi/loaders/reader.py +0 -122
  79. lionagi/models/__init__.py +0 -0
  80. lionagi/models/base_model.py +0 -0
  81. lionagi/models/imodel.py +0 -53
  82. lionagi/schema/async_queue.py +0 -158
  83. lionagi/schema/base_condition.py +0 -1
  84. lionagi/schema/base_node.py +0 -422
  85. lionagi/schema/base_tool.py +0 -44
  86. lionagi/schema/data_logger.py +0 -126
  87. lionagi/schema/data_node.py +0 -88
  88. lionagi/schema/status_tracker.py +0 -37
  89. lionagi/tests/test_utils/test_encrypt_util.py +0 -323
  90. lionagi/utils/encrypt_util.py +0 -283
  91. lionagi/utils/url_util.py +0 -55
  92. lionagi-0.0.208.dist-info/RECORD +0 -106
  93. lionagi/{agents → api_service}/__init__.py +0 -0
  94. lionagi/core/{branch/branch_manager.py → branch_manager.py} +0 -0
  95. lionagi/core/{messages/messages.py → messages.py} +3 -3
  96. /lionagi/{_services → services}/__init__.py +0 -0
  97. /lionagi/{_services → services}/mistralai.py +0 -0
  98. /lionagi/{_services → services}/mlx_service.py +0 -0
  99. /lionagi/{_services → services}/ollama.py +0 -0
  100. /lionagi/{_services → services}/services.py +0 -0
  101. /lionagi/{_services → services}/transformers.py +0 -0
  102. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/LICENSE +0 -0
  103. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/WHEEL +0 -0
  104. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/top_level.txt +0 -0
@@ -1,787 +0,0 @@
1
- import json
2
- import pandas as pd
3
- from datetime import datetime
4
- from typing import Any, Optional, Dict, Union
5
-
6
- from lionagi.utils.sys_util import as_dict, create_copy, strip_lower, to_df
7
- from lionagi.utils.call_util import lcall
8
- from ..messages.messages import Message, System, Instruction, Response
9
-
10
class Conversation:
    """
    A conversation whose messages are stored as rows of a pandas DataFrame.

    Attributes:
        messages (pd.DataFrame): One row per message, with the columns listed in ``_cols``.
    """

    _cols = ["node_id", "role", "sender", "timestamp", "content"]

    def __init__(self, messages: Optional[pd.DataFrame] = None) -> None:
        """
        Initializes a Conversation.

        Args:
            messages (Optional[pd.DataFrame]): Existing messages to adopt. When omitted, an empty
                DataFrame with the ``_cols`` columns is created.
        """
        if messages is None:
            messages = pd.DataFrame(columns=Conversation._cols)
        self.messages = messages

    @classmethod
    def from_csv(cls, filepath: str, **kwargs) -> 'Conversation':
        """
        Create a Conversation instance from a CSV file containing messages.

        Args:
            filepath (str): The path to the CSV file to be loaded.
            **kwargs: Additional keyword arguments passed to pandas.read_csv.

        Returns:
            Conversation: An instance holding the messages loaded from the file.
        """
        messages = to_df(pd.read_csv(filepath, **kwargs))
        return cls(messages=messages)

    @classmethod
    def from_json(cls, filepath: str, **kwargs) -> 'Conversation':
        """
        Create a Conversation instance from a JSON-lines file containing messages.

        Args:
            filepath (str): The path to the JSON file to be loaded.
            **kwargs: Additional keyword arguments passed to pandas.read_json.

        Returns:
            Conversation: An instance holding the messages loaded from the file.
        """
        # orient/lines mirror what to_json writes.
        messages = to_df(
            pd.read_json(filepath, orient="records", lines=True, **kwargs)
        )
        return cls(messages=messages)

    @property
    def last_row(self) -> pd.DataFrame:
        """
        Returns:
            pd.DataFrame: The last message of the conversation (a one-row frame).
        """
        return get_rows(self.messages, n=1, from_='last')

    @property
    def first_system(self) -> pd.DataFrame:
        """
        Returns:
            pd.DataFrame: The first message whose role is 'system'.
        """
        return get_rows(self.messages, role='system', n=1, from_='front')

    @property
    def last_response(self) -> pd.DataFrame:
        """
        Returns:
            pd.DataFrame: The last message whose role is 'assistant'.
        """
        return get_rows(self.messages, role='assistant', n=1, from_='last')

    @property
    def last_response_content(self) -> Dict:
        """
        Returns:
            Dict: The content of the last 'assistant' message, converted with as_dict.
        """
        return as_dict(self.last_response.content.iloc[-1])

    @property
    def last_instruction(self) -> pd.DataFrame:
        """
        Returns:
            pd.DataFrame: The last message whose role is 'user'.
        """
        return get_rows(self.messages, role='user', n=1, from_='last')

    @property
    def last_action_request(self):
        """
        Returns:
            pd.DataFrame: The last message whose sender is 'action_request'.
        """
        return get_rows(self.messages, sender='action_request', n=1, from_='last')

    @property
    def last_action_response(self):
        """
        Returns:
            pd.DataFrame: The last message whose sender is 'action_response'.
        """
        return get_rows(self.messages, sender='action_response', n=1, from_='last')

    @property
    def len_messages(self):
        """
        Returns:
            int: The total number of messages.
        """
        return len(self.messages)

    @property
    def len_instructions(self):
        """
        Returns:
            int: The number of messages with role 'user'.
        """
        return len(self.messages[self.messages.role == 'user'])

    @property
    def len_responses(self):
        """
        Returns:
            int: The number of messages with role 'assistant'.
        """
        return len(self.messages[self.messages.role == 'assistant'])

    @property
    def len_systems(self):
        """
        Returns:
            int: The number of messages with role 'system'.
        """
        return len(self.messages[self.messages.role == 'system'])

    @property
    def info(self):
        """
        Returns:
            Dict[str, int]: Message counts keyed by role, plus a 'total' entry.
        """
        return self._info()

    @property
    def sender_info(self):
        """
        Returns:
            Dict[str, int]: Message counts keyed by sender, plus a 'total' entry.
        """
        return self._info(use_sender=True)

    @property
    def describe(self) -> Dict[str, Any]:
        """
        Provides a descriptive summary of the conversation.

        Returns:
            Dict[str, Any]: The total number of messages, the counts by role, and the first
                (at most five) messages as dictionaries.
        """
        # head(5) already handles conversations shorter than five messages.
        preview = [msg.to_dict() for _, msg in self.messages.head(5).iterrows()]
        return {
            "total_messages": len(self.messages),
            "summary_by_role": self._info(),
            "messages": preview,
        }

    def clone(self, num: Optional[int] = None) -> 'Conversation':
        """
        Creates a copy or multiple copies of the current Conversation instance.

        Args:
            num (Optional[int], optional): The number of copies to create. If None, a single copy
                is created. Defaults to None.

        Returns:
            Conversation: A new Conversation, or the result of create_copy when num is given.
        """
        cloned = Conversation()
        # Conversation itself defines no logger; only propagate the directory when
        # both objects actually carry one.
        source_logger = getattr(self, "logger", None)
        target_logger = getattr(cloned, "logger", None)
        if source_logger is not None and target_logger is not None:
            target_logger.set_dir(source_logger.dir)
        cloned.messages = self.messages.copy()
        if num:
            return create_copy(cloned, num=num)
        return cloned

    def add_message(
        self,
        system: Optional[Union[dict, list, System]] = None,
        instruction: Optional[Union[dict, list, Instruction]] = None,
        context: Optional[Union[str, Dict[str, Any]]] = None,
        response: Optional[Union[dict, list, Response]] = None,
        sender: Optional[str] = None
    ) -> None:
        """
        Adds a message to the conversation.

        Args:
            system (Optional[Union[dict, list, System]], optional): System message content or object.
            instruction (Optional[Union[dict, list, Instruction]], optional): Instruction message content or object.
            context (Optional[Union[str, Dict[str, Any]]], optional): Context for the message.
            response (Optional[Union[dict, list, Response]], optional): Response message content or object.
            sender (Optional[str], optional): The sender of the message.

        Raises:
            ValueError: If not exactly one of system, instruction, response is provided.
        """
        msg = self._create_message(
            system=system, instruction=instruction,
            context=context, response=response, sender=sender
        )
        message_dict = msg.to_dict()
        # Dict-like content is stored as a JSON string.
        if isinstance(as_dict(message_dict['content']), dict):
            message_dict['content'] = json.dumps(message_dict['content'])
        message_dict['timestamp'] = datetime.now().isoformat()
        self.messages.loc[len(self.messages)] = message_dict

    def remove_message(self, node_id: str) -> None:
        """
        Removes a message from the conversation based on its node_id.

        Args:
            node_id (str): The node_id of the message to be removed.
        """
        kept = self.messages[self.messages["node_id"] != node_id]
        # add_message appends at label len(messages), so the index must stay dense.
        self.messages = kept.reset_index(drop=True)

    def update_message(
        self, value: Any, node_id: Optional[str] = None, col: str = 'node_id'
    ) -> bool:
        """
        Updates a message in the conversation based on its node_id.

        Args:
            value (Any): The new value to update the message with.
            node_id (Optional[str], optional): The node_id of the message to be updated. Defaults to None.
            col (str, optional): The column to be updated. Defaults to 'node_id'.

        Returns:
            bool: True if the update was successful, False otherwise.
        """
        return _update_row(self.messages, node_id=node_id, col=col, value=value)

    def change_first_system_message(
        self, system: Union[str, Dict[str, Any], System], sender: Optional[str] = None
    ):
        """
        Updates the first system message in the conversation.

        Args:
            system (Union[str, Dict[str, Any], System]): The new system message content, which can
                be a string, a dictionary of message content, or a System object.
            sender (Optional[str], optional): The sender of the system message. Defaults to None.

        Raises:
            ValueError: If there are no system messages in the conversation or if the input cannot
                be converted into a system message.
        """
        if self.len_systems == 0:
            raise ValueError("There is no system message in the messages.")

        if not isinstance(system, (str, Dict, System)):
            raise ValueError("Input cannot be converted into a system message.")

        # Raw content is wrapped first so that every input kind reaches the write below.
        if isinstance(system, (str, Dict)):
            system = System(system=system, sender=sender)

        message_dict = system.to_dict()
        if sender:
            message_dict['sender'] = sender
        # Same serialization rule as add_message: dict-like content is stored as JSON text.
        if isinstance(as_dict(message_dict['content']), dict):
            message_dict['content'] = json.dumps(message_dict['content'])
        message_dict['timestamp'] = datetime.now().isoformat()
        sys_index = self.messages[self.messages.role == 'system'].index
        self.messages.loc[sys_index[0]] = message_dict

    def rollback(self, steps: int) -> None:
        """
        Removes the last 'n' messages from the conversation.

        Args:
            steps (int): The number of messages to remove from the end of the conversation.

        Raises:
            ValueError: If 'steps' is negative or exceeds the number of messages.
        """
        total = len(self.messages)
        if steps < 0 or steps > total:
            raise ValueError("Steps must be a non-negative integer less than or equal to the number of messages.")
        # Slice by an explicit end position: a "[:-steps]" slice would empty the frame for steps == 0.
        self.messages = self.messages.iloc[: total - steps].reset_index(drop=True)

    def clear_messages(self) -> None:
        """
        Clears all messages from the conversation, resetting it to an empty state.
        """
        self.messages = pd.DataFrame(columns=Conversation._cols)

    def to_csv(self, filepath: str, **kwargs) -> None:
        """
        Exports the conversation messages to a CSV file.

        Args:
            filepath (str): The path to the file where the CSV will be saved.
            **kwargs: Additional keyword arguments passed to pandas.DataFrame.to_csv().
        """
        self.messages.to_csv(filepath, **kwargs)

    def to_json(self, filepath: str) -> None:
        """
        Exports the conversation messages to a JSON-lines file.

        The file is always written with orient='records', lines=True and date_format='iso',
        which is the layout from_json reads back.

        Args:
            filepath (str): The path to the file where the JSON will be saved.
        """
        self.messages.to_json(
            filepath, orient="records", lines=True, date_format="iso")

    def replace_keyword(
        self,
        keyword: str,
        replacement: str,
        col: str = 'content',
        case_sensitive: bool = False
    ) -> None:
        """
        Replaces all occurrences of a keyword in a specified column of the conversation's messages.

        Args:
            keyword (str): The keyword to be replaced.
            replacement (str): The string to replace the keyword with.
            col (str, optional): The column where the replacement should occur. Defaults to 'content'.
            case_sensitive (bool, optional): If True, the replacement is case sensitive. Defaults to False.
        """
        _replace_keyword(
            self.messages, keyword, replacement, col=col,
            case_sensitive=case_sensitive
        )

    def search_keywords(
        self,
        keywords: Union[str, list],
        case_sensitive: bool = False, reset_index: bool = False, dropna: bool = False
    ) -> pd.DataFrame:
        """
        Searches for messages containing specified keywords within the conversation.

        Args:
            keywords (Union[str, list]): The keyword(s) to search for within the conversation's messages.
            case_sensitive (bool, optional): If True, the search is case sensitive. Defaults to False.
            reset_index (bool, optional): Forwarded to the module-level search helper. Defaults to False.
            dropna (bool, optional): Forwarded to the module-level search helper. Defaults to False.

        Returns:
            pd.DataFrame: A DataFrame containing messages that match the search criteria.
        """
        return _search_keywords(
            self.messages, keywords, case_sensitive, reset_index, dropna
        )

    def extend(self, messages: pd.DataFrame, **kwargs) -> None:
        """
        Extends the conversation by appending new messages, dropping duplicates.

        Args:
            messages (pd.DataFrame): A DataFrame containing new messages to append to the conversation.
            **kwargs: Additional keyword arguments for handling duplicates (forwarded to _extend).
        """
        self.messages = _extend(self.messages, messages, **kwargs)

    def filter_by(
        self,
        role: Optional[str] = None,
        sender: Optional[str] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        content_keywords: Optional[Union[str, list]] = None,
        case_sensitive: bool = False
    ) -> pd.DataFrame:
        """
        Filters the conversation's messages by role, sender, time range, and keywords.

        Args:
            role (Optional[str]): Filter messages by role (e.g., 'user', 'assistant', 'system').
            sender (Optional[str]): Filter messages by sender.
            start_time (Optional[datetime]): Filter messages sent after this time.
            end_time (Optional[datetime]): Filter messages sent before this time.
            content_keywords (Optional[Union[str, list]]): Filter messages containing these keywords.
            case_sensitive (bool, optional): If True, keyword search is case sensitive. Defaults to False.

        Returns:
            pd.DataFrame: A DataFrame containing messages that match the filter criteria.
        """
        return _filter_messages_by(
            self.messages, role=role, sender=sender,
            start_time=start_time, end_time=end_time,
            content_keywords=content_keywords, case_sensitive=case_sensitive
        )

    def _create_message(
        self,
        system: Optional[Union[dict, list, System]] = None,
        instruction: Optional[Union[dict, list, Instruction]] = None,
        context: Optional[Union[str, Dict[str, Any]]] = None,
        response: Optional[Union[dict, list, Response]] = None,
        sender: Optional[str] = None
    ) -> Message:
        """
        Creates a message object, ensuring exactly one message type is specified.

        Args:
            system (Optional[Union[dict, list, System]]): System message to be added.
            instruction (Optional[Union[dict, list, Instruction]]): Instruction message to be added.
            context (Optional[Union[str, Dict[str, Any]]]): Context for the instruction message.
            response (Optional[Union[dict, list, Response]]): Response message to be added.
            sender (Optional[str]): The sender of the message.

        Returns:
            Message: The provided Message object, or a new one built from the provided content.

        Raises:
            ValueError: If zero or more than one of system, instruction, response is specified.
        """
        candidates = [system, instruction, response]
        if sum(lcall(candidates, bool)) != 1:
            raise ValueError("Error: Message must have one and only one role.")

        # Exactly one candidate is truthy here; an already-built Message is passed through.
        provided = next(item for item in candidates if item)
        if isinstance(provided, Message):
            return provided

        if response:
            return Response(response=response, sender=sender)
        if instruction:
            return Instruction(instruction=instruction,
                               context=context, sender=sender)
        return System(system=system, sender=sender)

    def _info(self, use_sender: bool = False) -> Dict[str, int]:
        """
        Generates a summary of the conversation's messages, either by role or sender.

        Args:
            use_sender (bool, optional): If True, counts by sender; otherwise by role. Defaults to False.

        Returns:
            Dict[str, int]: Counts per role or sender, plus a 'total' entry.
        """
        column = self.messages['sender'] if use_sender else self.messages['role']
        result = column.value_counts().to_dict()
        result['total'] = self.len_messages
        return result
502
-
503
def validate_messages(messages):
    """
    Validates the structure and content of a DataFrame containing conversation messages.

    Args:
        messages (pd.DataFrame): The DataFrame containing conversation messages to validate.

    Returns:
        bool: True if the DataFrame is valid, raises a ValueError otherwise.

    Raises:
        ValueError: If the DataFrame has unmatched columns, contains null values, has an
            unsupported role, or if any content is not a string that parses as JSON.
    """
    if list(messages.columns) != ['node_id', 'role', 'sender', 'timestamp', 'content']:
        raise ValueError('Invalid messages dataframe. Unmatched columns.')
    if messages.isnull().values.any():
        raise ValueError('Invalid messages dataframe. Cannot have null.')
    if not all(role in ('system', 'user', 'assistant') for role in messages['role'].unique()):
        raise ValueError('Invalid messages dataframe. Cannot have role other than ["system", "user", "assistant"].')
    for cont in messages['content']:
        if not isinstance(cont, str):
            raise ValueError('Invalid messages dataframe. Content expect json string.')
        if cont.startswith('Sender'):
            # Drop a "Sender <name>:" prefix; partition yields '' when the colon is
            # missing, which then fails JSON parsing below instead of raising IndexError.
            cont = cont.partition(':')[2]
        try:
            json.loads(cont)
        except ValueError as err:  # json.JSONDecodeError is a ValueError subclass
            raise ValueError('Invalid messages dataframe. Content expect json string.') from err
    return True
531
-
532
def _sign_message(messages, sender: str):
    """
    Prefixes each message in the DataFrame with 'Sender <sender>:' to indicate the message's origin.

    Content that already starts with 'Sender' has its existing prefix (everything up to the
    first colon) replaced rather than stacked.

    Args:
        messages (pd.DataFrame): The DataFrame containing conversation messages to sign.
        sender (str): The name or identifier of the sender to prefix the messages with.

    Returns:
        pd.DataFrame: A copy of the DataFrame with signed content, passed through to_df.

    Raises:
        ValueError: If the sender is None or equivalent to the string 'none'.
    """
    if sender is None or strip_lower(sender) == 'none':
        raise ValueError("sender cannot be None")
    df = messages.copy()

    for i in df.index:
        content = df.loc[i, 'content']
        if content.startswith('Sender'):
            prefix, colon, rest = content.partition(':')
            # Only treat it as an old signature when a colon is present; strip the
            # separator space so repeated signing does not accumulate blanks.
            if colon:
                content = rest.lstrip()
        df.loc[i, 'content'] = f"Sender {sender}: {content}"

    return to_df(df)
558
-
559
def _search_keywords(
    messages,
    keywords: Union[str, list],
    case_sensitive: bool = False, reset_index=False, dropna=False
):
    """
    Searches for keywords in the 'content' column of a DataFrame and returns matching rows.

    Args:
        messages (pd.DataFrame): The DataFrame to search within.
        keywords (Union[str, List[str]]): Keyword(s) to search for. A list is joined with '|',
            i.e. combined with an OR condition; the pattern is handed to pandas' str.contains.
        case_sensitive (bool, optional): Whether the search should be case-sensitive. Defaults to False.
        reset_index (bool, optional): Forwarded to to_df for the result. Defaults to False.
        dropna (bool, optional): When True the result is passed through to_df. Defaults to False.

    Returns:
        pd.DataFrame: The rows whose 'content' matches the search criteria.
    """
    if isinstance(keywords, list):
        keywords = '|'.join(keywords)
    # A single call honours the flag; na=False keeps missing content out of the
    # mask instead of producing NaN entries that cannot be used for indexing.
    mask = messages["content"].str.contains(
        keywords, case=case_sensitive, na=False
    )
    out = messages[mask]
    if reset_index or dropna:
        out = to_df(out, reset_index=reset_index)
    return out
588
-
589
def _filter_messages_by(
    messages,
    role: Optional[str] = None,
    sender: Optional[str] = None,
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    content_keywords: Optional[Union[str, list]] = None,
    case_sensitive: bool = False
) -> pd.DataFrame:
    """
    Filters messages in a DataFrame by role, sender, time range, and keywords.

    Args:
        messages (pd.DataFrame): The DataFrame of messages to filter.
        role (Optional[str]): The role to filter messages by (e.g., 'user', 'assistant').
        sender (Optional[str]): The sender to filter messages by.
        start_time (Optional[datetime]): Keep only messages strictly after this time.
        end_time (Optional[datetime]): Keep only messages strictly before this time.
        content_keywords (Optional[Union[str, list]]): Keywords to filter messages by content.
        case_sensitive (bool): Determines if the keyword search should be case-sensitive.

    Returns:
        pd.DataFrame: The messages that match all given criteria, passed through to_df.

    Raises:
        ValueError: If an error occurs during the filtering process.
    """
    try:
        outs = messages.copy()

        if content_keywords:
            outs = _search_keywords(outs, content_keywords, case_sensitive)

        outs = outs[outs['role'] == role] if role else outs
        outs = outs[outs['sender'] == sender] if sender else outs

        if start_time is not None or end_time is not None:
            # Timestamps may be stored as ISO strings; parse them so they can be
            # compared against datetime bounds.
            stamps = pd.to_datetime(outs['timestamp'])
            keep = pd.Series(True, index=outs.index)
            if start_time is not None:
                keep &= stamps > start_time
            if end_time is not None:
                keep &= stamps < end_time
            outs = outs[keep]

        return to_df(outs)

    except Exception as e:
        raise ValueError(f"Error in filtering messages: {e}") from e
632
-
633
def _replace_keyword(
    df,
    keyword: str,
    replacement: str,
    col='content',
    case_sensitive: bool = False
) -> None:
    """
    Substitutes a keyword inside one column of a DataFrame.

    Args:
        df (pd.DataFrame): The DataFrame to operate on.
        keyword (str): The text to look for.
        replacement (str): The text written in place of each match.
        col (str): Name of the column to rewrite.
        case_sensitive (bool): When False the match ignores case.

    Returns:
        None: The column is reassigned on the given DataFrame.
    """
    # Only the case-insensitive path passes an explicit ``case`` argument;
    # the case-sensitive path relies on pandas' default.
    options = {} if case_sensitive else {"case": False}
    column = df[col]
    df[col] = column.str.replace(keyword, replacement, **options)
661
-
662
def _remove_message(df, node_id: str) -> bool:
    """
    Removes a message from the DataFrame, in place, based on its node_id.

    Args:
        df (pd.DataFrame): The DataFrame from which the message should be removed.
        node_id (str): The node_id of the message to be removed.

    Returns:
        bool: True if at least one row was removed, False otherwise.
    """
    matches = df.index[df["node_id"] == node_id]
    if len(matches) == 0:
        return False
    # Mutate the caller's frame: rebinding the local name would leave it untouched.
    df.drop(index=matches, inplace=True)
    # Keep the index dense so label-based appends at len(df) do not hit an existing row.
    df.reset_index(drop=True, inplace=True)
    return True
677
-
678
def _update_row(
    df, node_id = None, col = "node_id", value = None
) -> bool:
    """
    Updates the value of a specified column for the row identified by node_id.

    Args:
        df (pd.DataFrame): The DataFrame to update (modified in place).
        node_id (Optional[str]): The node_id of the row to be updated.
        col (str): The column to update.
        value (Any): The new value to be assigned to the column.

    Returns:
        bool: True if a matching row was found and updated, False otherwise.
    """
    # The row is always located through the 'node_id' column; ``col`` only selects
    # which cell of that row is written. Only the first match is updated.
    index = df.index[df["node_id"] == node_id].tolist()
    if index:
        df.at[index[0], col] = value
        return True
    return False
698
-
699
def _remove_last_n_rows(df, steps: int) -> None:
    """
    Removes the last 'n' rows from a DataFrame, in place.

    Args:
        df (pd.DataFrame): The DataFrame from which rows will be removed.
        steps (int): The number of rows to remove.

    Returns:
        None: The given DataFrame is modified; its index is reset to 0..len-1.

    Raises:
        ValueError: If 'steps' is less than 0 or greater than the number of rows in the DataFrame.
    """
    if steps < 0 or steps > len(df):
        raise ValueError("Steps must be a non-negative integer less than or equal to the number of messages.")
    # Reset first so the labels are unique and positional; dropping by label
    # then removes exactly the trailing rows.
    df.reset_index(drop=True, inplace=True)
    if steps:
        # Guarded because a "[-0:]" slice would select every row.
        df.drop(index=df.index[-steps:], inplace=True)
716
-
717
def get_rows(
    df,
    sender: Optional[str] = None,
    role: Optional[str] = None,
    n: int = 1,
    sign_ = False,
    from_="front",
) -> pd.DataFrame:
    """
    Retrieves rows from a DataFrame based on specified sender, role, and quantity, optionally signing them.

    Args:
        df (pd.DataFrame): The DataFrame to retrieve rows from.
        sender (Optional[str]): If given (non-empty), keep only rows with this sender.
        role (Optional[str]): If given (non-empty), keep only rows with this role.
        n (int): The number of rows to retrieve.
        sign_ (bool): Whether to pass the retrieved rows through _sign_message.
        from_ (str): 'front' for the first rows, 'last' for the last rows.

    Returns:
        pd.DataFrame: A DataFrame containing the retrieved rows.

    Raises:
        ValueError: If from_ is neither 'front' nor 'last'.
    """
    if from_ not in ("front", "last"):
        raise ValueError(f"from_ must be 'front' or 'last', got {from_!r}")

    selected = df
    if sender:
        selected = selected[selected['sender'] == sender]
    if role:
        selected = selected[selected['role'] == role]

    # head/tail rather than iloc slices: tail(0) is empty, whereas iloc[-0:] is everything.
    outs = selected.tail(n) if from_ == "last" else selected.head(n)

    return _sign_message(outs, sender) if sign_ else outs
761
-
762
def _extend(df1: pd.DataFrame, df2: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """
    Extends a DataFrame with another DataFrame, removing duplicate messages.

    Args:
        df1 (pd.DataFrame): The original DataFrame to be extended.
        df2 (pd.DataFrame): The DataFrame containing new rows to add to df1.
        **kwargs: Additional keyword arguments for pandas.DataFrame.drop_duplicates().
            By default duplicates are detected on 'node_id' and the first occurrence is kept;
            'subset' and 'keep' given here override those defaults. 'inplace' is ignored.

    Returns:
        pd.DataFrame: The combined messages without duplicates. If both inputs hold no
            data, df1 is returned unchanged.

    Raises:
        ValueError: If df2 fails validate_messages or an error occurs during the extension process.
    """
    validate_messages(df2)
    try:
        # Frames without any real data are left out of the merge instead of making
        # the function fall through and return None.
        frames = [
            frame for frame in (df1, df2)
            if len(frame.dropna(how='all')) > 0
        ]
        if not frames:
            return df1

        merged = to_df(frames) if len(frames) > 1 else to_df(frames[0])

        options = {"subset": ['node_id'], "keep": 'first', **kwargs}
        # The result is always returned, so an in-place request has no meaning here.
        options.pop("inplace", None)
        merged = merged.drop_duplicates(**options)
        return to_df(merged)
    except Exception as e:
        raise ValueError(f"Error in extending messages: {e}") from e
787
-