lionagi 0.0.208__py3-none-any.whl → 0.0.210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. lionagi/__init__.py +4 -6
  2. lionagi/api_service/base_endpoint.py +65 -0
  3. lionagi/api_service/base_rate_limiter.py +121 -0
  4. lionagi/api_service/base_service.py +146 -0
  5. lionagi/api_service/chat_completion.py +6 -0
  6. lionagi/api_service/embeddings.py +6 -0
  7. lionagi/api_service/payload_package.py +47 -0
  8. lionagi/api_service/status_tracker.py +29 -0
  9. lionagi/core/__init__.py +5 -9
  10. lionagi/core/branch.py +1191 -0
  11. lionagi/core/flow.py +423 -0
  12. lionagi/core/{instruction_set/instruction_set.py → instruction_set.py} +3 -3
  13. lionagi/core/session.py +872 -0
  14. lionagi/schema/__init__.py +5 -8
  15. lionagi/schema/base_schema.py +821 -0
  16. lionagi/{_services → services}/base_service.py +4 -4
  17. lionagi/{_services → services}/oai.py +4 -4
  18. lionagi/structures/graph.py +1 -1
  19. lionagi/structures/relationship.py +1 -1
  20. lionagi/structures/structure.py +1 -1
  21. lionagi/tools/tool_manager.py +0 -163
  22. lionagi/tools/tool_util.py +2 -1
  23. lionagi/utils/__init__.py +7 -14
  24. lionagi/utils/api_util.py +63 -2
  25. lionagi/utils/core_utils.py +338 -0
  26. lionagi/utils/sys_util.py +3 -3
  27. lionagi/version.py +1 -1
  28. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/METADATA +28 -29
  29. lionagi-0.0.210.dist-info/RECORD +56 -0
  30. lionagi/_services/anthropic.py +0 -79
  31. lionagi/_services/anyscale.py +0 -0
  32. lionagi/_services/azure.py +0 -1
  33. lionagi/_services/bedrock.py +0 -0
  34. lionagi/_services/everlyai.py +0 -0
  35. lionagi/_services/gemini.py +0 -0
  36. lionagi/_services/gpt4all.py +0 -0
  37. lionagi/_services/huggingface.py +0 -0
  38. lionagi/_services/litellm.py +0 -33
  39. lionagi/_services/localai.py +0 -0
  40. lionagi/_services/openllm.py +0 -0
  41. lionagi/_services/openrouter.py +0 -44
  42. lionagi/_services/perplexity.py +0 -0
  43. lionagi/_services/predibase.py +0 -0
  44. lionagi/_services/rungpt.py +0 -0
  45. lionagi/_services/vllm.py +0 -0
  46. lionagi/_services/xinference.py +0 -0
  47. lionagi/agents/planner.py +0 -1
  48. lionagi/agents/prompter.py +0 -1
  49. lionagi/agents/scorer.py +0 -1
  50. lionagi/agents/summarizer.py +0 -1
  51. lionagi/agents/validator.py +0 -1
  52. lionagi/bridge/__init__.py +0 -22
  53. lionagi/bridge/langchain.py +0 -195
  54. lionagi/bridge/llama_index.py +0 -266
  55. lionagi/core/branch/__init__.py +0 -0
  56. lionagi/core/branch/branch.py +0 -841
  57. lionagi/core/branch/cluster.py +0 -1
  58. lionagi/core/branch/conversation.py +0 -787
  59. lionagi/core/core_util.py +0 -0
  60. lionagi/core/flow/__init__.py +0 -0
  61. lionagi/core/flow/flow.py +0 -19
  62. lionagi/core/flow/flow_util.py +0 -62
  63. lionagi/core/instruction_set/__init__.py +0 -0
  64. lionagi/core/messages/__init__.py +0 -0
  65. lionagi/core/sessions/__init__.py +0 -0
  66. lionagi/core/sessions/session.py +0 -504
  67. lionagi/datastores/__init__.py +0 -1
  68. lionagi/datastores/chroma.py +0 -1
  69. lionagi/datastores/deeplake.py +0 -1
  70. lionagi/datastores/elasticsearch.py +0 -1
  71. lionagi/datastores/lantern.py +0 -1
  72. lionagi/datastores/pinecone.py +0 -1
  73. lionagi/datastores/postgres.py +0 -1
  74. lionagi/datastores/qdrant.py +0 -1
  75. lionagi/loaders/__init__.py +0 -18
  76. lionagi/loaders/chunker.py +0 -166
  77. lionagi/loaders/load_util.py +0 -240
  78. lionagi/loaders/reader.py +0 -122
  79. lionagi/models/__init__.py +0 -0
  80. lionagi/models/base_model.py +0 -0
  81. lionagi/models/imodel.py +0 -53
  82. lionagi/schema/async_queue.py +0 -158
  83. lionagi/schema/base_condition.py +0 -1
  84. lionagi/schema/base_node.py +0 -422
  85. lionagi/schema/base_tool.py +0 -44
  86. lionagi/schema/data_logger.py +0 -126
  87. lionagi/schema/data_node.py +0 -88
  88. lionagi/schema/status_tracker.py +0 -37
  89. lionagi/tests/test_utils/test_encrypt_util.py +0 -323
  90. lionagi/utils/encrypt_util.py +0 -283
  91. lionagi/utils/url_util.py +0 -55
  92. lionagi-0.0.208.dist-info/RECORD +0 -106
  93. lionagi/{agents → api_service}/__init__.py +0 -0
  94. lionagi/core/{branch/branch_manager.py → branch_manager.py} +0 -0
  95. lionagi/core/{messages/messages.py → messages.py} +3 -3
  96. /lionagi/{_services → services}/__init__.py +0 -0
  97. /lionagi/{_services → services}/mistralai.py +0 -0
  98. /lionagi/{_services → services}/mlx_service.py +0 -0
  99. /lionagi/{_services → services}/ollama.py +0 -0
  100. /lionagi/{_services → services}/services.py +0 -0
  101. /lionagi/{_services → services}/transformers.py +0 -0
  102. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/LICENSE +0 -0
  103. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/WHEEL +0 -0
  104. {lionagi-0.0.208.dist-info → lionagi-0.0.210.dist-info}/top_level.txt +0 -0
@@ -1,787 +0,0 @@
1
- import json
2
- import pandas as pd
3
- from datetime import datetime
4
- from typing import Any, Optional, Dict, Union
5
-
6
- from lionagi.utils.sys_util import as_dict, create_copy, strip_lower, to_df
7
- from lionagi.utils.call_util import lcall
8
- from ..messages.messages import Message, System, Instruction, Response
9
-
10
class Conversation:
    """
    A conversation stored as a pandas DataFrame of messages.

    Attributes:
        messages (pd.DataFrame): One row per message, with the columns listed in ``_cols``.
    """

    _cols = ["node_id", "role", "sender", "timestamp", "content"]

    def __init__(self, messages: Optional[pd.DataFrame] = None) -> None:
        """
        Initializes a Conversation.

        Args:
            messages (Optional[pd.DataFrame]): Existing messages to adopt. When omitted, an empty
                DataFrame with the ``_cols`` columns is created. The alternate constructors
                ``from_csv`` and ``from_json`` rely on this parameter.
        """
        if messages is None:
            messages = pd.DataFrame(columns=Conversation._cols)
        self.messages = messages

    @classmethod
    def from_csv(cls, filepath: str, **kwargs) -> 'Conversation':
        """
        Create an instance from a CSV file containing messages.

        Args:
            filepath (str): The path to the CSV file to be loaded.
            **kwargs: Extra keyword arguments forwarded to the class constructor
                (not to ``pandas.read_csv``).

        Returns:
            Conversation: An instance whose messages were loaded from the file.
        """
        messages = to_df(pd.read_csv(filepath))
        return cls(messages=messages, **kwargs)

    @classmethod
    def from_json(cls, filepath: str, **kwargs) -> 'Conversation':
        """
        Create an instance from a JSON-lines file containing messages.

        The file is read with ``orient="records", lines=True``, which matches what
        ``to_json`` writes.

        Args:
            filepath (str): The path to the JSON file to be loaded.
            **kwargs: Extra keyword arguments forwarded to the class constructor
                (not to ``pandas.read_json``).

        Returns:
            Conversation: An instance whose messages were loaded from the file.
        """
        messages = to_df(pd.read_json(filepath, orient="records", lines=True))
        return cls(messages=messages, **kwargs)

    @property
    def last_row(self) -> pd.DataFrame:
        """
        Retrieve the last message of the conversation.

        Returns:
            pd.DataFrame: A one-row frame holding the last message (empty if there is none).
        """
        return get_rows(self.messages, n=1, from_='last')

    @property
    def first_system(self) -> pd.DataFrame:
        """
        Retrieve the first message whose role is 'system'.

        Returns:
            pd.DataFrame: A one-row frame (empty if there is no system message).
        """
        return get_rows(self.messages, role='system', n=1, from_='front')

    @property
    def last_response(self) -> pd.DataFrame:
        """
        Retrieve the last message whose role is 'assistant'.

        Returns:
            pd.DataFrame: A one-row frame (empty if there is no response).
        """
        return get_rows(self.messages, role='assistant', n=1, from_='last')

    @property
    def last_response_content(self) -> Dict:
        """
        Retrieve the content of the last 'assistant' message, converted with ``as_dict``.

        Returns:
            Dict: The content of the last response.

        Raises:
            IndexError: If the conversation holds no 'assistant' message.
        """
        return as_dict(self.last_response.content.iloc[-1])

    @property
    def last_instruction(self) -> pd.DataFrame:
        """
        Retrieve the last message whose role is 'user'.

        Returns:
            pd.DataFrame: A one-row frame (empty if there is no instruction).
        """
        return get_rows(self.messages, role='user', n=1, from_='last')

    @property
    def last_action_request(self) -> pd.DataFrame:
        """
        Retrieve the last message whose sender is 'action_request'.

        Returns:
            pd.DataFrame: A one-row frame (empty if there is no such message).
        """
        return get_rows(self.messages, sender='action_request', n=1, from_='last')

    @property
    def last_action_response(self) -> pd.DataFrame:
        """
        Retrieve the last message whose sender is 'action_response'.

        Returns:
            pd.DataFrame: A one-row frame (empty if there is no such message).
        """
        return get_rows(self.messages, sender='action_response', n=1, from_='last')

    @property
    def len_messages(self) -> int:
        """
        Get the total number of messages in the conversation.

        Returns:
            int: The total number of messages.
        """
        return len(self.messages)

    @property
    def len_instructions(self) -> int:
        """
        Get the number of messages with role 'user'.

        Returns:
            int: The total number of instruction messages.
        """
        return len(self.messages[self.messages.role == 'user'])

    @property
    def len_responses(self) -> int:
        """
        Get the number of messages with role 'assistant'.

        Returns:
            int: The total number of response messages.
        """
        return len(self.messages[self.messages.role == 'assistant'])

    @property
    def len_systems(self) -> int:
        """
        Get the number of messages with role 'system'.

        Returns:
            int: The total number of system messages.
        """
        return len(self.messages[self.messages.role == 'system'])

    @property
    def info(self) -> Dict[str, int]:
        """
        Get message counts grouped by role.

        Returns:
            Dict[str, int]: Role -> count, plus a 'total' entry.
        """
        return self._info()

    @property
    def sender_info(self) -> Dict[str, int]:
        """
        Get message counts grouped by sender.

        Returns:
            Dict[str, int]: Sender -> count, plus a 'total' entry.
        """
        return self._info(use_sender=True)

    @property
    def describe(self) -> Dict[str, Any]:
        """
        Provide a descriptive summary of the conversation.

        Returns:
            Dict[str, Any]: 'total_messages' (int), 'summary_by_role' (see ``info``) and
            'messages', a list of at most the first five messages as dictionaries.
        """
        # head(5) already copes with shorter conversations; the previous manual
        # slice dropped the last message whenever fewer than five were stored.
        preview = [row.to_dict() for _, row in self.messages.head(5).iterrows()]
        return {
            "total_messages": len(self.messages),
            "summary_by_role": self._info(),
            "messages": preview,
        }

    def clone(self, num: Optional[int] = None) -> 'Conversation':
        """
        Create a copy, or several copies, of this conversation.

        Args:
            num (Optional[int]): The number of copies to create. If None (or 0), a single
                copy is returned.

        Returns:
            Conversation: A new Conversation, or the result of ``create_copy`` when ``num`` is given.
        """
        cloned = Conversation()
        # Conversation itself defines no logger; only propagate the directory
        # when both objects actually carry one (e.g. set by a subclass).
        source_logger = getattr(self, "logger", None)
        target_logger = getattr(cloned, "logger", None)
        if source_logger is not None and target_logger is not None:
            target_logger.set_dir(source_logger.dir)
        cloned.messages = self.messages.copy()
        if num:
            return create_copy(cloned, num=num)
        return cloned

    def add_message(
        self,
        system: Optional[Union[dict, list, "System"]] = None,
        instruction: Optional[Union[dict, list, "Instruction"]] = None,
        context: Optional[Union[str, Dict[str, Any]]] = None,
        response: Optional[Union[dict, list, "Response"]] = None,
        sender: Optional[str] = None
    ) -> None:
        """
        Append one message to the conversation.

        Exactly one of ``system``, ``instruction`` or ``response`` must be given.

        Args:
            system: System message content or object.
            instruction: Instruction message content or object.
            context: Context attached to an instruction message.
            response: Response message content or object.
            sender: The sender of the message.

        Raises:
            ValueError: If zero or more than one message type is specified.
        """
        msg = self._create_message(
            system=system, instruction=instruction,
            context=context, response=response, sender=sender
        )
        message_dict = msg.to_dict()
        # Dict-like content is stored as a JSON string.
        if isinstance(as_dict(message_dict['content']), dict):
            message_dict['content'] = json.dumps(message_dict['content'])
        message_dict['timestamp'] = datetime.now().isoformat()
        self.messages.loc[len(self.messages)] = message_dict

    def remove_message(self, node_id: str) -> None:
        """
        Remove the message(s) with the given node_id.

        The index is renumbered afterwards because ``add_message`` appends at
        label ``len(self.messages)`` and would otherwise overwrite a surviving row.

        Args:
            node_id (str): The node_id of the message to be removed.
        """
        keep = self.messages["node_id"] != node_id
        self.messages = self.messages[keep].reset_index(drop=True)

    def update_message(
        self, value: Any, node_id: Optional[str] = None, col: str = 'node_id'
    ) -> bool:
        """
        Update one column of the message identified by node_id.

        Args:
            value (Any): The new value to store.
            node_id (Optional[str]): The node_id of the message to be updated.
            col (str): The column to be updated. Defaults to 'node_id'.

        Returns:
            bool: True if a matching message was found and updated, False otherwise.
        """
        # Locate the row by node_id (not by `col`), then write into `col`.
        rows = self.messages.index[self.messages["node_id"] == node_id].tolist()
        if not rows:
            return False
        self.messages.at[rows[0], col] = value
        return True

    def change_first_system_message(
        self, system: Union[str, Dict[str, Any], "System"], sender: Optional[str] = None
    ):
        """
        Replace the first system message in the conversation.

        Args:
            system: The new system message: a string, a dictionary of message content,
                or a System object.
            sender (Optional[str]): The sender of the system message. Defaults to None.

        Raises:
            ValueError: If there is no system message in the conversation or the input
                cannot be converted into a system message.
        """
        if self.len_systems == 0:
            raise ValueError("There is no system message in the messages.")

        if not isinstance(system, (str, dict, System)):
            raise ValueError("Input cannot be converted into a system message.")

        # Previously an elif chain: str/dict input was wrapped in a System and
        # then never written back. Wrap first, then always update.
        if not isinstance(system, System):
            system = System(system, sender=sender)

        message_dict = system.to_dict()
        if sender:
            message_dict['sender'] = sender
        message_dict['timestamp'] = datetime.now().isoformat()
        sys_index = self.messages[self.messages.role == 'system'].index
        self.messages.loc[sys_index[0]] = message_dict

    def rollback(self, steps: int) -> None:
        """
        Remove the last ``steps`` messages from the conversation.

        Args:
            steps (int): The number of messages to remove from the end.

        Raises:
            ValueError: If 'steps' is negative or exceeds the number of messages.
        """
        total = len(self.messages)
        if steps < 0 or steps > total:
            raise ValueError("Steps must be a non-negative integer less than or equal to the number of messages.")
        # Slice by explicit end position so steps == 0 keeps everything.
        self.messages = self.messages.iloc[: total - steps].reset_index(drop=True)

    def clear_messages(self) -> None:
        """
        Clear all messages, resetting the conversation to an empty state.
        """
        self.messages = pd.DataFrame(columns=Conversation._cols)

    def to_csv(self, filepath: str, **kwargs) -> None:
        """
        Export the conversation messages to a CSV file.

        Args:
            filepath (str): The path of the CSV file to write.
            **kwargs: Additional keyword arguments passed to pandas.DataFrame.to_csv().
        """
        self.messages.to_csv(filepath, **kwargs)

    def to_json(self, filepath: str) -> None:
        """
        Export the conversation messages to a JSON-lines file.

        The file is always written with orient='records', lines=True and
        date_format='iso', which is the layout ``from_json`` reads back.

        Args:
            filepath (str): The path of the JSON file to write.
        """
        self.messages.to_json(
            filepath, orient="records", lines=True, date_format="iso")

    def replace_keyword(
        self,
        keyword: str,
        replacement: str,
        col: str = 'content',
        case_sensitive: bool = False
    ) -> None:
        """
        Replace all occurrences of a keyword in one column of the messages.

        Args:
            keyword (str): The keyword to be replaced.
            replacement (str): The string to replace the keyword with.
            col (str): The column where the replacement should occur. Defaults to 'content'.
            case_sensitive (bool): If True, the replacement is case sensitive. Defaults to False.
        """
        _replace_keyword(
            self.messages, keyword, replacement, col=col,
            case_sensitive=case_sensitive
        )

    def search_keywords(
        self,
        keywords: Union[str, list],
        case_sensitive: bool = False, reset_index: bool = False, dropna: bool = False
    ) -> pd.DataFrame:
        """
        Search for messages whose content contains the given keyword(s).

        Args:
            keywords (Union[str, list]): The keyword(s) to search for.
            case_sensitive (bool): If True, the search is case sensitive. Defaults to False.
            reset_index (bool): Forwarded to the search helper. Defaults to False.
            dropna (bool): Forwarded to the search helper. Defaults to False.

        Returns:
            pd.DataFrame: The messages that match the search criteria.
        """
        return _search_keywords(
            self.messages, keywords, case_sensitive, reset_index, dropna
        )

    def extend(self, messages: pd.DataFrame, **kwargs) -> None:
        """
        Extend the conversation with new messages, dropping duplicates.

        Args:
            messages (pd.DataFrame): The new messages to append.
            **kwargs: Additional keyword arguments for duplicate handling
                (passed on towards pandas' drop_duplicates).
        """
        self.messages = _extend(self.messages, messages, **kwargs)

    def filter_by(
        self,
        role: Optional[str] = None,
        sender: Optional[str] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None,
        content_keywords: Optional[Union[str, list]] = None,
        case_sensitive: bool = False
    ) -> pd.DataFrame:
        """
        Filter the messages by role, sender, time range and/or content keywords.

        Args:
            role (Optional[str]): Keep messages with this role.
            sender (Optional[str]): Keep messages from this sender.
            start_time (Optional[datetime]): Keep messages sent after this time.
            end_time (Optional[datetime]): Keep messages sent before this time.
            content_keywords (Optional[Union[str, list]]): Keep messages containing these keywords.
            case_sensitive (bool): If True, keyword search is case sensitive. Defaults to False.

        Returns:
            pd.DataFrame: The messages that match the filter criteria.
        """
        return _filter_messages_by(
            self.messages, role=role, sender=sender,
            start_time=start_time, end_time=end_time,
            content_keywords=content_keywords, case_sensitive=case_sensitive
        )

    def _create_message(
        self,
        system: Optional[Union[dict, list, "System"]] = None,
        instruction: Optional[Union[dict, list, "Instruction"]] = None,
        context: Optional[Union[str, Dict[str, Any]]] = None,
        response: Optional[Union[dict, list, "Response"]] = None,
        sender: Optional[str] = None
    ) -> "Message":
        """
        Build a message object, ensuring exactly one message type is specified.

        Args:
            system: System message to be added.
            instruction: Instruction message to be added.
            context: Context for the instruction message.
            response: Response message to be added.
            sender (Optional[str]): The sender of the message.

        Returns:
            Message: The given object if it already is a Message, otherwise a new
            Response, Instruction or System built from the input.

        Raises:
            ValueError: If zero or more than one message type is specified.
        """
        provided = [item for item in (system, instruction, response) if item]
        if len(provided) != 1:
            raise ValueError("Error: Message must have one and only one role.")

        # A ready-made Message is passed through untouched. The earlier check
        # inspected the bool returned by any(), so this branch never fired.
        if isinstance(provided[0], Message):
            return provided[0]

        if response:
            return Response(response=response, sender=sender)
        if instruction:
            return Instruction(
                instruction=instruction, context=context, sender=sender
            )
        return System(system=system, sender=sender)

    def _info(self, use_sender: bool = False) -> Dict[str, int]:
        """
        Count the messages, grouped either by role or by sender.

        Args:
            use_sender (bool): If True, group by sender; otherwise by role. Defaults to False.

        Returns:
            Dict[str, int]: Group -> count, plus a 'total' entry with the number of messages.
        """
        column = 'sender' if use_sender else 'role'
        result = self.messages[column].value_counts().to_dict()
        # len_messages is already an int; wrapping it in len() raised TypeError.
        result['total'] = self.len_messages
        return result
502
-
503
def validate_messages(messages):
    """
    Validates the structure and content of a DataFrame containing conversation messages.

    Args:
        messages (pd.DataFrame): The DataFrame containing conversation messages to validate.

    Returns:
        bool: True if the DataFrame is valid, raises a ValueError otherwise.

    Raises:
        ValueError: If the DataFrame has unmatched columns, contains null values, has an
            unsupported role, or if a content value cannot be parsed as a JSON string.
    """
    if list(messages.columns) != ['node_id', 'role', 'sender', 'timestamp', 'content']:
        raise ValueError('Invalid messages dataframe. Unmatched columns.')
    if messages.isnull().values.any():
        raise ValueError('Invalid messages dataframe. Cannot have null.')
    if not all(role in ['system', 'user', 'assistant'] for role in messages['role'].unique()):
        raise ValueError('Invalid messages dataframe. Cannot have role other than ["system", "user", "assistant"].')
    for cont in messages['content']:
        try:
            if not isinstance(cont, str):
                raise TypeError("content is not a string")
            if cont.startswith('Sender'):
                # Signed content looks like "Sender <name>: <json>"; keep only
                # the part after the first colon (empty if there is no colon).
                cont = cont.partition(':')[2]
            json.loads(cont)
        except (TypeError, ValueError) as e:
            # json.JSONDecodeError is a ValueError subclass.
            raise ValueError('Invalid messages dataframe. Content expect json string.') from e
    return True
531
-
532
def _sign_message(messages, sender: str):
    """
    Prefixes each message's content with 'Sender <sender>: ' to indicate its origin.

    Content that already carries a 'Sender ...:' prefix is re-signed: the old
    prefix is replaced rather than stacked.

    Args:
        messages (pd.DataFrame): The DataFrame containing conversation messages to sign.
        sender (str): The name or identifier of the sender to prefix the messages with.

    Returns:
        pd.DataFrame: A copy of the messages, signed by the specified sender and
        passed through ``to_df``.

    Raises:
        ValueError: If the sender is None or equivalent to the string 'none'.
    """
    if sender is None or strip_lower(sender) == 'none':
        raise ValueError("sender cannot be None")
    df = messages.copy()

    for i in df.index:
        content = df.loc[i, 'content']
        if content.startswith('Sender'):
            _, sep, remainder = content.partition(':')
            if sep:
                # Drop the old prefix and the space that followed it, so that
                # repeated signing does not accumulate leading spaces.
                content = remainder.lstrip()
        df.loc[i, 'content'] = f"Sender {sender}: {content}"

    return to_df(df)
558
-
559
- def _search_keywords(
560
- messages,
561
- keywords: Union[str, list],
562
- case_sensitive: bool = False, reset_index=False, dropna=False
563
- ):
564
- """
565
- Searches for keywords in the 'content' column of a DataFrame and returns matching rows.
566
-
567
- Args:
568
- messages (pd.DataFrame): The DataFrame to search within.
569
- keywords (Union[str, List[str]]): Keyword(s) to search for. If a list, combines keywords with an OR condition.
570
- case_sensitive (bool, optional): Whether the search should be case-sensitive. Defaults to False.
571
- reset_index (bool, optional): Whether to reset the index of the resulting DataFrame. Defaults to False.
572
- dropna (bool, optional): Whether to drop rows with NA values in the 'content' column. Defaults to False.
573
-
574
- Returns:
575
- pd.DataFrame: A DataFrame containing rows where the 'content' column matches the search criteria.
576
- """
577
- out = ''
578
- if isinstance(keywords, list):
579
- keywords = '|'.join(keywords)
580
- if not case_sensitive:
581
- out = messages[
582
- messages["content"].str.contains(keywords, case=False)
583
- ]
584
- out = messages[messages["content"].str.contains(keywords)]
585
- if reset_index or dropna:
586
- out = to_df(out, reset_index=reset_index)
587
- return out
588
-
589
def _filter_messages_by(
    messages,
    role: Optional[str] = None,
    sender: Optional[str] = None,
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    content_keywords: Optional[Union[str, list]] = None,
    case_sensitive: bool = False
) -> pd.DataFrame:
    """
    Filters messages by role, sender, time range and/or content keywords.

    Args:
        messages (pd.DataFrame): The DataFrame of messages to filter.
        role (Optional[str]): The role to filter messages by (e.g., 'user', 'assistant').
        sender (Optional[str]): The sender to filter messages by.
        start_time (Optional[datetime]): Keep messages with a timestamp strictly after this time.
        end_time (Optional[datetime]): Keep messages with a timestamp strictly before this time.
        content_keywords (Optional[Union[str, list]]): Keywords to filter messages by content.
        case_sensitive (bool): Determines if the keyword search should be case-sensitive.

    Returns:
        pd.DataFrame: The matching messages, passed through ``to_df``.

    Raises:
        ValueError: If an error occurs during the filtering process.
    """
    try:
        outs = messages.copy()

        if content_keywords:
            # The frame must be the first argument; it was missing before.
            outs = _search_keywords(outs, content_keywords, case_sensitive)

        if role:
            outs = outs[outs['role'] == role]
        if sender:
            outs = outs[outs['sender'] == sender]

        if start_time or end_time:
            # Timestamps are written as ISO strings by the conversation class;
            # parse them so they can be compared with datetime bounds.
            stamps = pd.to_datetime(outs['timestamp'])
            keep = pd.Series(True, index=outs.index)
            if start_time:
                keep &= stamps > start_time
            if end_time:
                keep &= stamps < end_time
            outs = outs[keep]

        return to_df(outs)

    except Exception as e:
        raise ValueError(f"Error in filtering messages: {e}") from e
632
-
633
- def _replace_keyword(
634
- df,
635
- keyword: str,
636
- replacement: str,
637
- col='content',
638
- case_sensitive: bool = False
639
- ) -> None:
640
- """
641
- Replaces occurrences of a keyword within a specified column of a DataFrame with a given replacement.
642
-
643
- Args:
644
- df (pd.DataFrame): The DataFrame to operate on.
645
- keyword (str): The keyword to search for and replace.
646
- replacement (str): The string to replace the keyword with.
647
- col (str): The column to search for the keyword in.
648
- case_sensitive (bool): If True, the search and replacement are case-sensitive.
649
-
650
- Returns:
651
- None: This function modifies the DataFrame in place.
652
- """
653
- if not case_sensitive:
654
- df[col] = df[col].str.replace(
655
- keyword, replacement, case=False
656
- )
657
- else:
658
- df[col] = df[col].str.replace(
659
- keyword, replacement
660
- )
661
-
662
- def _remove_message(df, node_id: str) -> bool:
663
- """
664
- Removes a message from the DataFrame based on its node_id.
665
-
666
- Args:
667
- df (pd.DataFrame): The DataFrame from which the message should be removed.
668
- node_id (str): The node_id of the message to be removed.
669
-
670
- Returns:
671
- bool: True if the message was successfully removed, False otherwise.
672
- """
673
- initial_length = len(df)
674
- df = df[df["node_id"] != node_id]
675
-
676
- return len(df) < initial_length
677
-
678
- def _update_row(
679
- df, node_id = None, col = "node_id", value = None
680
- ) -> bool:
681
- """
682
- Updates the value of a specified column for a row identified by node_id in a DataFrame.
683
-
684
- Args:
685
- df (pd.DataFrame): The DataFrame to update.
686
- node_id (Optional[str]): The node_id of the row to be updated.
687
- col (str): The column to update.
688
- value (Any): The new value to be assigned to the column.
689
-
690
- Returns:
691
- bool: True if the update was successful, False otherwise.
692
- """
693
- index = df.index[df[col] == node_id].tolist()
694
- if index:
695
- df.at[index[0], col] = value
696
- return True
697
- return False
698
-
699
- def _remove_last_n_rows(df, steps: int) -> None:
700
- """
701
- Removes the last 'n' rows from a DataFrame.
702
-
703
- Args:
704
- df (pd.DataFrame): The DataFrame from which rows will be removed.
705
- steps (int): The number of rows to remove.
706
-
707
- Returns:
708
- pd.DataFrame: The DataFrame after the last 'n' rows have been removed.
709
-
710
- Raises:
711
- ValueError: If 'steps' is less than 0 or greater than the number of rows in the DataFrame.
712
- """
713
- if steps < 0 or steps > len(df):
714
- raise ValueError("Steps must be a non-negative integer less than or equal to the number of messages.")
715
- df = to_df(df[:-steps])
716
-
717
def get_rows(
    df,
    sender: Optional[str] = None,
    role: Optional[str] = None,
    n: int = 1,
    sign_ = False,
    from_="front",
) -> pd.DataFrame:
    """
    Retrieves rows from a DataFrame by sender and/or role, optionally signing them.

    Args:
        df (pd.DataFrame): The DataFrame to retrieve rows from.
        sender (Optional[str]): Keep only rows from this sender (ignored when falsy).
        role (Optional[str]): Keep only rows with this role (ignored when falsy).
        n (int): The number of rows to retrieve.
        sign_ (bool): Whether to sign the retrieved rows with ``sender``.
        from_ (str): 'front' for the first rows, 'last' for the last rows.

    Returns:
        pd.DataFrame: A DataFrame containing the retrieved rows.

    Raises:
        ValueError: If ``from_`` is neither 'front' nor 'last'.
    """
    if from_ not in ("front", "last"):
        raise ValueError(f"from_ must be 'front' or 'last', got {from_!r}")

    selected = df
    if sender:
        selected = selected[selected['sender'] == sender]
    if role:
        selected = selected[selected['role'] == role]

    # head/tail return nothing for n == 0, unlike an iloc[-0:] slice.
    outs = selected.tail(n) if from_ == "last" else selected.head(n)

    return _sign_message(outs, sender) if sign_ else outs
761
-
762
def _extend(df1: pd.DataFrame, df2: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """
    Extends a DataFrame with another DataFrame, removing duplicate messages.

    Args:
        df1 (pd.DataFrame): The original DataFrame to be extended.
        df2 (pd.DataFrame): The DataFrame containing new rows to add to df1. It must
            pass ``validate_messages``.
        **kwargs: Additional keyword arguments for pandas.DataFrame.drop_duplicates().
            Defaults are subset=['node_id'] and keep='first'; values given here override
            them. 'inplace' is ignored because a new DataFrame is always returned.

    Returns:
        pd.DataFrame: The extended DataFrame after adding rows from df2 and removing duplicates.

    Raises:
        ValueError: If df2 is not a valid messages DataFrame or an error occurs while extending.
    """
    validate_messages(df2)

    dedup_args = {"subset": ['node_id'], "keep": 'first'}
    dedup_args.update(kwargs)
    dedup_args.pop("inplace", None)

    try:
        has_existing = len(df1.dropna(how='all')) > 0
        has_new = len(df2.dropna(how='all')) > 0
        if has_existing and has_new:
            combined = to_df([df1, df2])
        elif has_new:
            # Extending an empty conversation: the result is just the new rows.
            combined = df2.copy()
        else:
            combined = df1.copy()
        combined = combined.drop_duplicates(**dedup_args)
        return to_df(combined)
    except Exception as e:
        raise ValueError(f"Error in extending messages: {e}") from e
787
-