lionagi 0.0.206__py3-none-any.whl → 0.0.208__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,483 +2,786 @@ import json
2
2
  import pandas as pd
3
3
  from datetime import datetime
4
4
  from typing import Any, Optional, Dict, Union
5
- from lionagi.schema import DataLogger
6
- from lionagi.utils import lcall, as_dict
7
- from ..messages.messages import Message, System, Instruction, Response
8
- from ..core_util import sign_message, validate_messages
9
5
 
6
+ from lionagi.utils.sys_util import as_dict, create_copy, strip_lower, to_df
7
+ from lionagi.utils.call_util import lcall
8
+ from ..messages.messages import Message, System, Instruction, Response
10
9
 
11
10
  class Conversation:
12
11
  """
13
- Represents a conversation with messages, tools, and instructions.
14
-
15
- A `Conversation` is a container for messages exchanged in a conversation, as well as tools and instructions
16
- for interacting with external services or tools.
17
-
12
+ A class to represent a conversation, encapsulating messages within a pandas DataFrame.
13
+
18
14
  Attributes:
19
- messages (pd.DataFrame): A DataFrame containing conversation messages.
20
- _logger (DataLogger): An instance of DataLogger for logging.
15
+ messages (pd.DataFrame): A DataFrame holding conversation messages with columns specified in _cols.
21
16
  """
17
+
18
+ _cols = ["node_id", "role", "sender", "timestamp", "content"]
22
19
 
23
- def __init__(self, dir: Optional[str] = None) -> None:
20
+ def __init__(self) -> None:
24
21
  """
25
- Initializes a Conversation object with an empty DataFrame for messages and a DataLogger.
26
-
27
- Args:
28
- dir (Optional[str]): The directory path for storing logs. Defaults to None.
29
-
30
- Examples:
31
- >>> conversation = Conversation(dir='logs/')
22
+ Initializes a Conversation instance with an empty DataFrame structured to hold messages.
32
23
  """
33
- self.messages = pd.DataFrame(columns=["node_id", "role", "sender", "timestamp", "content"])
34
- self.logger = DataLogger(dir=dir)
24
+ self.messages = pd.DataFrame(columns=Conversation._cols)
35
25
 
36
- def _create_message(
37
- self,
38
- system: Optional[Union[dict, list, System]] = None,
39
- instruction: Optional[Union[dict, list, Instruction]] = None,
40
- context: Optional[Union[str, Dict[str, Any]]] = None,
41
- response: Optional[Union[dict, list, Response]] = None,
42
- sender: Optional[str] = None
43
- ) -> Message:
26
+ @classmethod
27
+ def from_csv(cls, filepath: str, **kwargs) -> 'Conversation':
44
28
  """
45
- Creates a Message object based on the given parameters.
46
-
47
- Only one of `system`, `instruction`, or `response` can be provided to create a message.
29
+ Create a Conversation instance from a CSV file containing messages.
48
30
 
49
31
  Args:
50
- system (Optional[Union[dict, list, System]]): The system message content.
51
- instruction (Optional[Union[dict, list, Instruction]]): The instruction content.
52
- context (Optional[Union[str, Dict[str, Any]]]): The context associated with the instruction.
53
- response (Optional[Union[dict, list, Response]]): The response content.
54
- sender (Optional[str]): The sender of the message.
32
+ filepath (str): The path to the CSV file to be loaded.
33
+ **kwargs: Additional keyword arguments passed to pandas.read_csv function.
55
34
 
56
35
  Returns:
57
- Message: A message object created from the provided parameters.
58
-
59
- Raises:
60
- ValueError: If more than one or none of the parameters (system, instruction, response) are provided.
61
-
62
- Examples:
63
- >>> conversation = Conversation()
64
- >>> msg = conversation._create_message(system="System message", sender="system")
36
+ Conversation: An instance of Conversation with messages loaded from the specified CSV file.
65
37
  """
66
-
67
- if sum(lcall([system, instruction, response], bool)) != 1:
68
- raise ValueError("Error: Message must have one and only one role.")
69
- else:
70
- if isinstance(any([system, instruction, response]), Message):
71
- if system:
72
- return system
73
- elif instruction:
74
- return instruction
75
- elif response:
76
- return response
77
-
78
- msg = 0
79
- if response:
80
- msg = Response(response=response, sender=sender)
81
- elif instruction:
82
- msg = Instruction(instruction=instruction,
83
- context=context, sender=sender)
84
- elif system:
85
- msg = System(system=system, sender=sender)
86
- return msg
87
-
88
- def add_message(
89
- self,
90
- system: Optional[Union[dict, list, System]] = None,
91
- instruction: Optional[Union[dict, list, Instruction]] = None,
92
- context: Optional[Union[str, Dict[str, Any]]] = None,
93
- response: Optional[Union[dict, list, Response]] = None,
94
- sender: Optional[str] = None
95
- ) -> None:
38
+ messages = pd.read_csv(filepath)
39
+ messages = to_df(messages)
40
+ self = cls(messages=messages, **kwargs)
41
+ return self
42
+
43
+ @classmethod
44
+ def from_json(cls, filepath: str, **kwargs) -> 'Conversation':
96
45
  """
97
- Creates and adds a new message to the conversation's messages DataFrame.
46
+ Create a Conversation instance from a JSON file containing messages.
47
+
98
48
  Args:
99
- system (Optional[System]): Content for a system message.
100
- instruction (Optional[Instruction]): Content for an instruction message.
101
- context (Optional[Union[str, Dict[str, Any]]]): Context for the instruction message.
102
- response (Optional[Response]): Content for a response message.
103
- sender (Optional[str]): The sender of the message.
49
+ filepath (str): The path to the JSON file to be loaded.
50
+ **kwargs: Additional keyword arguments passed to pandas.read_json function.
104
51
 
105
- Examples:
106
- >>> conversation = Conversation()
107
- >>> conversation.add_message(instruction="What's the weather?", sender="user")
52
+ Returns:
53
+ Conversation: An instance of Conversation with messages loaded from the specified JSON file.
108
54
  """
109
- msg = self._create_message(
110
- system=system, instruction=instruction,
111
- context=context, response=response, sender=sender
112
- )
113
- message_dict = msg.to_dict()
114
- if isinstance(as_dict(message_dict['content']), dict):
115
- message_dict['content'] = json.dumps(message_dict['content'])
116
- message_dict['timestamp'] = datetime.now()
117
- self.messages.loc[len(self.messages)] = message_dict
118
-
55
+ messages = pd.read_json(filepath, orient="records", lines=True)
56
+ messages = to_df(messages)
57
+ self = cls(messages=messages, **kwargs)
58
+ return self
59
+
119
60
  @property
120
61
  def last_row(self) -> pd.Series:
121
62
  """
122
- Retrieves the last row from the messages DataFrame.
63
+ Retrieve the last row from the conversation messages as a pandas Series.
123
64
 
124
65
  Returns:
125
- pd.Series: A Series object representing the last message.
66
+ pd.Series: The last message in the conversation.
126
67
  """
127
- return self.messages.iloc[-1]
68
+ return get_rows(self.messages, n=1, from_='last')
128
69
 
129
70
  @property
130
71
  def first_system(self) -> pd.Series:
131
72
  """
132
- Retrieves the first system message from the messages DataFrame.
73
+ Retrieve the first system message from the conversation.
133
74
 
134
75
  Returns:
135
- pd.Series: A Series object representing the first system message.
76
+ pd.Series: The first message in the conversation where the role is 'system'.
136
77
  """
137
- return self.messages[self.messages.role == 'system'].iloc[0]
78
+ return get_rows(self.messages, role='system', n=1, from_='front')
138
79
 
139
80
  @property
140
81
  def last_response(self) -> pd.Series:
141
82
  """
142
- Retrieves the last response message from the messages DataFrame.
83
+ Retrieve the last response message from the conversation.
143
84
 
144
85
  Returns:
145
- pd.Series: A Series object representing the last response message.
86
+ pd.Series: The last message in the conversation where the role is 'assistant'.
146
87
  """
147
- return self.get_last_rows(role='assistant')
148
-
88
+ return get_rows(self.messages, role='assistant', n=1, from_='last')
89
+
149
90
  @property
150
- def last_instruction(self) -> pd.Series:
91
+ def last_response_content(self) -> Dict:
151
92
  """
152
- Retrieves the last instruction message from the messages DataFrame.
93
+ Retrieve the last response message content from the conversation.
153
94
 
154
95
  Returns:
155
- pd.Series: A Series object representing the last instruction message.
96
+ pd.Series: The last message in the conversation where the role is 'assistant'.
156
97
  """
157
- return self.get_last_rows(role='user')
158
-
159
- def get_last_rows(
160
- self,
161
- sender: Optional[str] = None,
162
- role: Optional[str] = None,
163
- n: int = 1,
164
- sign_ = False
165
- ) -> Union[pd.DataFrame, pd.Series]:
166
- """
167
- Retrieves the last n rows from the messages DataFrame filtered by sender or role.
98
+ return as_dict(self.last_response.content.iloc[-1])
168
99
 
169
- Args:
170
- sender (Optional[str]): The sender filter for the messages.
171
- role (Optional[str]): The role filter for the messages.
172
- n (int): The number of rows to retrieve.
173
- sign_: If sign messages with a sender identifier.
100
+ @property
101
+ def last_instruction(self) -> pd.Series:
102
+ """
103
+ Retrieve the last instruction message from the conversation.
174
104
 
175
105
  Returns:
176
- Union[pd.DataFrame, pd.Series]: The last n messages as a DataFrame or a single message as a Series.
177
-
178
- Raises:
179
- ValueError: If both sender and role are provided or if none is provided.
106
+ pd.Series: The last message in the conversation where the role is 'user'.
180
107
  """
108
+ return get_rows(self.messages, role='user', n=1, from_='last')
181
109
 
182
- if sender is None and role is None:
183
- outs = self.messages.iloc[-n:]
184
- elif sender and role:
185
- outs = self.messages[(self.messages['sender'] == sender) & (self.messages['role'] == role)].iloc[-n:]
186
- elif sender:
187
- outs = self.messages[self.messages['sender'] == sender].iloc[-n:]
188
- else:
189
- outs = self.messages[self.messages['role'] == role].iloc[-n:]
190
-
191
- return sign_message(outs, sender) if sign_ else outs
192
-
193
- def filter_messages_by(
194
- self,
195
- role: Optional[str] = None,
196
- sender: Optional[str] = None,
197
- start_time: Optional[datetime] = None,
198
- end_time: Optional[datetime] = None,
199
- content_keywords: Optional[Union[str, list]] = None,
200
- case_sensitive: bool = False
201
- ) -> pd.DataFrame:
110
+ @property
111
+ def last_action_request(self):
202
112
  """
203
- Retrieves messages filtered by a specific criterion.
204
-
205
- Args:
206
- role (Optional[str]): The role to filter the messages.
207
- sender (Optional[str]): The sender to filter the messages.
208
- start_time (Optional[datetime]): The start time to filter the messages.
209
- end_time (Optional[datetime]): The end time to filter the messages.
210
- content_keywords (Optional[Union[str, list]]): The content to filter the messages.
211
- case_sensitive (bool): Flag to indicate if the search should be case sensitive. Defaults to False.
113
+ Retrieve the last action request message from the conversation.
212
114
 
213
115
  Returns:
214
- pd.DataFrame: A DataFrame containing filtered messages.
215
-
216
- Raises:
217
- ValueError: If more than one or none of the filtering criteria are provided.
116
+ pd.Series: The last message in the conversation with sender 'action_request'.
218
117
  """
219
- outs = self.messages.copy()
220
-
221
- if content_keywords:
222
- outs = self.search_keywords(content_keywords, case_sensitive)
223
-
224
- outs = outs[outs['role'] == role] if role else outs
225
- outs = outs[outs['sender'] == sender] if sender else outs
226
- outs = outs[outs['timestamp'] > start_time] if start_time else outs
227
- outs = outs[outs['timestamp'] < end_time] if end_time else outs
228
- return outs
229
-
230
- def replace_keyword(
231
- self,
232
- keyword: str,
233
- replacement: str,
234
- case_sensitive: bool = False
235
- ) -> None:
118
+ return get_rows(self.messages, sender='action_request', n=1, from_='last')
119
+
120
+ @property
121
+ def last_action_response(self):
236
122
  """
237
- Replaces a keyword in the content of all messages with a replacement string.
123
+ Retrieve the last action response message from the conversation.
238
124
 
239
- Args:
240
- keyword (str): The keyword to replace.
241
- replacement (str): The string to replace the keyword with.
242
- case_sensitive (bool, optional): Flag to indicate if the replacement should be case sensitive. Defaults to False.
125
+ Returns:
126
+ pd.Series: The last message in the conversation with sender 'action_response'.
243
127
  """
244
- if not case_sensitive:
245
- self.messages["content"] = self.messages["content"].str.replace(
246
- keyword, replacement, case=False
247
- )
248
- else:
249
- self.messages["content"] = self.messages["content"].str.replace(
250
- keyword, replacement
251
- )
128
+ return get_rows(self.messages, sender='action_response', n=1, from_='last')
252
129
 
253
- def search_keywords(
254
- self,
255
- keywords: Union[str, list],
256
- case_sensitive: bool = False
257
- ) -> pd.DataFrame:
130
+ @property
131
+ def len_messages(self):
258
132
  """
259
- Searches for a keyword in the content of all messages and returns the messages containing it.
260
-
261
- Args:
262
- keywords (str): The keywords to search for.
263
- case_sensitive (bool, optional): Flag to indicate if the search should be case sensitive. Defaults to False.
133
+ Get the total number of messages in the conversation.
264
134
 
265
135
  Returns:
266
- pd.DataFrame: A DataFrame containing messages with the specified keyword.
136
+ int: The total number of messages.
267
137
  """
268
- if isinstance(keywords, list):
269
- keywords = '|'.join(keywords)
270
- if not case_sensitive:
271
- return self.messages[
272
- self.messages["content"].str.contains(keywords, case=False)
273
- ]
274
- return self.messages[self.messages["content"].str.contains(keywords)]
275
-
276
- def remove_from_messages(self, message_id: str) -> bool:
138
+ return len(self.messages)
139
+
140
+ @property
141
+ def len_instructions(self):
277
142
  """
278
- Removes a message from the conversation based on its message ID.
279
-
280
- Args:
281
- message_id (str): The ID of the message to be removed.
143
+ Get the total number of instruction messages (messages with role 'user') in the conversation.
282
144
 
283
145
  Returns:
284
- bool: True if the message was successfully removed, False otherwise.
146
+ int: The total number of instruction messages.
285
147
  """
286
- initial_length = len(self.messages)
287
- self.messages = self.messages[self.messages["node_id"] != message_id]
288
- return len(self.messages) < initial_length
148
+ return len(self.messages[self.messages.role == 'user'])
149
+
150
+ @property
151
+ def len_responses(self):
152
+ """
153
+ Get the total number of response messages (messages with role 'assistant') in the conversation.
289
154
 
290
- def update_messages_content(
291
- self,
292
- message_id: str,
293
- col: str,
294
- value: Any
295
- ) -> bool:
155
+ Returns:
156
+ int: The total number of response messages.
296
157
  """
297
- Updates the content of a specific message in the conversation.
298
158
 
299
- Args:
300
- message_id (str): The ID of the message to be updated.
301
- col (str): The column of the message that needs to be updated.
302
- value (Any): The new value to be set for the specified column.
159
+ return len(self.messages[self.messages.role == 'assistant'])
160
+
161
+ @property
162
+ def len_systems(self):
163
+ """
164
+ Get the total number of system messages (messages with role 'system') in the conversation.
303
165
 
304
166
  Returns:
305
- bool: True if the update was successful, False otherwise.
167
+ int: The total number of system messages.
168
+ """
169
+ return len(self.messages[self.messages.role == 'system'])
306
170
 
307
- Examples:
308
- >>> conversation = Conversation()
309
- >>> conversation.add_message(system="Initial message", sender="system")
310
- >>> success = conversation.update_messages_content(
311
- ... message_id="1", col="content", value="Updated message")
171
+ @property
172
+ def info(self):
312
173
  """
313
- index = self.messages.index[self.messages["id_"] == message_id].tolist()
314
- if index:
315
- self.messages.at[index[0], col] = value
316
- return True
317
- return False
174
+ Get a summary of the conversation messages categorized by role.
318
175
 
319
- def info(self, use_sender: bool = False) -> Dict[str, int]:
176
+ Returns:
177
+ Dict[str, int]: A dictionary with keys as message roles and values as counts.
320
178
  """
321
- Provides a summary of the conversation messages.
322
179
 
323
- Args:
324
- use_sender (bool, optional): Determines whether to summarize by sender or by role. Defaults to False.
180
+ return self._info()
181
+
182
+ @property
183
+ def sender_info(self):
184
+ """
185
+ Provides a descriptive summary of the conversation, including the total number of messages,
186
+ a summary by role, and the first five messages.
325
187
 
326
188
  Returns:
327
- Dict[str, int]: A dictionary containing counts of messages either by role or sender.
189
+ Dict[str, Any]: A dictionary containing the total number of messages, summary by role,
190
+ and a list of the first five message dictionaries.
328
191
  """
329
- messages = self.messages['sender'] if use_sender else self.messages['role']
330
- result = messages.value_counts().to_dict()
331
- result['total'] = len(self.messages)
332
- return result
333
-
192
+ return self._info(use_sender=True)
193
+
334
194
  @property
335
195
  def describe(self) -> Dict[str, Any]:
336
196
  """
337
- Describes the conversation with various statistics and information.
197
+ Provides a descriptive summary of the conversation, including the total number of messages,
198
+ a summary by role, and the first five messages.
338
199
 
339
200
  Returns:
340
- Dict[str, Any]: A dictionary containing information such as total number of messages, summary by role,
341
- and individual messages.
201
+ Dict[str, Any]: A dictionary containing the total number of messages, summary by role, and a list of the first maximum five message dictionaries.
342
202
  """
343
203
  return {
344
204
  "total_messages": len(self.messages),
345
- "summary_by_role": self.info(),
205
+ "summary_by_role": self._info(),
346
206
  "messages": [
347
207
  msg.to_dict() for _, msg in self.messages.iterrows()
348
- ],
208
+ ][: self.len_messages -1 if self.len_messages < 5 else 5],
349
209
  }
350
210
 
351
- def history(
352
- self, begin_: Optional[datetime] = None, end_: Optional[datetime] = None
353
- ) -> pd.DataFrame:
211
+ def clone(self, num: Optional[int] = None) -> 'Conversation':
354
212
  """
355
- Retrieves a history of messages within a specified date range.
213
+ Creates a copy or multiple copies of the current Conversation instance.
356
214
 
357
215
  Args:
358
- begin_ (Optional[datetime], optional): The start date of the message history. Defaults to None.
359
- end_ (Optional[datetime], optional): The end date of the message history. Defaults to None.
216
+ num (Optional[int], optional): The number of copies to create. If None, a single copy is created.
217
+ Defaults to None.
360
218
 
361
219
  Returns:
362
- pd.DataFrame: A DataFrame containing messages within the specified date range.
363
- """
364
-
365
- if isinstance(begin_, str):
366
- begin_ = datetime.strptime(begin_, '%Y-%m-%d')
367
- if isinstance(end_, str):
368
- end_ = datetime.strptime(end_, '%Y-%m-%d')
369
- if begin_ and end_:
370
- return self.messages[
371
- (self.messages["timestamp"].dt.date >= begin_.date())
372
- & (self.messages["timestamp"].dt.date <= end_.date())
373
- ]
374
- elif begin_:
375
- return self.messages[(self.messages["timestamp"].dt.date >= begin_.date())]
376
- elif end_:
377
- return self.messages[(self.messages["timestamp"].dt.date <= end_.date())]
378
- return self.messages
379
-
380
- def clone(self) -> 'Conversation':
381
- """
382
- Creates a clone of the current conversation.
383
-
384
- Returns:
385
- Conversation: A new Conversation object that is a clone of the current conversation.
220
+ Conversation: A new Conversation instance or a list of Conversation instances if num is specified.
386
221
  """
387
222
  cloned = Conversation()
388
223
  cloned.logger.set_dir(self.logger.dir)
389
224
  cloned.messages = self.messages.copy()
225
+ if num:
226
+ return create_copy(cloned, num=num)
390
227
  return cloned
391
228
 
392
- # def merge_conversation(self, other: 'Conversation', update: bool = False,) -> None:
393
- # """
394
- # Merges another conversation into the current one.
395
- #
396
- # Args:
397
- # other (Conversation): The other conversation to merge with the current one.
398
- # update (bool, optional): If True, updates the first system message before merging. Defaults to False.
399
- # """
400
- # if update:
401
- # self.first_system = other.first_system.copy()
402
- # df = pd.concat([self.messages.copy(), other.messages.copy()], ignore_index=True)
403
- # self.messages = df.drop_duplicates().reset_index(drop=True, inplace=True)
404
-
405
- def rollback(self, steps: int) -> None:
229
+ def add_message(
230
+ self,
231
+ system: Optional[Union[dict, list, System]] = None,
232
+ instruction: Optional[Union[dict, list, Instruction]] = None,
233
+ context: Optional[Union[str, Dict[str, Any]]] = None,
234
+ response: Optional[Union[dict, list, Response]] = None,
235
+ sender: Optional[str] = None
236
+ ) -> None:
406
237
  """
407
- Rollbacks the conversation by a specified number of steps (messages).
238
+ Adds a message to the conversation.
408
239
 
409
240
  Args:
410
- steps (int): The number of steps to rollback.
241
+ system (Optional[Union[dict, list, System]], optional): System message content or object.
242
+ instruction (Optional[Union[dict, list, Instruction]], optional): Instruction message content or object.
243
+ context (Optional[Union[str, Dict[str, Any]]], optional): Context for the message.
244
+ response (Optional[Union[dict, list, Response]], optional): Response message content or object.
245
+ sender (Optional[str], optional): The sender of the message.
411
246
 
412
247
  Raises:
413
- ValueError: If steps are not a non-negative integer or greater than the number of messages.
248
+ ValueError: If the content cannot be converted to a JSON string.
414
249
  """
415
- if steps < 0 or steps > len(self.messages):
416
- raise ValueError("Steps must be a non-negative integer less than or equal to the number of messages.")
417
- self.messages = self.messages[:-steps].reset_index(drop=True)
250
+ msg = self._create_message(
251
+ system=system, instruction=instruction,
252
+ context=context, response=response, sender=sender
253
+ )
254
+ message_dict = msg.to_dict()
255
+ if isinstance(as_dict(message_dict['content']), dict):
256
+ message_dict['content'] = json.dumps(message_dict['content'])
257
+ message_dict['timestamp'] = datetime.now().isoformat()
258
+ self.messages.loc[len(self.messages)] = message_dict
259
+
260
+ def remove_message(self, node_id: str) -> None:
261
+ """
262
+ Removes a message from the conversation based on its node_id.
418
263
 
419
- def reset(self) -> None:
264
+ Args:
265
+ node_id (str): The node_id of the message to be removed.
420
266
  """
421
- Resets the conversation, clearing all messages.
267
+ _remove_message(self.messages, node_id)
268
+
269
+ def update_message(
270
+ self, value: Any, node_id: Optional[str] = None, col: str = 'node_id'
271
+ ) -> None:
422
272
  """
423
- self.messages = pd.DataFrame(columns=self.messages.columns)
273
+ Updates a message in the conversation based on its node_id.
424
274
 
425
- def to_csv(self, filepath: str, **kwargs) -> None:
275
+ Args:
276
+ value (Any): The new value to update the message with.
277
+ node_id (Optional[str], optional): The node_id of the message to be updated. Defaults to None.
278
+ col (str, optional): The column to be updated. Defaults to 'node_id'.
279
+
280
+ Returns:
281
+ bool: True if the update was successful, False otherwise.
426
282
  """
427
- Exports the conversation messages to a CSV file.
283
+ return _update_row(self.messages, node_id=node_id, col=col, value=value)
284
+
285
+ def change_first_system_message(
286
+ self, system: Union[str, Dict[str, Any], System], sender: Optional[str] = None
287
+ ):
288
+ """
289
+ Updates the first system message in the conversation.
428
290
 
429
291
  Args:
430
- filepath (str): The file path where the CSV will be saved.
431
- **kwargs: Additional keyword arguments for `pandas.DataFrame.to_csv` method.
292
+ system (Union[str, Dict[str, Any], System]): The new system message content, which can be a string,
293
+ a dictionary of message content, or a System object.
294
+ sender (Optional[str], optional): The sender of the system message. Defaults to None.
295
+
296
+ Raises:
297
+ ValueError: If there are no system messages in the conversation or if the input cannot be
298
+ converted into a system message.
432
299
  """
433
- self.messages.to_csv(filepath, **kwargs)
300
+ if self.len_systems == 0:
301
+ raise ValueError("There is no system message in the messages.")
302
+
303
+ if not isinstance(system, (str, Dict, System)):
304
+ raise ValueError("Input cannot be converted into a system message.")
305
+
306
+ elif isinstance(system, (str, Dict)):
307
+ system = System(system, sender=sender)
308
+
309
+ elif isinstance(system, System):
310
+ message_dict = system.to_dict()
311
+ if sender:
312
+ message_dict['sender'] = sender
313
+ message_dict['timestamp'] = datetime.now().isoformat()
314
+ sys_index = self.messages[self.messages.role == 'system'].index
315
+ self.messages.loc[sys_index[0]] = message_dict
434
316
 
435
- def from_csv(self, filepath: str, **kwargs) -> None:
317
+ def rollback(self, steps: int) -> None:
318
+ """
319
+ Removes the last 'n' messages from the conversation.
320
+
321
+ Args:
322
+ steps (int): The number of messages to remove from the end of the conversation.
323
+
324
+ Raises:
325
+ ValueError: If 'steps' is not a positive integer or exceeds the number of messages.
326
+ """
327
+ return _remove_last_n_rows(self.messages, steps)
328
+
329
+ def clear_messages(self) -> None:
330
+ """
331
+ Clears all messages from the conversation, resetting it to an empty state.
332
+ """
333
+ self.messages = pd.DataFrame(columns=Conversation._cols)
334
+
335
+ def to_csv(self, filepath: str, **kwargs) -> None:
436
336
  """
437
- Imports conversation messages from a CSV file.
337
+ Exports the conversation messages to a CSV file.
438
338
 
439
339
  Args:
440
- filepath (str): The file path of the CSV to be read.
441
- **kwargs: Additional keyword arguments for `pandas.read_csv` method.
340
+ filepath (str): The path to the file where the CSV will be saved.
341
+ **kwargs: Additional keyword arguments passed to pandas.DataFrame.to_csv() method.
442
342
  """
443
- self.messages = pd.read_csv(filepath, **kwargs)
343
+ self.messages.to_csv(filepath, **kwargs)
444
344
 
445
345
  def to_json(self, filepath: str) -> None:
446
346
  """
447
347
  Exports the conversation messages to a JSON file.
448
348
 
449
349
  Args:
450
- filepath (str): The file path where the JSON will be saved.
350
+ filepath (str): The path to the file where the JSON will be saved.
351
+ **kwargs: Additional keyword arguments passed to pandas.DataFrame.to_json() method, such as
352
+ 'orient', 'lines', and 'date_format'.
353
+
354
+ Note:
355
+ The recommended kwargs for compatibility with the from_json class method are
356
+ orient='records', lines=True, and date_format='iso'.
451
357
  """
452
358
  self.messages.to_json(
453
359
  filepath, orient="records", lines=True, date_format="iso")
454
360
 
455
- def from_json(self, filepath: str) -> None:
361
+ def replace_keyword(
362
+ self,
363
+ keyword: str,
364
+ replacement: str,
365
+ col: str = 'content',
366
+ case_sensitive: bool = False
367
+ ) -> None:
456
368
  """
457
- Imports conversation messages from a JSON file.
369
+ Replaces all occurrences of a keyword in a specified column of the conversation's messages with a given replacement.
458
370
 
459
371
  Args:
460
- filepath (str): The file path of the JSON to be read.
372
+ keyword (str): The keyword to be replaced.
373
+ replacement (str): The string to replace the keyword with.
374
+ col (str, optional): The column where the replacement should occur. Defaults to 'content'.
375
+ case_sensitive (bool, optional): If True, the replacement is case sensitive. Defaults to False.
461
376
  """
462
- self.reset()
463
- self.messages = pd.read_json(filepath, orient="records", lines=True)
377
+ _replace_keyword(
378
+ self.messages, keyword, replacement, col=col,
379
+ case_sensitive=case_sensitive
380
+ )
381
+
382
+ def search_keywords(
383
+ self,
384
+ keywords: Union[str, list],
385
+ case_sensitive: bool = False, reset_index: bool = False, dropna: bool = False
386
+ ) -> pd.DataFrame:
387
+ """
388
+ Searches for messages containing specified keywords within the conversation.
464
389
 
390
+ Args:
391
+ keywords (Union[str, list]): The keyword(s) to search for within the conversation's messages.
392
+ case_sensitive (bool, optional): If True, the search is case sensitive. Defaults to False.
393
+ reset_index (bool, optional): If True, resets the index of the resulting DataFrame. Defaults to False.
394
+ dropna (bool, optional): If True, drops messages with NA values before searching. Defaults to False.
395
+
396
+ Returns:
397
+ pd.DataFrame: A DataFrame containing messages that match the search criteria.
398
+ """
399
+ return _search_keywords(
400
+ self.messages, keywords, case_sensitive, reset_index, dropna
401
+ )
402
+
465
403
  def extend(self, messages: pd.DataFrame, **kwargs) -> None:
466
404
  """
467
- Extends the current conversation messages with additional messages from a DataFrame.
405
+ Extends the conversation by appending new messages, optionally avoiding duplicates based on specified criteria.
468
406
 
469
407
  Args:
470
- messages (pd.DataFrame): The DataFrame containing messages to be added to the conversation.
471
- kwargs: for pd.df.drop_duplicates
408
+ messages (pd.DataFrame): A DataFrame containing new messages to append to the conversation.
409
+ **kwargs: Additional keyword arguments for handling duplicates (passed to pandas' drop_duplicates method).
472
410
  """
411
+ self.messages = _extend(self.messages, messages, **kwargs)
473
412
 
474
- validate_messages(messages)
413
def filter_by(
    self,
    role: Optional[str] = None,
    sender: Optional[str] = None,
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    content_keywords: Optional[Union[str, list]] = None,
    case_sensitive: bool = False
) -> pd.DataFrame:
    """
    Returns the subset of this conversation's messages matching the given criteria.

    All criteria are optional and are handed unchanged to the module-level
    ``_filter_messages_by`` helper together with ``self.messages``.

    Args:
        role (Optional[str]): Keep only messages with this role (e.g., 'user', 'assistant', 'system').
        sender (Optional[str]): Keep only messages from this sender.
        start_time (Optional[datetime]): Keep only messages sent after this time.
        end_time (Optional[datetime]): Keep only messages sent before this time.
        content_keywords (Optional[Union[str, list]]): Keep only messages containing these keywords.
        case_sensitive (bool, optional): If True, keyword search is case sensitive. Defaults to False.

    Returns:
        pd.DataFrame: A DataFrame containing messages that match the filter criteria.
    """
    criteria = {
        "role": role,
        "sender": sender,
        "start_time": start_time,
        "end_time": end_time,
        "content_keywords": content_keywords,
        "case_sensitive": case_sensitive,
    }
    return _filter_messages_by(self.messages, **criteria)
441
+
442
def _create_message(
    self,
    system: Optional[Union[dict, list, System]] = None,
    instruction: Optional[Union[dict, list, Instruction]] = None,
    context: Optional[Union[str, Dict[str, Any]]] = None,
    response: Optional[Union[dict, list, Response]] = None,
    sender: Optional[str] = None
) -> Message:
    """
    Creates a message object from exactly one of ``system``, ``instruction`` or ``response``.

    If the single supplied value is already a ``Message`` instance it is
    returned unchanged; otherwise it is wrapped in the matching message class.

    Args:
        system (Optional[Union[dict, list, System]]): System message to be added.
        instruction (Optional[Union[dict, list, Instruction]]): Instruction message to be added.
        context (Optional[Union[str, Dict[str, Any]]]): Context for the instruction message
            (only used when ``instruction`` is given).
        response (Optional[Union[dict, list, Response]]): Response message to be added.
        sender (Optional[str]): The sender of the message.

    Returns:
        Message: A Message object created from (or identical to) the provided value.

    Raises:
        ValueError: If none or more than one of ``system``, ``instruction`` and ``response`` is truthy.
    """
    provided = [item for item in (system, instruction, response) if item]
    if len(provided) != 1:
        raise ValueError("Error: Message must have one and only one role.")

    # Check the supplied object itself (not a boolean derived from it) so that
    # ready-made Message instances are passed through instead of re-wrapped.
    if isinstance(provided[0], Message):
        return provided[0]

    if response:
        return Response(response=response, sender=sender)
    if instruction:
        return Instruction(instruction=instruction,
                           context=context, sender=sender)
    return System(system=system, sender=sender)
487
+
488
+ def _info(self, use_sender: bool = False) -> Dict[str, int]:
489
+ """
490
+ Generates a summary of the conversation's messages, either by role or sender.
491
+
492
+ Args:
493
+ use_sender (bool, optional): If True, generates the summary based on sender. If False, uses role. Defaults to False.
494
+
495
+ Returns:
496
+ Dict[str, int]: A dictionary with counts of messages, categorized either by role or sender.
497
+ """
498
+ messages = self.messages['sender'] if use_sender else self.messages['role']
499
+ result = messages.value_counts().to_dict()
500
+ result['total'] = len(self.len_messages)
501
+ return result
502
+
503
def validate_messages(messages):
    """
    Validates the structure and content of a DataFrame containing conversation messages.

    Args:
        messages (pd.DataFrame): The DataFrame containing conversation messages to validate.

    Returns:
        bool: True if the DataFrame is valid; otherwise a ValueError is raised.

    Raises:
        ValueError: If the columns are not exactly
            ['node_id', 'role', 'sender', 'timestamp', 'content'], if any cell is null,
            if a role other than 'system', 'user' or 'assistant' is present, or if a
            content value (after stripping an optional 'Sender ...:' prefix) is not a
            JSON string.
    """
    expected_columns = ['node_id', 'role', 'sender', 'timestamp', 'content']
    allowed_roles = ('system', 'user', 'assistant')

    if list(messages.columns) != expected_columns:
        raise ValueError('Invalid messages dataframe. Unmatched columns.')
    if messages.isnull().values.any():
        raise ValueError('Invalid messages dataframe. Cannot have null.')
    if not all(role in allowed_roles for role in messages['role'].unique()):
        raise ValueError('Invalid messages dataframe. Cannot have role other than ["system", "user", "assistant"].')

    for cont in messages['content']:
        try:
            if cont.startswith('Sender'):
                # Signed content looks like "Sender <name>: <json>"; keep only
                # the part after the first colon ('' when no colon is present,
                # which then fails JSON parsing below).
                cont = cont.partition(':')[2]
            json.loads(cont)
        except (AttributeError, TypeError, ValueError) as e:
            # AttributeError/TypeError: content is not a string;
            # ValueError: json.JSONDecodeError for malformed JSON.
            raise ValueError('Invalid messages dataframe. Content expect json string.') from e
    return True
531
+
532
def _sign_message(messages, sender: str):
    """
    Prefixes each message's content with 'Sender <sender>: ' to indicate its origin.

    Content that already starts with a 'Sender ...:' prefix has that prefix
    replaced rather than stacked, so signing the same frame repeatedly keeps a
    single prefix and a single separating space.

    Args:
        messages (pd.DataFrame): The DataFrame containing conversation messages to sign.
            It is copied; the input frame is not modified.
        sender (str): The name or identifier of the sender to prefix the messages with.

    Returns:
        pd.DataFrame: The signed copy, passed through ``to_df``.

    Raises:
        ValueError: If the sender is None or equivalent to the string 'none'.
    """
    if sender is None or strip_lower(sender) == 'none':
        raise ValueError("sender cannot be None")
    df = messages.copy()

    def _resign(content):
        # Strip an existing "Sender <name>:" prefix only when a colon is
        # actually present; otherwise treat the whole text as the payload.
        if content.startswith('Sender'):
            _, colon, rest = content.partition(':')
            if colon:
                # Drop the single separator space written by a previous
                # signing so spaces do not accumulate on each re-sign.
                content = rest[1:] if rest.startswith(' ') else rest
        return f"Sender {sender}: {content}"

    # Column-wise map avoids label-based .loc lookups, which return a Series
    # (not a string) when the index contains duplicate labels.
    df['content'] = df['content'].map(_resign)

    return to_df(df)
558
+
559
+ def _search_keywords(
560
+ messages,
561
+ keywords: Union[str, list],
562
+ case_sensitive: bool = False, reset_index=False, dropna=False
563
+ ):
564
+ """
565
+ Searches for keywords in the 'content' column of a DataFrame and returns matching rows.
566
+
567
+ Args:
568
+ messages (pd.DataFrame): The DataFrame to search within.
569
+ keywords (Union[str, List[str]]): Keyword(s) to search for. If a list, combines keywords with an OR condition.
570
+ case_sensitive (bool, optional): Whether the search should be case-sensitive. Defaults to False.
571
+ reset_index (bool, optional): Whether to reset the index of the resulting DataFrame. Defaults to False.
572
+ dropna (bool, optional): Whether to drop rows with NA values in the 'content' column. Defaults to False.
573
+
574
+ Returns:
575
+ pd.DataFrame: A DataFrame containing rows where the 'content' column matches the search criteria.
576
+ """
577
+ out = ''
578
+ if isinstance(keywords, list):
579
+ keywords = '|'.join(keywords)
580
+ if not case_sensitive:
581
+ out = messages[
582
+ messages["content"].str.contains(keywords, case=False)
583
+ ]
584
+ out = messages[messages["content"].str.contains(keywords)]
585
+ if reset_index or dropna:
586
+ out = to_df(out, reset_index=reset_index)
587
+ return out
588
+
589
def _filter_messages_by(
    messages,
    role: Optional[str] = None,
    sender: Optional[str] = None,
    start_time: Optional[datetime] = None,
    end_time: Optional[datetime] = None,
    content_keywords: Optional[Union[str, list]] = None,
    case_sensitive: bool = False
) -> pd.DataFrame:
    """
    Filters messages in a DataFrame based on role, sender, time range, and keywords.

    Each criterion that is falsy (e.g. None) is skipped; the remaining ones are
    applied one after another, so a row must satisfy all of them.

    Args:
        messages (pd.DataFrame): The DataFrame of messages to filter. It is copied, not modified.
        role (Optional[str]): The role to filter messages by (e.g., 'user', 'assistant').
        sender (Optional[str]): The sender to filter messages by.
        start_time (Optional[datetime]): Keep rows whose 'timestamp' is strictly greater than this.
        end_time (Optional[datetime]): Keep rows whose 'timestamp' is strictly less than this.
        content_keywords (Optional[Union[str, list]]): Keywords to filter messages by content.
        case_sensitive (bool): Determines if the keyword search should be case-sensitive.

    Returns:
        pd.DataFrame: The matching rows, passed through ``to_df``.

    Raises:
        ValueError: If an error occurs during the filtering process.
    """
    try:
        outs = messages.copy()

        if content_keywords:
            # The frame being filtered must be the first argument of the search helper.
            outs = _search_keywords(outs, content_keywords, case_sensitive)

        if role:
            outs = outs[outs['role'] == role]
        if sender:
            outs = outs[outs['sender'] == sender]
        if start_time:
            outs = outs[outs['timestamp'] > start_time]
        if end_time:
            outs = outs[outs['timestamp'] < end_time]

        return to_df(outs)

    except Exception as e:
        raise ValueError(f"Error in filtering messages: {e}") from e
632
+
633
+ def _replace_keyword(
634
+ df,
635
+ keyword: str,
636
+ replacement: str,
637
+ col='content',
638
+ case_sensitive: bool = False
639
+ ) -> None:
640
+ """
641
+ Replaces occurrences of a keyword within a specified column of a DataFrame with a given replacement.
642
+
643
+ Args:
644
+ df (pd.DataFrame): The DataFrame to operate on.
645
+ keyword (str): The keyword to search for and replace.
646
+ replacement (str): The string to replace the keyword with.
647
+ col (str): The column to search for the keyword in.
648
+ case_sensitive (bool): If True, the search and replacement are case-sensitive.
649
+
650
+ Returns:
651
+ None: This function modifies the DataFrame in place.
652
+ """
653
+ if not case_sensitive:
654
+ df[col] = df[col].str.replace(
655
+ keyword, replacement, case=False
656
+ )
657
+ else:
658
+ df[col] = df[col].str.replace(
659
+ keyword, replacement
660
+ )
661
+
662
+ def _remove_message(df, node_id: str) -> bool:
663
+ """
664
+ Removes a message from the DataFrame based on its node_id.
665
+
666
+ Args:
667
+ df (pd.DataFrame): The DataFrame from which the message should be removed.
668
+ node_id (str): The node_id of the message to be removed.
669
+
670
+ Returns:
671
+ bool: True if the message was successfully removed, False otherwise.
672
+ """
673
+ initial_length = len(df)
674
+ df = df[df["node_id"] != node_id]
675
+
676
+ return len(df) < initial_length
677
+
678
+ def _update_row(
679
+ df, node_id = None, col = "node_id", value = None
680
+ ) -> bool:
681
+ """
682
+ Updates the value of a specified column for a row identified by node_id in a DataFrame.
683
+
684
+ Args:
685
+ df (pd.DataFrame): The DataFrame to update.
686
+ node_id (Optional[str]): The node_id of the row to be updated.
687
+ col (str): The column to update.
688
+ value (Any): The new value to be assigned to the column.
689
+
690
+ Returns:
691
+ bool: True if the update was successful, False otherwise.
692
+ """
693
+ index = df.index[df[col] == node_id].tolist()
694
+ if index:
695
+ df.at[index[0], col] = value
696
+ return True
697
+ return False
698
+
699
+ def _remove_last_n_rows(df, steps: int) -> None:
700
+ """
701
+ Removes the last 'n' rows from a DataFrame.
702
+
703
+ Args:
704
+ df (pd.DataFrame): The DataFrame from which rows will be removed.
705
+ steps (int): The number of rows to remove.
706
+
707
+ Returns:
708
+ pd.DataFrame: The DataFrame after the last 'n' rows have been removed.
709
+
710
+ Raises:
711
+ ValueError: If 'steps' is less than 0 or greater than the number of rows in the DataFrame.
712
+ """
713
+ if steps < 0 or steps > len(df):
714
+ raise ValueError("Steps must be a non-negative integer less than or equal to the number of messages.")
715
+ df = to_df(df[:-steps])
716
+
717
def get_rows(
    df,
    sender: Optional[str] = None,
    role: Optional[str] = None,
    n: int = 1,
    sign_ = False,
    from_="front",
) -> pd.DataFrame:
    """
    Retrieves up to ``n`` rows from a DataFrame, optionally filtered by sender and/or role.

    Args:
        df (pd.DataFrame): The DataFrame to retrieve rows from.
        sender (Optional[str]): The sender based on which to filter rows.
        role (Optional[str]): The role based on which to filter rows.
        n (int): The number of rows to retrieve.
        sign_ (bool): Whether to sign the retrieved rows with ``sender`` via ``_sign_message``.
        from_ (str): Direction to retrieve rows ('front' for the first rows, 'last' for the last rows).

    Returns:
        pd.DataFrame: A DataFrame containing the retrieved rows.

    Raises:
        ValueError: If ``from_`` is neither 'front' nor 'last'.
    """
    if from_ not in ("front", "last"):
        # Fail with a clear message instead of hitting an unbound local below.
        raise ValueError(f"from_ must be 'front' or 'last', got {from_!r}")

    # Select the candidate rows once; the direction only decides which end is kept.
    if sender is None and role is None:
        matched = df
    elif sender and role:
        matched = df[(df['sender'] == sender) & (df['role'] == role)]
    elif sender:
        matched = df[df['sender'] == sender]
    else:
        matched = df[df['role'] == role]

    # head/tail behave like the positional slices [:n] / [-n:] but return an
    # empty frame for n == 0 at either end.
    outs = matched.tail(n) if from_ == "last" else matched.head(n)

    return _sign_message(outs, sender) if sign_ else outs
761
+
762
def _extend(df1: pd.DataFrame, df2: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """
    Extends a DataFrame with another DataFrame, removing duplicate rows.

    Args:
        df1 (pd.DataFrame): The original DataFrame to be extended.
        df2 (pd.DataFrame): The DataFrame containing new rows to add to df1. It must pass
            ``validate_messages``.
        **kwargs: Additional keyword arguments for pandas.DataFrame.drop_duplicates().
            By default duplicates are detected on 'node_id' and the first occurrence is
            kept; ``subset`` / ``keep`` given here override those defaults.

    Returns:
        pd.DataFrame: The extended DataFrame. If ``df2`` has no non-empty rows, ``df1`` is
            returned unchanged; if ``df1`` has no non-empty rows, the result is built from
            ``df2`` alone.

    Raises:
        ValueError: If ``df2`` is invalid or an error occurs during the extension process.
    """
    validate_messages(df2)
    try:
        # Nothing to add: hand back the original frame rather than None.
        if len(df2.dropna(how='all')) == 0:
            return df1

        # Nothing to extend: start from the new rows only.
        if len(df1.dropna(how='all')) == 0:
            df = df2.copy()
        else:
            df = to_df([df1, df2])

        # Caller-supplied options take precedence over the defaults instead of
        # colliding with them as duplicate keyword arguments.
        options = {"subset": ['node_id'], "keep": 'first', **kwargs}
        df.drop_duplicates(inplace=True, **options)
        return to_df(df)
    except Exception as e:
        raise ValueError(f"Error in extending messages: {e}") from e
787
+