reme-ai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reme_ai/__init__.py +6 -0
- reme_ai/app.py +17 -0
- reme_ai/config/__init__.py +0 -0
- reme_ai/config/config_parser.py +6 -0
- reme_ai/constants/__init__.py +7 -0
- reme_ai/constants/common_constants.py +48 -0
- reme_ai/constants/language_constants.py +215 -0
- reme_ai/enumeration/__init__.py +0 -0
- reme_ai/enumeration/language_constants.py +215 -0
- reme_ai/react/__init__.py +1 -0
- reme_ai/react/simple_react_op.py +21 -0
- reme_ai/retrieve/__init__.py +2 -0
- reme_ai/retrieve/personal/__init__.py +17 -0
- reme_ai/retrieve/personal/extract_time_op.py +97 -0
- reme_ai/retrieve/personal/fuse_rerank_op.py +180 -0
- reme_ai/retrieve/personal/print_memory_op.py +131 -0
- reme_ai/retrieve/personal/read_message_op.py +52 -0
- reme_ai/retrieve/personal/retrieve_memory_op.py +13 -0
- reme_ai/retrieve/personal/semantic_rank_op.py +170 -0
- reme_ai/retrieve/personal/set_query_op.py +37 -0
- reme_ai/retrieve/task/__init__.py +4 -0
- reme_ai/retrieve/task/build_query_op.py +38 -0
- reme_ai/retrieve/task/merge_memory_op.py +27 -0
- reme_ai/retrieve/task/rerank_memory_op.py +149 -0
- reme_ai/retrieve/task/rewrite_memory_op.py +149 -0
- reme_ai/schema/__init__.py +1 -0
- reme_ai/schema/memory.py +144 -0
- reme_ai/summary/__init__.py +2 -0
- reme_ai/summary/personal/__init__.py +8 -0
- reme_ai/summary/personal/contra_repeat_op.py +143 -0
- reme_ai/summary/personal/get_observation_op.py +147 -0
- reme_ai/summary/personal/get_observation_with_time_op.py +165 -0
- reme_ai/summary/personal/get_reflection_subject_op.py +179 -0
- reme_ai/summary/personal/info_filter_op.py +177 -0
- reme_ai/summary/personal/load_today_memory_op.py +117 -0
- reme_ai/summary/personal/long_contra_repeat_op.py +210 -0
- reme_ai/summary/personal/update_insight_op.py +244 -0
- reme_ai/summary/task/__init__.py +10 -0
- reme_ai/summary/task/comparative_extraction_op.py +233 -0
- reme_ai/summary/task/failure_extraction_op.py +73 -0
- reme_ai/summary/task/memory_deduplication_op.py +163 -0
- reme_ai/summary/task/memory_validation_op.py +108 -0
- reme_ai/summary/task/pdf_preprocess_op_wrapper.py +50 -0
- reme_ai/summary/task/simple_comparative_summary_op.py +71 -0
- reme_ai/summary/task/simple_summary_op.py +67 -0
- reme_ai/summary/task/success_extraction_op.py +73 -0
- reme_ai/summary/task/trajectory_preprocess_op.py +76 -0
- reme_ai/summary/task/trajectory_segmentation_op.py +118 -0
- reme_ai/utils/__init__.py +0 -0
- reme_ai/utils/datetime_handler.py +345 -0
- reme_ai/utils/miner_u_pdf_processor.py +726 -0
- reme_ai/utils/op_utils.py +115 -0
- reme_ai/vector_store/__init__.py +6 -0
- reme_ai/vector_store/delete_memory_op.py +25 -0
- reme_ai/vector_store/recall_vector_store_op.py +36 -0
- reme_ai/vector_store/update_memory_freq_op.py +33 -0
- reme_ai/vector_store/update_memory_utility_op.py +32 -0
- reme_ai/vector_store/update_vector_store_op.py +32 -0
- reme_ai/vector_store/vector_store_action_op.py +55 -0
- reme_ai-0.1.0.dist-info/METADATA +218 -0
- reme_ai-0.1.0.dist-info/RECORD +65 -0
- reme_ai-0.1.0.dist-info/WHEEL +5 -0
- reme_ai-0.1.0.dist-info/entry_points.txt +2 -0
- reme_ai-0.1.0.dist-info/licenses/LICENSE +201 -0
- reme_ai-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,345 @@
|
|
1
|
+
import datetime
|
2
|
+
import re
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
from reme_ai.constants.language_constants import WEEKDAYS, DATATIME_WORD_LIST, MONTH_DICT
|
6
|
+
from reme_ai.enumeration.language_constants import LanguageEnum
|
7
|
+
|
8
|
+
|
9
|
+
class DatetimeHandler(object):
|
10
|
+
"""
|
11
|
+
Handles operations related to datetime such as parsing, extraction, and formatting,
|
12
|
+
with support for both Chinese and English contexts including weekday names and
|
13
|
+
specialized text parsing for date components.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def __init__(self, dt: datetime.datetime | str | int | float = None):
|
17
|
+
"""
|
18
|
+
Initialize the DatetimeHandler instance with a datetime object, string, integer, or float representation
|
19
|
+
of a timestamp. If no argument is provided, the current time is used.
|
20
|
+
|
21
|
+
Args:
|
22
|
+
dt (datetime.datetime | str | int | float, optional):
|
23
|
+
The datetime to be handled. Can be a datetime object, a timestamp string, or a numeric timestamp.
|
24
|
+
Defaults to None, which sets the instance to the current datetime.
|
25
|
+
|
26
|
+
Attributes:
|
27
|
+
self._dt (datetime.datetime): The internal datetime representation of the input.
|
28
|
+
self._dt_info_dict (dict | None): A dictionary containing parsed datetime information, defaults to None.
|
29
|
+
"""
|
30
|
+
if isinstance(dt, str | int | float):
|
31
|
+
if isinstance(dt, str):
|
32
|
+
try:
|
33
|
+
dt = float(dt)
|
34
|
+
except:
|
35
|
+
dt = datetime.datetime.strptime(dt, "%Y-%m-%d %H:%M:%S")
|
36
|
+
dt = dt.timestamp()
|
37
|
+
self._dt: datetime.datetime = datetime.datetime.fromtimestamp(dt)
|
38
|
+
elif isinstance(dt, datetime.datetime):
|
39
|
+
self._dt: datetime.datetime = dt
|
40
|
+
else:
|
41
|
+
self._dt: datetime.datetime = datetime.datetime.now()
|
42
|
+
|
43
|
+
self._dt_info_dict: dict | None = None
|
44
|
+
|
45
|
+
@staticmethod
|
46
|
+
def language_transform(language: str | LanguageEnum) -> LanguageEnum:
|
47
|
+
if not language:
|
48
|
+
language = LanguageEnum.EN
|
49
|
+
elif language == "zh":
|
50
|
+
language = LanguageEnum.CN
|
51
|
+
else:
|
52
|
+
language = LanguageEnum(language)
|
53
|
+
|
54
|
+
return language
|
55
|
+
|
56
|
+
@classmethod
|
57
|
+
def get_language_value(cls, language: str, value_dict: dict):
|
58
|
+
return value_dict.get(cls.get_language_value(language))
|
59
|
+
|
60
|
+
def _parse_dt_info(self, language: LanguageEnum | str):
|
61
|
+
"""
|
62
|
+
Parses the datetime object (_dt) into a dictionary containing detailed date and time components,
|
63
|
+
including language-specific weekday representation.
|
64
|
+
|
65
|
+
Returns:
|
66
|
+
dict: A dictionary with keys representing date and time parts such as 'year', 'month',
|
67
|
+
'day', 'hour', 'minute', 'second', 'week', and 'weekday' with respective values.
|
68
|
+
The 'weekday' value is translated based on the current language context.
|
69
|
+
"""
|
70
|
+
language = self.language_transform(language=language)
|
71
|
+
|
72
|
+
return {
|
73
|
+
"year": self._dt.year,
|
74
|
+
"month": MONTH_DICT[language][self._dt.month - 1],
|
75
|
+
"day": self._dt.day,
|
76
|
+
"hour": self._dt.hour,
|
77
|
+
"minute": self._dt.minute,
|
78
|
+
"second": self._dt.second,
|
79
|
+
"week": self._dt.isocalendar().week,
|
80
|
+
"weekday": WEEKDAYS[language][self._dt.isocalendar().weekday - 1],
|
81
|
+
}
|
82
|
+
|
83
|
+
def get_dt_info_dict(self, language: LanguageEnum | str):
|
84
|
+
"""
|
85
|
+
Property method to get the dictionary containing parsed datetime information.
|
86
|
+
If None, initialize using `_parse_dt_info`.
|
87
|
+
|
88
|
+
Returns:
|
89
|
+
dict: A dictionary with parsed datetime information.
|
90
|
+
"""
|
91
|
+
language = self.language_transform(language=language)
|
92
|
+
|
93
|
+
if self._dt_info_dict is None:
|
94
|
+
self._dt_info_dict = self._parse_dt_info(language=language)
|
95
|
+
return self._dt_info_dict
|
96
|
+
|
97
|
+
@classmethod
|
98
|
+
def extract_date_parts_cn(cls, input_string: str) -> dict:
|
99
|
+
"""
|
100
|
+
Extracts various components of a date (year, month, day, etc.) from an input string based on Chinese formats.
|
101
|
+
|
102
|
+
This method identifies year, month, day, weekday, and hour components within the input
|
103
|
+
string based on predefined patterns. It supports relative terms like '每' (every) and
|
104
|
+
translates weekday names into numeric representations.
|
105
|
+
|
106
|
+
Args:
|
107
|
+
input_string (str): The Chinese text containing date and time information.
|
108
|
+
|
109
|
+
Returns:
|
110
|
+
dict: A dictionary with keys 'year', 'month', 'day', 'weekday', and 'hour',
|
111
|
+
each holding the corresponding extracted value. If a component is not found,
|
112
|
+
it will not be included in the dictionary. For relative terms like '每' (every),
|
113
|
+
the value is set to -1.
|
114
|
+
|
115
|
+
"""
|
116
|
+
# Extending our pattern to handle every/每 as a possible value.
|
117
|
+
patterns = {
|
118
|
+
"year": r"(\d+|每)年",
|
119
|
+
"month": r"(\d+|每)月",
|
120
|
+
"day": r"(\d+|每)日",
|
121
|
+
"weekday": r"周([一二三四五六日])",
|
122
|
+
"hour": r"(\d+)点"
|
123
|
+
}
|
124
|
+
weekday_dict = {"一": 1, "二": 2, "三": 3, "四": 4, "五": 5, "六": 6, "日": 7}
|
125
|
+
extracted_data = {}
|
126
|
+
|
127
|
+
# Search for patterns in the input string and populate the dictionary
|
128
|
+
for key, pattern in patterns.items():
|
129
|
+
match = re.search(pattern, input_string)
|
130
|
+
if match: # If there is a match, include it in the output dictionary
|
131
|
+
if match.group(1) == "每":
|
132
|
+
extracted_data[key] = -1
|
133
|
+
elif match.group(1) in weekday_dict.keys():
|
134
|
+
extracted_data[key] = weekday_dict[match.group(1)]
|
135
|
+
else:
|
136
|
+
extracted_data[key] = int(match.group(1))
|
137
|
+
return extracted_data
|
138
|
+
|
139
|
+
@classmethod
|
140
|
+
def extract_date_parts_en(cls, input_string: str) -> dict:
|
141
|
+
"""
|
142
|
+
Extracts various components of a date (year, month, day, etc.) from an input string based on English formats.
|
143
|
+
|
144
|
+
This method employs regex patterns to identify and parse different date and time elements within the provided
|
145
|
+
text. It supports extraction of year, month name, day, 12-hour and 24-hour time formats, and weekdays.
|
146
|
+
|
147
|
+
Args:
|
148
|
+
input_string (str): The English text containing date and time information.
|
149
|
+
|
150
|
+
Returns:
|
151
|
+
dict: A dictionary containing the extracted date parts with default values of -1 where components are not
|
152
|
+
found. Keys include 'year', 'month', 'day', 'hour', 'minute', 'second', and 'weekday'.
|
153
|
+
"""
|
154
|
+
date_info = {
|
155
|
+
"year": -1,
|
156
|
+
"month": -1,
|
157
|
+
"day": -1,
|
158
|
+
"hour": -1,
|
159
|
+
"minute": -1,
|
160
|
+
"second": -1,
|
161
|
+
"weekday": -1
|
162
|
+
}
|
163
|
+
|
164
|
+
# Patterns to extract the parts of the date/time
|
165
|
+
patterns = {
|
166
|
+
"year": r"\b(\d{4})\b",
|
167
|
+
"month": r"\b(January|February|March|April|May|June|July|August|September|October|November|December)\b",
|
168
|
+
"day_month_year": r"\b(?P<month>January|February|March|April|May|June|July|August|September|October"
|
169
|
+
r"|November|December) (?P<day>\d{1,2}),? (?P<year>\d{4})\b",
|
170
|
+
"day_month": r"\b(?P<month>January|February|March|April|May|June|July|August|September|October|November"
|
171
|
+
r"|December) (?P<day>\d{1,2})\b",
|
172
|
+
"hour_12": r"\b(\d{1,2})\s*(AM|PM|am|pm)\b",
|
173
|
+
"hour_24": r"\b(\d{1,2}):(\d{2}):(\d{2})\b"
|
174
|
+
}
|
175
|
+
|
176
|
+
month_mapping = {
|
177
|
+
"January": 1, "February": 2, "March": 3, "April": 4, "May": 5, "June": 6, "July": 7, "August": 8,
|
178
|
+
"September": 9, "October": 10, "November": 11, "December": 12
|
179
|
+
}
|
180
|
+
|
181
|
+
weekday_mapping = {
|
182
|
+
"Monday": 1, "Tuesday": 2, "Wednesday": 3, "Thursday": 4, "Friday": 5, "Saturday": 6, "Sunday": 7
|
183
|
+
}
|
184
|
+
|
185
|
+
# Attempt to match full date (day month year)
|
186
|
+
day_month_year_match = re.search(patterns["day_month_year"], input_string)
|
187
|
+
if day_month_year_match:
|
188
|
+
date_info["year"] = int(day_month_year_match.group("year"))
|
189
|
+
date_info["month"] = month_mapping[day_month_year_match.group("month")]
|
190
|
+
date_info["day"] = int(day_month_year_match.group("day"))
|
191
|
+
|
192
|
+
# If year wasn't found, try matching day and month without year
|
193
|
+
elif date_info["year"] == -1:
|
194
|
+
day_month_match = re.search(patterns["day_month"], input_string)
|
195
|
+
if day_month_match:
|
196
|
+
date_info["month"] = month_mapping[day_month_match.group("month")]
|
197
|
+
date_info["day"] = int(day_month_match.group("day"))
|
198
|
+
|
199
|
+
# Extract year if not already found
|
200
|
+
if date_info["year"] == -1:
|
201
|
+
year_match = re.search(patterns["year"], input_string)
|
202
|
+
if year_match:
|
203
|
+
date_info["year"] = int(year_match.group(0))
|
204
|
+
|
205
|
+
# Extract month if not already found
|
206
|
+
if date_info["month"] == -1:
|
207
|
+
month_match = re.search(patterns["month"], input_string)
|
208
|
+
if month_match:
|
209
|
+
date_info["month"] = month_mapping[month_match.group(0)]
|
210
|
+
|
211
|
+
# Extract 12-hour format time
|
212
|
+
hour_12_match = re.search(patterns["hour_12"], input_string)
|
213
|
+
if hour_12_match:
|
214
|
+
hour, period = int(hour_12_match.group(1)), hour_12_match.group(2).lower()
|
215
|
+
if period == 'pm' and hour != 12:
|
216
|
+
hour += 12
|
217
|
+
elif period == 'am' and hour == 12:
|
218
|
+
hour = 0
|
219
|
+
date_info["hour"] = hour
|
220
|
+
|
221
|
+
# Identify weekday
|
222
|
+
for week_day, value in weekday_mapping.items():
|
223
|
+
if week_day in input_string:
|
224
|
+
date_info["weekday"] = value
|
225
|
+
break
|
226
|
+
|
227
|
+
return date_info
|
228
|
+
|
229
|
+
@classmethod
|
230
|
+
def extract_date_parts(cls, input_string: str, language: LanguageEnum | str) -> dict:
|
231
|
+
"""
|
232
|
+
Extracts various date components from the input string based on the current language context.
|
233
|
+
|
234
|
+
This method dynamically selects a language-specific function to parse the input string and extract
|
235
|
+
date parts such as year, month, day, etc. If the function for current language context does not exist,
|
236
|
+
a warning is logged and an empty dictionary is returned.
|
237
|
+
|
238
|
+
Args:
|
239
|
+
input_string (str): The string containing date information to be parsed.
|
240
|
+
language (str): current language.
|
241
|
+
|
242
|
+
Returns:
|
243
|
+
dict: A dictionary containing extracted date components, or an empty dictionary if parsing fails.
|
244
|
+
"""
|
245
|
+
language = cls.language_transform(language=language)
|
246
|
+
|
247
|
+
func_name = f"extract_date_parts_{language.value}"
|
248
|
+
if not hasattr(cls, func_name):
|
249
|
+
# cls.logger.warning(f"language={language.value} needs to complete extract_date_parts func!")
|
250
|
+
return {}
|
251
|
+
return getattr(cls, func_name)(input_string=input_string)
|
252
|
+
|
253
|
+
@classmethod
|
254
|
+
def has_time_word_cn(cls, query: str, datetime_word_list: List[str]) -> bool:
|
255
|
+
"""
|
256
|
+
Check if the input query contains any datetime-related words based on the cn language context.
|
257
|
+
|
258
|
+
Args:
|
259
|
+
query (str): The input string to check for datetime-related words.
|
260
|
+
datetime_word_list (list[str]): datetime keywords
|
261
|
+
|
262
|
+
Returns:
|
263
|
+
bool: True if the query contains at least one datetime-related word, False otherwise.
|
264
|
+
"""
|
265
|
+
contain_datetime = False
|
266
|
+
# TODO use re
|
267
|
+
for datetime_word in datetime_word_list:
|
268
|
+
if datetime_word in query:
|
269
|
+
contain_datetime = True
|
270
|
+
break
|
271
|
+
return contain_datetime
|
272
|
+
|
273
|
+
@classmethod
|
274
|
+
def has_time_word_en(cls, query: str, datetime_word_list: List[str]) -> bool:
|
275
|
+
"""
|
276
|
+
Check if the input query contains any datetime-related words based on the en language context.
|
277
|
+
|
278
|
+
Args:
|
279
|
+
query (str): The input string to check for datetime-related words.
|
280
|
+
datetime_word_list (list[str]): datetime keywords
|
281
|
+
|
282
|
+
Returns:
|
283
|
+
bool: True if the query contains at least one datetime-related word, False otherwise.
|
284
|
+
"""
|
285
|
+
contain_datetime = False
|
286
|
+
for datetime_word in datetime_word_list:
|
287
|
+
datetime_word = datetime_word.lower()
|
288
|
+
# TODO fix strip
|
289
|
+
if datetime_word in [x.strip().lower().strip(",").strip(".").strip("?").strip(":")
|
290
|
+
for x in query.split(" ")]:
|
291
|
+
contain_datetime = True
|
292
|
+
break
|
293
|
+
return contain_datetime
|
294
|
+
|
295
|
+
@classmethod
|
296
|
+
def has_time_word(cls, query: str, language: LanguageEnum | str) -> bool:
|
297
|
+
language = cls.language_transform(language=language)
|
298
|
+
|
299
|
+
func_name = f"has_time_word_{language.value}"
|
300
|
+
if not hasattr(cls, func_name):
|
301
|
+
# cls.logger.warning(f"language={language.value} needs to complete has_time_word function!")
|
302
|
+
return False
|
303
|
+
|
304
|
+
if language not in DATATIME_WORD_LIST:
|
305
|
+
# cls.logger.warning(f"language={language.value} is missing in DATATIME_WORD_LIST!")
|
306
|
+
return False
|
307
|
+
|
308
|
+
datetime_word_list = DATATIME_WORD_LIST[language]
|
309
|
+
return getattr(cls, func_name)(query=query, datetime_word_list=datetime_word_list)
|
310
|
+
|
311
|
+
def datetime_format(self, dt_format: str = "%Y%m%d") -> str:
|
312
|
+
"""
|
313
|
+
Format the stored datetime object into a string based on the provided format.
|
314
|
+
|
315
|
+
Args:
|
316
|
+
dt_format (str, optional): The datetime format string. Defaults to "%Y%m%d".
|
317
|
+
|
318
|
+
Returns:
|
319
|
+
str: A formatted datetime string.
|
320
|
+
"""
|
321
|
+
return self._dt.strftime(dt_format)
|
322
|
+
|
323
|
+
def string_format(self, string_format: str, language: str | LanguageEnum) -> str:
|
324
|
+
"""
|
325
|
+
Format the datetime information stored in the instance using a custom string format.
|
326
|
+
|
327
|
+
Args:
|
328
|
+
string_format (str): A format string where placeholders are keys from `dt_info_dict`.
|
329
|
+
language (str): current language.
|
330
|
+
|
331
|
+
Returns:
|
332
|
+
str: A formatted datetime string.
|
333
|
+
"""
|
334
|
+
language = self.language_transform(language=language)
|
335
|
+
return string_format.format(**self.get_dt_info_dict(language=language))
|
336
|
+
|
337
|
+
@property
|
338
|
+
def timestamp(self) -> int:
|
339
|
+
"""
|
340
|
+
Get the timestamp representation of the stored datetime.
|
341
|
+
|
342
|
+
Returns:
|
343
|
+
int: A timestamp value.
|
344
|
+
"""
|
345
|
+
return int(self._dt.timestamp())
|