reme-ai 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,215 +0,0 @@
1
- from memoryscope.enumeration.language_enum import LanguageEnum
2
-
3
# Per-language vocabulary of datetime-related words.
# Used to recognise and process datetime mentions in text so that temporal
# context can be understood across the supported languages.
# Entry order is kept exactly as originally declared; "May" is listed twice
# because its abbreviation is spelled the same as the full month name.
DATATIME_WORD_LIST = {
    LanguageEnum.CN: [
        "天", "周", "月", "年", "星期",
        "点", "分钟", "小时", "秒",
        "上午", "下午", "早上", "早晨", "晚上", "中午",
        "日", "夜", "清晨", "傍晚", "凌晨",
        "岁",
    ],
    LanguageEnum.EN: [
        # Units of time, each followed by its abbreviation
        "year", "yr", "month", "mo", "week", "wk", "day", "d",
        "hour", "hr", "minute", "min", "second", "sec",

        # Days of the week and their abbreviations
        "Monday", "Mon", "Tuesday", "Tue", "Tues", "Wednesday", "Wed",
        "Thursday", "Thu", "Thur", "Thurs", "Friday", "Fri",
        "Saturday", "Sat", "Sunday", "Sun",

        # Months of the year and their abbreviations
        "January", "Jan", "February", "Feb", "March", "Mar", "April", "Apr",
        "May", "May", "June", "Jun", "July", "Jul", "August", "Aug",
        "September", "Sep", "Sept", "October", "Oct", "November", "Nov",
        "December", "Dec",

        # Relative time references
        "Today", "Tomorrow", "Tmrw", "Yesterday", "Yday", "Now",
        "Morning", "AM", "a.m.", "Afternoon", "PM", "p.m.",
        "Evening", "Night", "Midnight", "Noon",

        # Seasons
        "Spring", "Summer", "Autumn", "Fall", "Winter",

        # General time references
        "Century", "cent.", "Decade", "Millennium",
        "Quarter", "Q1", "Q2", "Q3", "Q4",
        "Semester", "Fortnight", "Weekend",
    ],
}
91
-
92
# Weekday names for each supported language, listed Monday through Sunday,
# for calendar-related operations within the application.
WEEKDAYS = {
    LanguageEnum.CN: ["周一", "周二", "周三", "周四", "周五", "周六", "周日"],
    LanguageEnum.EN: [
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday",
    ],
}
114
-
115
# Month names for each supported language, listed January through December.
# Despite the name, each value is a list (index 0 is January).
MONTH_DICT = {
    LanguageEnum.CN: [
        "1月", "2月", "3月", "4月", "5月", "6月",
        "7月", "8月", "9月", "10月", "11月", "12月",
    ],
    LanguageEnum.EN: [
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December",
    ],
}
145
-
146
# The word 'none' in each supported language.
NONE_WORD = {LanguageEnum.CN: "无", LanguageEnum.EN: "none"}

# The word 'repeated' in each supported language.
REPEATED_WORD = {LanguageEnum.CN: "重复", LanguageEnum.EN: "repeated"}

# The word for a contradiction in each supported language.
CONTRADICTORY_WORD = {LanguageEnum.CN: "矛盾", LanguageEnum.EN: "contradiction"}

# The word 'contained' in each supported language.
CONTAINED_WORD = {LanguageEnum.CN: "被包含", LanguageEnum.EN: "contained"}
169
-
170
# The ':' and ',' symbols as represented in each supported language.
# NOTE(review): the CN and EN entries are the same ASCII characters as written
# here; confirm whether the CN entries were meant to be the full-width forms.
COLON_WORD = {LanguageEnum.CN: ":", LanguageEnum.EN: ":"}

COMMA_WORD = {LanguageEnum.CN: ",", LanguageEnum.EN: ","}
181
-
182
# Placeholder name for the human participant, per supported language.
DEFAULT_HUMAN_NAME = {LanguageEnum.CN: "用户", LanguageEnum.EN: "user"}
187
-
188
# For each supported language, maps the natural-language datetime term to its
# standardized key ("year", "month", "day", "week", "weekday").
DATATIME_KEY_MAP = {
    LanguageEnum.CN: {"年": "year", "月": "month", "日": "day", "周": "week", "星期几": "weekday"},
    LanguageEnum.EN: {"Year": "year", "Month": "month", "Day": "day", "Week": "week", "Weekday": "weekday"},
}
205
-
206
# Label used to indicate an inferred time, per supported language.
TIME_INFER_WORD = {LanguageEnum.CN: "推断时间", LanguageEnum.EN: "Inference time"}
211
-
212
# Sentence template stating the user's name, per supported language.
# Each template contains a single ``{name}`` placeholder.
USER_NAME_EXPRESSION = {
    LanguageEnum.CN: "用户姓名是{name}。",
    LanguageEnum.EN: "User's name is {name}.",
}
@@ -1,50 +0,0 @@
1
- from flowllm import C, BaseOp
2
- from loguru import logger
3
-
4
- from reme_ai.utils.miner_u_pdf_processor import MinerUPDFProcessor, chunk_pdf_content
5
-
6
-
7
@C.register_op()
class PDFPreprocessOp(BaseOp):
    """Parse a PDF with MinerU and publish the extracted content on the op context.

    Context inputs (read via ``self.context.get``):
        pdf_path: Path of the PDF to process. Required; when missing or empty
            an error is logged and empty results are published.
        output_dir: Forwarded unchanged to ``MinerUPDFProcessor.process_pdf``.

    Op parameters (read via ``self.op_params.get``):
        method: Forwarded to ``process_pdf``; defaults to ``"auto"``.
        lang: Forwarded to ``process_pdf``; ``None`` when not configured.
        backend: Forwarded to ``process_pdf``; defaults to ``"pipeline"``.
        create_chunks: When truthy (default ``True``) the content list is
            passed through ``chunk_pdf_content``.
        max_chunk_length: ``max_length`` given to ``chunk_pdf_content``;
            defaults to ``4000``.

    Context outputs (always set once ``execute`` returns):
        pdf_content_list: First element returned by ``process_pdf``, or ``[]``.
        pdf_markdown_content: Second element returned by ``process_pdf``, or ``""``.
        pdf_chunks: Result of ``chunk_pdf_content``, or ``[]`` when chunking
            is disabled or processing did not succeed.
    """
    file_path: str = __file__

    def execute(self):
        """Process PDF files using MinerU and chunk content.

        This op is best-effort: failures are logged rather than raised, and
        the three context outputs are reset to empty values in that case.
        """
        pdf_path = self.context.get("pdf_path")
        output_dir = self.context.get("output_dir")

        if not pdf_path:
            logger.error("No PDF path provided in context")
            # Publish the same empty results as the failure path below so the
            # context outputs exist regardless of why processing was skipped.
            self._store_results([], "", [])
            return

        # Process PDF
        processor = MinerUPDFProcessor(log_level="INFO")

        try:
            content_list, markdown_content = processor.process_pdf(
                pdf_path=pdf_path,
                output_dir=output_dir,
                method=self.op_params.get("method", "auto"),
                lang=self.op_params.get("lang"),
                backend=self.op_params.get("backend", "pipeline"),
            )

            # Create chunks if requested
            chunks = []
            if self.op_params.get("create_chunks", True):
                max_length = self.op_params.get("max_chunk_length", 4000)
                chunks = chunk_pdf_content(content_list, max_length=max_length)

            # Store results in context
            self._store_results(content_list, markdown_content, chunks)

            logger.info(f"PDF processing completed: {len(content_list)} content blocks, "
                        f"{len(chunks)} chunks, {len(markdown_content)} characters of markdown")

        except Exception as e:
            # Deliberately broad: any failure degrades to empty results instead
            # of raising. ``logger.exception`` keeps the traceback so the root
            # cause is still visible in the logs.
            logger.exception(f"PDF processing failed: {e}")
            self._store_results([], "", [])

    def _store_results(self, content_list, markdown_content, chunks):
        """Write the three PDF outputs onto the op context."""
        self.context.pdf_content_list = content_list
        self.context.pdf_markdown_content = markdown_content
        self.context.pdf_chunks = chunks