reme-ai 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. reme_ai/__init__.py +1 -1
  2. reme_ai/app.py +1 -1
  3. reme_ai/config/default.yaml +56 -5
  4. reme_ai/constants/common_constants.py +0 -2
  5. reme_ai/constants/language_constants.py +1 -1
  6. reme_ai/enumeration/language_enum.py +14 -0
  7. reme_ai/react/simple_react_op.py +11 -8
  8. reme_ai/retrieve/personal/extract_time_op.py +2 -3
  9. reme_ai/retrieve/personal/fuse_rerank_op.py +1 -1
  10. reme_ai/retrieve/personal/print_memory_op.py +1 -1
  11. reme_ai/retrieve/personal/read_message_op.py +1 -1
  12. reme_ai/retrieve/personal/retrieve_memory_op.py +34 -4
  13. reme_ai/retrieve/personal/semantic_rank_op.py +4 -4
  14. reme_ai/retrieve/personal/set_query_op.py +1 -1
  15. reme_ai/retrieve/task/build_query_op.py +2 -2
  16. reme_ai/retrieve/task/merge_memory_op.py +1 -1
  17. reme_ai/retrieve/task/rerank_memory_op.py +4 -4
  18. reme_ai/retrieve/task/rewrite_memory_op.py +6 -6
  19. reme_ai/service/__init__.py +0 -0
  20. reme_ai/service/base_memory_service.py +112 -0
  21. reme_ai/service/personal_memory_service.py +128 -0
  22. reme_ai/service/task_memory_service.py +126 -0
  23. reme_ai/summary/personal/contra_repeat_op.py +2 -2
  24. reme_ai/summary/personal/get_observation_op.py +4 -4
  25. reme_ai/summary/personal/get_observation_with_time_op.py +4 -4
  26. reme_ai/summary/personal/get_reflection_subject_op.py +4 -4
  27. reme_ai/summary/personal/info_filter_op.py +4 -4
  28. reme_ai/summary/personal/load_today_memory_op.py +6 -7
  29. reme_ai/summary/personal/long_contra_repeat_op.py +4 -4
  30. reme_ai/summary/personal/update_insight_op.py +4 -4
  31. reme_ai/summary/task/__init__.py +0 -1
  32. reme_ai/summary/task/comparative_extraction_op.py +9 -7
  33. reme_ai/summary/task/failure_extraction_op.py +7 -5
  34. reme_ai/summary/task/memory_deduplication_op.py +6 -6
  35. reme_ai/summary/task/memory_validation_op.py +8 -6
  36. reme_ai/summary/task/simple_comparative_summary_op.py +6 -4
  37. reme_ai/summary/task/simple_summary_op.py +6 -4
  38. reme_ai/summary/task/success_extraction_op.py +7 -5
  39. reme_ai/summary/task/trajectory_preprocess_op.py +3 -32
  40. reme_ai/summary/task/trajectory_segmentation_op.py +6 -4
  41. reme_ai/utils/datetime_handler.py +1 -1
  42. reme_ai/vector_store/delete_memory_op.py +1 -1
  43. reme_ai/vector_store/recall_vector_store_op.py +3 -3
  44. reme_ai/vector_store/update_memory_freq_op.py +1 -1
  45. reme_ai/vector_store/update_memory_utility_op.py +1 -1
  46. reme_ai/vector_store/update_vector_store_op.py +3 -3
  47. reme_ai/vector_store/vector_store_action_op.py +21 -18
  48. {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/METADATA +298 -133
  49. reme_ai-0.1.5.dist-info/RECORD +87 -0
  50. reme_ai/enumeration/language_constants.py +0 -215
  51. reme_ai/summary/task/pdf_preprocess_op_wrapper.py +0 -50
  52. reme_ai/utils/miner_u_pdf_processor.py +0 -726
  53. reme_ai-0.1.3.dist-info/RECORD +0 -85
  54. {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/WHEEL +0 -0
  55. {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/entry_points.txt +0 -0
  56. {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/licenses/LICENSE +0 -0
  57. {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,87 @@
1
+ reme_ai/__init__.py,sha256=kDmm8P-Agw4K7ROtPZ3Gv2GlwBozD-q-nccDJP-b44M,139
2
+ reme_ai/app.py,sha256=vzcC8cZdeTl6JZJK_JhJE8Kt7MBsIXlAQvg90bq8g-Q,325
3
+ reme_ai/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ reme_ai/config/config_parser.py,sha256=gk9bsMNnAlLLjChdalHWHgpOazyvZh5bItTUZcIGqbU,189
5
+ reme_ai/config/default.yaml,sha256=fxe9f43Lf6Affn-6cF-ZpuLkZCz0JKyzmRJGpaqBTnc,6324
6
+ reme_ai/constants/__init__.py,sha256=HdNweT3fTmdsCfoyTVKpBIPF9EELepLVNCrpKpJymY4,128
7
+ reme_ai/constants/common_constants.py,sha256=0JuJS8y--bdQ9Knx2f8f0bnNaWTBFLLMYYp2xhathjc,1038
8
+ reme_ai/constants/language_constants.py,sha256=bCNJJ8by5aNIaClDT6q2WqF7Xia6pXdHSMEK8DKLtfA,4754
9
+ reme_ai/enumeration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ reme_ai/enumeration/language_enum.py,sha256=zaWc0L1Etb4r3QcfNvVrkX6hwYNAHUms962PlUKLtas,261
11
+ reme_ai/react/__init__.py,sha256=-EEF7Moo56-R6O2CoWMbkXz12l8tsbpMht5vSUdRnbU,43
12
+ reme_ai/react/simple_react_op.py,sha256=H_rbqB03A9sqAq2qXqxD0p7jD80atA2aoHEJJ68GBsA,608
13
+ reme_ai/retrieve/__init__.py,sha256=K3qulFpRGZU_UsyYjIouR1F2CXwDBf9NkGiqYmIU2hQ,42
14
+ reme_ai/retrieve/personal/__init__.py,sha256=v5rpUQgBuMBMAodUzXPaRHg27ckGz82Vq5PXFSwHbks,462
15
+ reme_ai/retrieve/personal/extract_time_op.py,sha256=22znvAwW2gPIZ8gTzKW-f87MrqecRZ60EmrxJ1z_tYE,4154
16
+ reme_ai/retrieve/personal/extract_time_prompt.yaml,sha256=gO1RM4yFIDE5zr7EM6B_R-5a7mrtgqpeMRnKtzkeaOQ,5247
17
+ reme_ai/retrieve/personal/fuse_rerank_op.py,sha256=vDkkphPnN-Lph0yl5rL8u04xit_MKgn7ejO16G9llpw,7601
18
+ reme_ai/retrieve/personal/print_memory_op.py,sha256=X-iilW-LX0MJwaCqqlB6sV9ANItPQdYFyd0RtSy4dyY,4359
19
+ reme_ai/retrieve/personal/print_memory_prompt.yaml,sha256=fJ1DVLwu7crKLnNy9tsb-BYY56Y-kLRkuqthuEakgmM,487
20
+ reme_ai/retrieve/personal/read_message_op.py,sha256=_k-kNPPs8X3oC4R7A0qJr0xNVlhXJwvrLVcQ1G_2GBM,1724
21
+ reme_ai/retrieve/personal/retrieve_memory_op.py,sha256=KlpPmYKKxynwiTLjoc7AIhuz4wMX5Qo1SnOgpa4Th0k,1888
22
+ reme_ai/retrieve/personal/semantic_rank_op.py,sha256=ezCIQCSOuV6wfx2U78Q4c5z6Sz2TK1O1aNzFXTlEeHY,6614
23
+ reme_ai/retrieve/personal/set_query_op.py,sha256=UPq8_65x2AewBWZOxLhqNhw1fYAAClb9aUnW2pdtcc8,1322
24
+ reme_ai/retrieve/task/__init__.py,sha256=jTB7b3WpDbU3cyloqPD96TAdZkCIRs5enjSXybebbII,176
25
+ reme_ai/retrieve/task/build_query_op.py,sha256=8se7mzhXgHHOkqk7mmCXr9Iag5Q9OulaRZIVgbg-2HU,1492
26
+ reme_ai/retrieve/task/build_query_prompt.yaml,sha256=NQTvGe6u9w5k4J_tMo-AeXhRD4h7_urrl3NFhJlc8_8,323
27
+ reme_ai/retrieve/task/merge_memory_op.py,sha256=bR5kTBHsOS8NIlVwbh-dcEqzKAgQGFQkrYP2hznhztc,913
28
+ reme_ai/retrieve/task/rerank_memory_op.py,sha256=8DNuVufujOyGyVfGWrfxgDBRSR6jBqLu7bvCqksVl-Y,5567
29
+ reme_ai/retrieve/task/rerank_memory_prompt.yaml,sha256=_YuJCGa6N5gJ8LVcbeC8y6AFJHGkWuAyorB8NkUla34,965
30
+ reme_ai/retrieve/task/rewrite_memory_op.py,sha256=_yiyikbyRcJBKZueyyYYhzQz1TH3hPaFSP8Rmts5iJw,5519
31
+ reme_ai/retrieve/task/rewrite_memory_prompt.yaml,sha256=JY4gmTplmfxxZzDfdaUDyJoXXqWHXpc7Z-4_ry_Zy74,1594
32
+ reme_ai/schema/__init__.py,sha256=tbBjMfV_ojRr2yybA-XSPzN71A6K9HXZ8Gaw7I-2hBM,69
33
+ reme_ai/schema/memory.py,sha256=ur1ldmrrm3YZermeb6zhV9LgrBoUEJ29Ppz6J2Q8yhM,5483
34
+ reme_ai/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ reme_ai/service/base_memory_service.py,sha256=nBDVAgoEqyb98t1Kl43u1v0aCStcsy7XbcWmEuk6dac,3212
36
+ reme_ai/service/personal_memory_service.py,sha256=whosNiJbT777yJdpNzyvKcUOtCg6Fkmo2jvX2AevYjM,5030
37
+ reme_ai/service/task_memory_service.py,sha256=TnA9NjG0kw9rXB_Cv7ZxoGXRQDm7R9wEytmIIqrOtPw,4865
38
+ reme_ai/summary/__init__.py,sha256=K3qulFpRGZU_UsyYjIouR1F2CXwDBf9NkGiqYmIU2hQ,42
39
+ reme_ai/summary/personal/__init__.py,sha256=z2taU9ejTWvZaAJ6Mr0jKKRh4fcBdZVxpiFWreTDhTg,417
40
+ reme_ai/summary/personal/contra_repeat_op.py,sha256=cs3gnOkmFJgIH3KOS17FrxIiscW7lNqZiicCXZkgu9A,6297
41
+ reme_ai/summary/personal/contra_repeat_prompt.yaml,sha256=u5sp8MN944pU0HEeTKQmehdVlnqLXsIA7rAAh-bMJTw,7205
42
+ reme_ai/summary/personal/get_observation_op.py,sha256=IOVFnKgvzPREdwTjrvOLHodjQ4erSudDbb5DEqzn9mY,6431
43
+ reme_ai/summary/personal/get_observation_prompt.yaml,sha256=PK5iBDPMt6MqgsQ9Mj6YKp5Iq1PCA-MOYOp-rEFd-b8,15032
44
+ reme_ai/summary/personal/get_observation_with_time_op.py,sha256=2AE0HwxJzwoqTR0-OW42s92M3Ql6dXLlhQRBVbWRPBo,7905
45
+ reme_ai/summary/personal/get_observation_with_time_prompt.yaml,sha256=lRIlkU7BmgLzkghLLX-hx7hxibDTgsqa-IuIUIwyeMs,16389
46
+ reme_ai/summary/personal/get_reflection_subject_op.py,sha256=wP4zVOC2kBJlxWgXEHRmoSMvy6tgoXhrSc5y0LWhcg8,7712
47
+ reme_ai/summary/personal/get_reflection_subject_prompt.yaml,sha256=YYSCsgZhD7H7zkzRWrj6zzxOlprfuB1setUokjgvfK0,11139
48
+ reme_ai/summary/personal/info_filter_op.py,sha256=c0f91IpXDtYo4EgUWAleV9SgTlGVO1DP6mUHRo_7N6U,7897
49
+ reme_ai/summary/personal/info_filter_prompt.yaml,sha256=rgVLz_ptumi6lLYgLKRmTuap8vA5jQsfeFGpPiMpjMo,11192
50
+ reme_ai/summary/personal/load_today_memory_op.py,sha256=N37X9iDKdgnrGqZhzo46Jr3DVjdvNy9RHRHrt__9-IM,4060
51
+ reme_ai/summary/personal/long_contra_repeat_op.py,sha256=H2vodsFbLGmdlkEqGbAbTbBnRyOY-ywsRFo_nY6iEgM,9089
52
+ reme_ai/summary/personal/long_contra_repeat_prompt.yaml,sha256=6q3Y4xZx3liZwXLcJsOgeCIQZzAE2PwqVNQCFMNAWsI,7516
53
+ reme_ai/summary/personal/update_insight_op.py,sha256=S3bdRQaUoYVxcS4ts_mO08-h1vUpMdiSVYAL34VuesQ,11025
54
+ reme_ai/summary/personal/update_insight_prompt.yaml,sha256=02EPEC5vQGTEYOfHeq29GUEhlWxdLAcMnaj6jOgeI8c,10295
55
+ reme_ai/summary/task/__init__.py,sha256=GApTaFsmX7q-NbGtk5cTfx23cf-X-DBt-RORy0UMcvI,528
56
+ reme_ai/summary/task/comparative_extraction_op.py,sha256=vedE7i8UsZEW9hxWAc-0ew0SLG5c63TEE7EASVHIse8,10845
57
+ reme_ai/summary/task/comparative_extraction_prompt.yaml,sha256=EgsveH1fHcXle5sHDqyxZ_KxTfTUn2IGbkZ0KjZRl1I,3331
58
+ reme_ai/summary/task/failure_extraction_op.py,sha256=yWwC4_rNknAMAHDHNci92JRpBgpzkSTu9VuDTbf-tAQ,3199
59
+ reme_ai/summary/task/failure_extraction_prompt.yaml,sha256=kWz55BRxtEd_CjoaKWliCdYR_pzNCZwBETA0Ajdwggk,1534
60
+ reme_ai/summary/task/memory_deduplication_op.py,sha256=rg_k4tJkCKCERUV0Uuy5eVCBr4hEz3rhDBMg-J3DyiQ,6886
61
+ reme_ai/summary/task/memory_validation_op.py,sha256=pUk3mu4FpeQrC5bSIFqXwO8HJ9RIpbAMFp4PSuOPd9I,4525
62
+ reme_ai/summary/task/memory_validation_prompt.yaml,sha256=CwqT76ktjnkCXcZFEe0XtvJPkhZTpt32_N--A0gH3k0,1230
63
+ reme_ai/summary/task/simple_comparative_summary_op.py,sha256=4dAX0QqyIfu9UIuouSrhv30dqelMnI7Vm7toD56WRow,3587
64
+ reme_ai/summary/task/simple_comparative_summary_prompt.yaml,sha256=FGGj-jE8SvgEDEJAiq33ptB_-pI2qmBulsLDQyy8_bM,1140
65
+ reme_ai/summary/task/simple_summary_op.py,sha256=fsOogsmu6Na-UdRkxb2xOTqx7jBwXhEolGhKn4OBKPY,3126
66
+ reme_ai/summary/task/simple_summary_prompt.yaml,sha256=o0JbPBtGqKJ6_GIDhI_wGdBOSOy74bpaehLAKqtBYco,943
67
+ reme_ai/summary/task/success_extraction_op.py,sha256=W5rtY-KNovn9yvW6Q7-XDPWk_yrVbReUeJGVwDvK8no,3194
68
+ reme_ai/summary/task/success_extraction_prompt.yaml,sha256=rr_5sm9j2r_Ea5JgBBPG-yCnFMmCdTi391rqWovqraw,1527
69
+ reme_ai/summary/task/trajectory_preprocess_op.py,sha256=vbs-0dvN1EYg5_t_aY6pUBdLv6heWyaS1eUtdTXA6tE,1722
70
+ reme_ai/summary/task/trajectory_segmentation_op.py,sha256=nG-ri3UxXD85uSFokRi95iHom-5-xaRB495WNcaKn5Y,4777
71
+ reme_ai/summary/task/trajectory_segmentation_prompt.yaml,sha256=8E5nDQn9x7DHC6P1dsF3l_bd8UITla9AyfmSLoyFLpM,1193
72
+ reme_ai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
+ reme_ai/utils/datetime_handler.py,sha256=mTf-c7Ko7crrTzh1hU5v-A7Hbg_8gQRCeN2c-aoP3Tw,14451
74
+ reme_ai/utils/op_utils.py,sha256=tYsAl5LcQKe0I7YyEe8VXjrjJGVZ7Q_oD28qorJsCB0,4175
75
+ reme_ai/vector_store/__init__.py,sha256=1Yh14F5UZirKHxM7cgWiATpoZUqWGo147KlJK_wD03Q,327
76
+ reme_ai/vector_store/delete_memory_op.py,sha256=MqV-0UCt4S7h7yvR5EfmLQes-COgK1m6SqHIiWtGG08,917
77
+ reme_ai/vector_store/recall_vector_store_op.py,sha256=AUcEOTTllvppcClkSjksERnihekZIi2FOx_XBG036QY,1476
78
+ reme_ai/vector_store/update_memory_freq_op.py,sha256=nZ7WIqC8hN-Pv9QZrTviax6DA1CVyoCX2cF0XrmTAV4,1021
79
+ reme_ai/vector_store/update_memory_utility_op.py,sha256=fXYAtgfUxmhJkgKA2kHucOBalXAk9O6vvG8Bn9asOTQ,1025
80
+ reme_ai/vector_store/update_vector_store_op.py,sha256=_8T94tHe2nd5phsgmPZQdrYfVR7FvydMm9MJzlrESl4,1333
81
+ reme_ai/vector_store/vector_store_action_op.py,sha256=qeoB_e0WBTnB9yQUkyc8jNNRO9JK_bcCwS2oU0LTJxs,2535
82
+ reme_ai-0.1.5.dist-info/licenses/LICENSE,sha256=zFTWearO11HAlvEgtmY1XBBtk5TSj5P23zU5c_bNfb4,11343
83
+ reme_ai-0.1.5.dist-info/METADATA,sha256=EAsEdZuAJyF2zCO3Rv7hN3B9PIzrcMUcmwe_Iar8tAY,27137
84
+ reme_ai-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
85
+ reme_ai-0.1.5.dist-info/entry_points.txt,sha256=6SP3ncXOMyKotdT4LHWPeaXo3-Sv-1qmK7OhVw76Xhw,42
86
+ reme_ai-0.1.5.dist-info/top_level.txt,sha256=3ca2UBk97aSfPmGdg8LlVqyeLikb5qEnBEbfGgtzao0,8
87
+ reme_ai-0.1.5.dist-info/RECORD,,
@@ -1,215 +0,0 @@
1
- from memoryscope.enumeration.language_enum import LanguageEnum
2
-
3
- # This dictionary maps languages to lists of words related to datetime expressions.
4
- # It aids in recognizing and processing datetime mentions in text, enhancing the system's ability to understand
5
- # temporal context across different languages.
6
- DATATIME_WORD_LIST = {
7
- LanguageEnum.CN: [
8
- "天",
9
- "周",
10
- "月",
11
- "年",
12
- "星期",
13
- "点",
14
- "分钟",
15
- "小时",
16
- "秒",
17
- "上午",
18
- "下午",
19
- "早上",
20
- "早晨",
21
- "晚上",
22
- "中午",
23
- "日",
24
- "夜",
25
- "清晨",
26
- "傍晚",
27
- "凌晨",
28
- "岁",
29
- ],
30
- LanguageEnum.EN: [
31
- # Units of Time
32
- "year", "yr",
33
- "month", "mo",
34
- "week", "wk",
35
- "day", "d",
36
- "hour", "hr",
37
- "minute", "min",
38
- "second", "sec",
39
-
40
- # Days of the Week
41
- "Monday", "Mon",
42
- "Tuesday", "Tue", "Tues",
43
- "Wednesday", "Wed",
44
- "Thursday", "Thu", "Thur", "Thurs",
45
- "Friday", "Fri",
46
- "Saturday", "Sat",
47
- "Sunday", "Sun",
48
-
49
- # Months of the Year
50
- "January", "Jan",
51
- "February", "Feb",
52
- "March", "Mar",
53
- "April", "Apr",
54
- "May", "May",
55
- "June", "Jun",
56
- "July", "Jul",
57
- "August", "Aug",
58
- "September", "Sep", "Sept",
59
- "October", "Oct",
60
- "November", "Nov",
61
- "December", "Dec",
62
-
63
- # Relative Time References
64
- "Today",
65
- "Tomorrow", "Tmrw",
66
- "Yesterday", "Yday",
67
- "Now",
68
- "Morning", "AM", "a.m.",
69
- "Afternoon", "PM", "p.m.",
70
- "Evening",
71
- "Night",
72
- "Midnight",
73
- "Noon",
74
-
75
- # Seasonal References
76
- "Spring",
77
- "Summer",
78
- "Autumn", "Fall",
79
- "Winter",
80
-
81
- # General Time References
82
- "Century", "cent.",
83
- "Decade",
84
- "Millennium",
85
- "Quarter", "Q1", "Q2", "Q3", "Q4",
86
- "Semester",
87
- "Fortnight",
88
- "Weekend"
89
- ]
90
- }
91
-
92
- # A mapping of weekdays for each supported language, facilitating calendar-related operations and understanding
93
- # within the application.
94
- WEEKDAYS = {
95
- LanguageEnum.CN: [
96
- "周一",
97
- "周二",
98
- "周三",
99
- "周四",
100
- "周五",
101
- "周六",
102
- "周日"
103
- ],
104
- LanguageEnum.EN: [
105
- "Monday",
106
- "Tuesday",
107
- "Wednesday",
108
- "Thursday",
109
- "Friday",
110
- "Saturday",
111
- "Sunday",
112
- ]
113
- }
114
-
115
- MONTH_DICT = {
116
- LanguageEnum.CN: [
117
- "1月",
118
- "2月",
119
- "3月",
120
- "4月",
121
- "5月",
122
- "6月",
123
- "7月",
124
- "8月",
125
- "9月",
126
- "10月",
127
- "11月",
128
- "12月",
129
- ],
130
- LanguageEnum.EN: [
131
- "January",
132
- "February",
133
- "March",
134
- "April",
135
- "May",
136
- "June",
137
- "July",
138
- "August",
139
- "September",
140
- "October",
141
- "November",
142
- "December",
143
- ]
144
- }
145
-
146
- # Constants for the word 'none' in different languages
147
- NONE_WORD = {
148
- LanguageEnum.CN: "无",
149
- LanguageEnum.EN: "none"
150
- }
151
-
152
- # Constants for the word 'repeated' in different languages
153
- REPEATED_WORD = {
154
- LanguageEnum.CN: "重复",
155
- LanguageEnum.EN: "repeated"
156
- }
157
-
158
- # Constants for the word 'contradictory' in different languages
159
- CONTRADICTORY_WORD = {
160
- LanguageEnum.CN: "矛盾",
161
- LanguageEnum.EN: "contradiction"
162
- }
163
-
164
- # Constants for the phrase 'included' in different languages
165
- CONTAINED_WORD = {
166
- LanguageEnum.CN: "被包含",
167
- LanguageEnum.EN: "contained"
168
- }
169
-
170
- # Constants for the symbol ':' in different languages' representations
171
- COLON_WORD = {
172
- LanguageEnum.CN: ":",
173
- LanguageEnum.EN: ":"
174
- }
175
-
176
- # Constants for the symbol ',' in different languages' representations
177
- COMMA_WORD = {
178
- LanguageEnum.CN: ",",
179
- LanguageEnum.EN: ","
180
- }
181
-
182
- # Default human name placeholders for different languages
183
- DEFAULT_HUMAN_NAME = {
184
- LanguageEnum.CN: "用户",
185
- LanguageEnum.EN: "user"
186
- }
187
-
188
- # Mapping of datetime terms from natural language to standardized keys for each supported language
189
- DATATIME_KEY_MAP = {
190
- LanguageEnum.CN: {
191
- "年": "year",
192
- "月": "month",
193
- "日": "day",
194
- "周": "week",
195
- "星期几": "weekday",
196
- },
197
- LanguageEnum.EN: {
198
- "Year": "year",
199
- "Month": "month",
200
- "Day": "day",
201
- "Week": "week",
202
- "Weekday": "weekday",
203
- }
204
- }
205
-
206
- # Phrase for indicating inferred time in different languages
207
- TIME_INFER_WORD = {
208
- LanguageEnum.CN: "推断时间",
209
- LanguageEnum.EN: "Inference time"
210
- }
211
-
212
- USER_NAME_EXPRESSION = {
213
- LanguageEnum.CN: "用户姓名是{name}。",
214
- LanguageEnum.EN: "User's name is {name}."
215
- }
@@ -1,50 +0,0 @@
1
- from flowllm import C, BaseOp
2
- from loguru import logger
3
-
4
- from reme_ai.utils.miner_u_pdf_processor import MinerUPDFProcessor, chunk_pdf_content
5
-
6
-
7
- @C.register_op()
8
- class PDFPreprocessOp(BaseOp):
9
- file_path: str = __file__
10
-
11
- def execute(self):
12
- """Process PDF files using MinerU and chunk content"""
13
- pdf_path = self.context.get("pdf_path")
14
- output_dir = self.context.get("output_dir")
15
-
16
- if not pdf_path:
17
- logger.error("No PDF path provided in context")
18
- return
19
-
20
- # Process PDF
21
- processor = MinerUPDFProcessor(log_level="INFO")
22
-
23
- try:
24
- content_list, markdown_content = processor.process_pdf(
25
- pdf_path=pdf_path,
26
- output_dir=output_dir,
27
- method=self.op_params.get("method", "auto"),
28
- lang=self.op_params.get("lang"),
29
- backend=self.op_params.get("backend", "pipeline")
30
- )
31
-
32
- # Create chunks if requested
33
- chunks = []
34
- if self.op_params.get("create_chunks", True):
35
- max_length = self.op_params.get("max_chunk_length", 4000)
36
- chunks = chunk_pdf_content(content_list, max_length=max_length)
37
-
38
- # Store results in context
39
- self.context.pdf_content_list = content_list
40
- self.context.pdf_markdown_content = markdown_content
41
- self.context.pdf_chunks = chunks
42
-
43
- logger.info(f"PDF processing completed: {len(content_list)} content blocks, "
44
- f"{len(chunks)} chunks, {len(markdown_content)} characters of markdown")
45
-
46
- except Exception as e:
47
- logger.error(f"PDF processing failed: {e}")
48
- self.context.pdf_content_list = []
49
- self.context.pdf_markdown_content = ""
50
- self.context.pdf_chunks = []