reme-ai 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reme_ai/__init__.py +1 -1
- reme_ai/app.py +1 -1
- reme_ai/config/default.yaml +56 -5
- reme_ai/constants/common_constants.py +0 -2
- reme_ai/constants/language_constants.py +1 -1
- reme_ai/enumeration/language_enum.py +14 -0
- reme_ai/react/simple_react_op.py +11 -8
- reme_ai/retrieve/personal/extract_time_op.py +2 -3
- reme_ai/retrieve/personal/fuse_rerank_op.py +1 -1
- reme_ai/retrieve/personal/print_memory_op.py +1 -1
- reme_ai/retrieve/personal/read_message_op.py +1 -1
- reme_ai/retrieve/personal/retrieve_memory_op.py +34 -4
- reme_ai/retrieve/personal/semantic_rank_op.py +4 -4
- reme_ai/retrieve/personal/set_query_op.py +1 -1
- reme_ai/retrieve/task/build_query_op.py +2 -2
- reme_ai/retrieve/task/merge_memory_op.py +1 -1
- reme_ai/retrieve/task/rerank_memory_op.py +4 -4
- reme_ai/retrieve/task/rewrite_memory_op.py +6 -6
- reme_ai/service/__init__.py +0 -0
- reme_ai/service/base_memory_service.py +112 -0
- reme_ai/service/personal_memory_service.py +128 -0
- reme_ai/service/task_memory_service.py +126 -0
- reme_ai/summary/personal/contra_repeat_op.py +2 -2
- reme_ai/summary/personal/get_observation_op.py +4 -4
- reme_ai/summary/personal/get_observation_with_time_op.py +4 -4
- reme_ai/summary/personal/get_reflection_subject_op.py +4 -4
- reme_ai/summary/personal/info_filter_op.py +4 -4
- reme_ai/summary/personal/load_today_memory_op.py +6 -7
- reme_ai/summary/personal/long_contra_repeat_op.py +4 -4
- reme_ai/summary/personal/update_insight_op.py +4 -4
- reme_ai/summary/task/__init__.py +0 -1
- reme_ai/summary/task/comparative_extraction_op.py +9 -7
- reme_ai/summary/task/failure_extraction_op.py +7 -5
- reme_ai/summary/task/memory_deduplication_op.py +6 -6
- reme_ai/summary/task/memory_validation_op.py +8 -6
- reme_ai/summary/task/simple_comparative_summary_op.py +6 -4
- reme_ai/summary/task/simple_summary_op.py +6 -4
- reme_ai/summary/task/success_extraction_op.py +7 -5
- reme_ai/summary/task/trajectory_preprocess_op.py +3 -32
- reme_ai/summary/task/trajectory_segmentation_op.py +6 -4
- reme_ai/utils/datetime_handler.py +1 -1
- reme_ai/vector_store/delete_memory_op.py +1 -1
- reme_ai/vector_store/recall_vector_store_op.py +3 -3
- reme_ai/vector_store/update_memory_freq_op.py +1 -1
- reme_ai/vector_store/update_memory_utility_op.py +1 -1
- reme_ai/vector_store/update_vector_store_op.py +3 -3
- reme_ai/vector_store/vector_store_action_op.py +21 -18
- {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/METADATA +298 -133
- reme_ai-0.1.5.dist-info/RECORD +87 -0
- reme_ai/enumeration/language_constants.py +0 -215
- reme_ai/summary/task/pdf_preprocess_op_wrapper.py +0 -50
- reme_ai/utils/miner_u_pdf_processor.py +0 -726
- reme_ai-0.1.3.dist-info/RECORD +0 -85
- {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/WHEEL +0 -0
- {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/entry_points.txt +0 -0
- {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/licenses/LICENSE +0 -0
- {reme_ai-0.1.3.dist-info → reme_ai-0.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,87 @@
|
|
1
|
+
reme_ai/__init__.py,sha256=kDmm8P-Agw4K7ROtPZ3Gv2GlwBozD-q-nccDJP-b44M,139
|
2
|
+
reme_ai/app.py,sha256=vzcC8cZdeTl6JZJK_JhJE8Kt7MBsIXlAQvg90bq8g-Q,325
|
3
|
+
reme_ai/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
reme_ai/config/config_parser.py,sha256=gk9bsMNnAlLLjChdalHWHgpOazyvZh5bItTUZcIGqbU,189
|
5
|
+
reme_ai/config/default.yaml,sha256=fxe9f43Lf6Affn-6cF-ZpuLkZCz0JKyzmRJGpaqBTnc,6324
|
6
|
+
reme_ai/constants/__init__.py,sha256=HdNweT3fTmdsCfoyTVKpBIPF9EELepLVNCrpKpJymY4,128
|
7
|
+
reme_ai/constants/common_constants.py,sha256=0JuJS8y--bdQ9Knx2f8f0bnNaWTBFLLMYYp2xhathjc,1038
|
8
|
+
reme_ai/constants/language_constants.py,sha256=bCNJJ8by5aNIaClDT6q2WqF7Xia6pXdHSMEK8DKLtfA,4754
|
9
|
+
reme_ai/enumeration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
|
+
reme_ai/enumeration/language_enum.py,sha256=zaWc0L1Etb4r3QcfNvVrkX6hwYNAHUms962PlUKLtas,261
|
11
|
+
reme_ai/react/__init__.py,sha256=-EEF7Moo56-R6O2CoWMbkXz12l8tsbpMht5vSUdRnbU,43
|
12
|
+
reme_ai/react/simple_react_op.py,sha256=H_rbqB03A9sqAq2qXqxD0p7jD80atA2aoHEJJ68GBsA,608
|
13
|
+
reme_ai/retrieve/__init__.py,sha256=K3qulFpRGZU_UsyYjIouR1F2CXwDBf9NkGiqYmIU2hQ,42
|
14
|
+
reme_ai/retrieve/personal/__init__.py,sha256=v5rpUQgBuMBMAodUzXPaRHg27ckGz82Vq5PXFSwHbks,462
|
15
|
+
reme_ai/retrieve/personal/extract_time_op.py,sha256=22znvAwW2gPIZ8gTzKW-f87MrqecRZ60EmrxJ1z_tYE,4154
|
16
|
+
reme_ai/retrieve/personal/extract_time_prompt.yaml,sha256=gO1RM4yFIDE5zr7EM6B_R-5a7mrtgqpeMRnKtzkeaOQ,5247
|
17
|
+
reme_ai/retrieve/personal/fuse_rerank_op.py,sha256=vDkkphPnN-Lph0yl5rL8u04xit_MKgn7ejO16G9llpw,7601
|
18
|
+
reme_ai/retrieve/personal/print_memory_op.py,sha256=X-iilW-LX0MJwaCqqlB6sV9ANItPQdYFyd0RtSy4dyY,4359
|
19
|
+
reme_ai/retrieve/personal/print_memory_prompt.yaml,sha256=fJ1DVLwu7crKLnNy9tsb-BYY56Y-kLRkuqthuEakgmM,487
|
20
|
+
reme_ai/retrieve/personal/read_message_op.py,sha256=_k-kNPPs8X3oC4R7A0qJr0xNVlhXJwvrLVcQ1G_2GBM,1724
|
21
|
+
reme_ai/retrieve/personal/retrieve_memory_op.py,sha256=KlpPmYKKxynwiTLjoc7AIhuz4wMX5Qo1SnOgpa4Th0k,1888
|
22
|
+
reme_ai/retrieve/personal/semantic_rank_op.py,sha256=ezCIQCSOuV6wfx2U78Q4c5z6Sz2TK1O1aNzFXTlEeHY,6614
|
23
|
+
reme_ai/retrieve/personal/set_query_op.py,sha256=UPq8_65x2AewBWZOxLhqNhw1fYAAClb9aUnW2pdtcc8,1322
|
24
|
+
reme_ai/retrieve/task/__init__.py,sha256=jTB7b3WpDbU3cyloqPD96TAdZkCIRs5enjSXybebbII,176
|
25
|
+
reme_ai/retrieve/task/build_query_op.py,sha256=8se7mzhXgHHOkqk7mmCXr9Iag5Q9OulaRZIVgbg-2HU,1492
|
26
|
+
reme_ai/retrieve/task/build_query_prompt.yaml,sha256=NQTvGe6u9w5k4J_tMo-AeXhRD4h7_urrl3NFhJlc8_8,323
|
27
|
+
reme_ai/retrieve/task/merge_memory_op.py,sha256=bR5kTBHsOS8NIlVwbh-dcEqzKAgQGFQkrYP2hznhztc,913
|
28
|
+
reme_ai/retrieve/task/rerank_memory_op.py,sha256=8DNuVufujOyGyVfGWrfxgDBRSR6jBqLu7bvCqksVl-Y,5567
|
29
|
+
reme_ai/retrieve/task/rerank_memory_prompt.yaml,sha256=_YuJCGa6N5gJ8LVcbeC8y6AFJHGkWuAyorB8NkUla34,965
|
30
|
+
reme_ai/retrieve/task/rewrite_memory_op.py,sha256=_yiyikbyRcJBKZueyyYYhzQz1TH3hPaFSP8Rmts5iJw,5519
|
31
|
+
reme_ai/retrieve/task/rewrite_memory_prompt.yaml,sha256=JY4gmTplmfxxZzDfdaUDyJoXXqWHXpc7Z-4_ry_Zy74,1594
|
32
|
+
reme_ai/schema/__init__.py,sha256=tbBjMfV_ojRr2yybA-XSPzN71A6K9HXZ8Gaw7I-2hBM,69
|
33
|
+
reme_ai/schema/memory.py,sha256=ur1ldmrrm3YZermeb6zhV9LgrBoUEJ29Ppz6J2Q8yhM,5483
|
34
|
+
reme_ai/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
35
|
+
reme_ai/service/base_memory_service.py,sha256=nBDVAgoEqyb98t1Kl43u1v0aCStcsy7XbcWmEuk6dac,3212
|
36
|
+
reme_ai/service/personal_memory_service.py,sha256=whosNiJbT777yJdpNzyvKcUOtCg6Fkmo2jvX2AevYjM,5030
|
37
|
+
reme_ai/service/task_memory_service.py,sha256=TnA9NjG0kw9rXB_Cv7ZxoGXRQDm7R9wEytmIIqrOtPw,4865
|
38
|
+
reme_ai/summary/__init__.py,sha256=K3qulFpRGZU_UsyYjIouR1F2CXwDBf9NkGiqYmIU2hQ,42
|
39
|
+
reme_ai/summary/personal/__init__.py,sha256=z2taU9ejTWvZaAJ6Mr0jKKRh4fcBdZVxpiFWreTDhTg,417
|
40
|
+
reme_ai/summary/personal/contra_repeat_op.py,sha256=cs3gnOkmFJgIH3KOS17FrxIiscW7lNqZiicCXZkgu9A,6297
|
41
|
+
reme_ai/summary/personal/contra_repeat_prompt.yaml,sha256=u5sp8MN944pU0HEeTKQmehdVlnqLXsIA7rAAh-bMJTw,7205
|
42
|
+
reme_ai/summary/personal/get_observation_op.py,sha256=IOVFnKgvzPREdwTjrvOLHodjQ4erSudDbb5DEqzn9mY,6431
|
43
|
+
reme_ai/summary/personal/get_observation_prompt.yaml,sha256=PK5iBDPMt6MqgsQ9Mj6YKp5Iq1PCA-MOYOp-rEFd-b8,15032
|
44
|
+
reme_ai/summary/personal/get_observation_with_time_op.py,sha256=2AE0HwxJzwoqTR0-OW42s92M3Ql6dXLlhQRBVbWRPBo,7905
|
45
|
+
reme_ai/summary/personal/get_observation_with_time_prompt.yaml,sha256=lRIlkU7BmgLzkghLLX-hx7hxibDTgsqa-IuIUIwyeMs,16389
|
46
|
+
reme_ai/summary/personal/get_reflection_subject_op.py,sha256=wP4zVOC2kBJlxWgXEHRmoSMvy6tgoXhrSc5y0LWhcg8,7712
|
47
|
+
reme_ai/summary/personal/get_reflection_subject_prompt.yaml,sha256=YYSCsgZhD7H7zkzRWrj6zzxOlprfuB1setUokjgvfK0,11139
|
48
|
+
reme_ai/summary/personal/info_filter_op.py,sha256=c0f91IpXDtYo4EgUWAleV9SgTlGVO1DP6mUHRo_7N6U,7897
|
49
|
+
reme_ai/summary/personal/info_filter_prompt.yaml,sha256=rgVLz_ptumi6lLYgLKRmTuap8vA5jQsfeFGpPiMpjMo,11192
|
50
|
+
reme_ai/summary/personal/load_today_memory_op.py,sha256=N37X9iDKdgnrGqZhzo46Jr3DVjdvNy9RHRHrt__9-IM,4060
|
51
|
+
reme_ai/summary/personal/long_contra_repeat_op.py,sha256=H2vodsFbLGmdlkEqGbAbTbBnRyOY-ywsRFo_nY6iEgM,9089
|
52
|
+
reme_ai/summary/personal/long_contra_repeat_prompt.yaml,sha256=6q3Y4xZx3liZwXLcJsOgeCIQZzAE2PwqVNQCFMNAWsI,7516
|
53
|
+
reme_ai/summary/personal/update_insight_op.py,sha256=S3bdRQaUoYVxcS4ts_mO08-h1vUpMdiSVYAL34VuesQ,11025
|
54
|
+
reme_ai/summary/personal/update_insight_prompt.yaml,sha256=02EPEC5vQGTEYOfHeq29GUEhlWxdLAcMnaj6jOgeI8c,10295
|
55
|
+
reme_ai/summary/task/__init__.py,sha256=GApTaFsmX7q-NbGtk5cTfx23cf-X-DBt-RORy0UMcvI,528
|
56
|
+
reme_ai/summary/task/comparative_extraction_op.py,sha256=vedE7i8UsZEW9hxWAc-0ew0SLG5c63TEE7EASVHIse8,10845
|
57
|
+
reme_ai/summary/task/comparative_extraction_prompt.yaml,sha256=EgsveH1fHcXle5sHDqyxZ_KxTfTUn2IGbkZ0KjZRl1I,3331
|
58
|
+
reme_ai/summary/task/failure_extraction_op.py,sha256=yWwC4_rNknAMAHDHNci92JRpBgpzkSTu9VuDTbf-tAQ,3199
|
59
|
+
reme_ai/summary/task/failure_extraction_prompt.yaml,sha256=kWz55BRxtEd_CjoaKWliCdYR_pzNCZwBETA0Ajdwggk,1534
|
60
|
+
reme_ai/summary/task/memory_deduplication_op.py,sha256=rg_k4tJkCKCERUV0Uuy5eVCBr4hEz3rhDBMg-J3DyiQ,6886
|
61
|
+
reme_ai/summary/task/memory_validation_op.py,sha256=pUk3mu4FpeQrC5bSIFqXwO8HJ9RIpbAMFp4PSuOPd9I,4525
|
62
|
+
reme_ai/summary/task/memory_validation_prompt.yaml,sha256=CwqT76ktjnkCXcZFEe0XtvJPkhZTpt32_N--A0gH3k0,1230
|
63
|
+
reme_ai/summary/task/simple_comparative_summary_op.py,sha256=4dAX0QqyIfu9UIuouSrhv30dqelMnI7Vm7toD56WRow,3587
|
64
|
+
reme_ai/summary/task/simple_comparative_summary_prompt.yaml,sha256=FGGj-jE8SvgEDEJAiq33ptB_-pI2qmBulsLDQyy8_bM,1140
|
65
|
+
reme_ai/summary/task/simple_summary_op.py,sha256=fsOogsmu6Na-UdRkxb2xOTqx7jBwXhEolGhKn4OBKPY,3126
|
66
|
+
reme_ai/summary/task/simple_summary_prompt.yaml,sha256=o0JbPBtGqKJ6_GIDhI_wGdBOSOy74bpaehLAKqtBYco,943
|
67
|
+
reme_ai/summary/task/success_extraction_op.py,sha256=W5rtY-KNovn9yvW6Q7-XDPWk_yrVbReUeJGVwDvK8no,3194
|
68
|
+
reme_ai/summary/task/success_extraction_prompt.yaml,sha256=rr_5sm9j2r_Ea5JgBBPG-yCnFMmCdTi391rqWovqraw,1527
|
69
|
+
reme_ai/summary/task/trajectory_preprocess_op.py,sha256=vbs-0dvN1EYg5_t_aY6pUBdLv6heWyaS1eUtdTXA6tE,1722
|
70
|
+
reme_ai/summary/task/trajectory_segmentation_op.py,sha256=nG-ri3UxXD85uSFokRi95iHom-5-xaRB495WNcaKn5Y,4777
|
71
|
+
reme_ai/summary/task/trajectory_segmentation_prompt.yaml,sha256=8E5nDQn9x7DHC6P1dsF3l_bd8UITla9AyfmSLoyFLpM,1193
|
72
|
+
reme_ai/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
73
|
+
reme_ai/utils/datetime_handler.py,sha256=mTf-c7Ko7crrTzh1hU5v-A7Hbg_8gQRCeN2c-aoP3Tw,14451
|
74
|
+
reme_ai/utils/op_utils.py,sha256=tYsAl5LcQKe0I7YyEe8VXjrjJGVZ7Q_oD28qorJsCB0,4175
|
75
|
+
reme_ai/vector_store/__init__.py,sha256=1Yh14F5UZirKHxM7cgWiATpoZUqWGo147KlJK_wD03Q,327
|
76
|
+
reme_ai/vector_store/delete_memory_op.py,sha256=MqV-0UCt4S7h7yvR5EfmLQes-COgK1m6SqHIiWtGG08,917
|
77
|
+
reme_ai/vector_store/recall_vector_store_op.py,sha256=AUcEOTTllvppcClkSjksERnihekZIi2FOx_XBG036QY,1476
|
78
|
+
reme_ai/vector_store/update_memory_freq_op.py,sha256=nZ7WIqC8hN-Pv9QZrTviax6DA1CVyoCX2cF0XrmTAV4,1021
|
79
|
+
reme_ai/vector_store/update_memory_utility_op.py,sha256=fXYAtgfUxmhJkgKA2kHucOBalXAk9O6vvG8Bn9asOTQ,1025
|
80
|
+
reme_ai/vector_store/update_vector_store_op.py,sha256=_8T94tHe2nd5phsgmPZQdrYfVR7FvydMm9MJzlrESl4,1333
|
81
|
+
reme_ai/vector_store/vector_store_action_op.py,sha256=qeoB_e0WBTnB9yQUkyc8jNNRO9JK_bcCwS2oU0LTJxs,2535
|
82
|
+
reme_ai-0.1.5.dist-info/licenses/LICENSE,sha256=zFTWearO11HAlvEgtmY1XBBtk5TSj5P23zU5c_bNfb4,11343
|
83
|
+
reme_ai-0.1.5.dist-info/METADATA,sha256=EAsEdZuAJyF2zCO3Rv7hN3B9PIzrcMUcmwe_Iar8tAY,27137
|
84
|
+
reme_ai-0.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
85
|
+
reme_ai-0.1.5.dist-info/entry_points.txt,sha256=6SP3ncXOMyKotdT4LHWPeaXo3-Sv-1qmK7OhVw76Xhw,42
|
86
|
+
reme_ai-0.1.5.dist-info/top_level.txt,sha256=3ca2UBk97aSfPmGdg8LlVqyeLikb5qEnBEbfGgtzao0,8
|
87
|
+
reme_ai-0.1.5.dist-info/RECORD,,
|
@@ -1,215 +0,0 @@
|
|
1
|
-
from memoryscope.enumeration.language_enum import LanguageEnum
|
2
|
-
|
3
|
-
# This dictionary maps languages to lists of words related to datetime expressions.
|
4
|
-
# It aids in recognizing and processing datetime mentions in text, enhancing the system's ability to understand
|
5
|
-
# temporal context across different languages.
|
6
|
-
DATATIME_WORD_LIST = {
|
7
|
-
LanguageEnum.CN: [
|
8
|
-
"天",
|
9
|
-
"周",
|
10
|
-
"月",
|
11
|
-
"年",
|
12
|
-
"星期",
|
13
|
-
"点",
|
14
|
-
"分钟",
|
15
|
-
"小时",
|
16
|
-
"秒",
|
17
|
-
"上午",
|
18
|
-
"下午",
|
19
|
-
"早上",
|
20
|
-
"早晨",
|
21
|
-
"晚上",
|
22
|
-
"中午",
|
23
|
-
"日",
|
24
|
-
"夜",
|
25
|
-
"清晨",
|
26
|
-
"傍晚",
|
27
|
-
"凌晨",
|
28
|
-
"岁",
|
29
|
-
],
|
30
|
-
LanguageEnum.EN: [
|
31
|
-
# Units of Time
|
32
|
-
"year", "yr",
|
33
|
-
"month", "mo",
|
34
|
-
"week", "wk",
|
35
|
-
"day", "d",
|
36
|
-
"hour", "hr",
|
37
|
-
"minute", "min",
|
38
|
-
"second", "sec",
|
39
|
-
|
40
|
-
# Days of the Week
|
41
|
-
"Monday", "Mon",
|
42
|
-
"Tuesday", "Tue", "Tues",
|
43
|
-
"Wednesday", "Wed",
|
44
|
-
"Thursday", "Thu", "Thur", "Thurs",
|
45
|
-
"Friday", "Fri",
|
46
|
-
"Saturday", "Sat",
|
47
|
-
"Sunday", "Sun",
|
48
|
-
|
49
|
-
# Months of the Year
|
50
|
-
"January", "Jan",
|
51
|
-
"February", "Feb",
|
52
|
-
"March", "Mar",
|
53
|
-
"April", "Apr",
|
54
|
-
"May", "May",
|
55
|
-
"June", "Jun",
|
56
|
-
"July", "Jul",
|
57
|
-
"August", "Aug",
|
58
|
-
"September", "Sep", "Sept",
|
59
|
-
"October", "Oct",
|
60
|
-
"November", "Nov",
|
61
|
-
"December", "Dec",
|
62
|
-
|
63
|
-
# Relative Time References
|
64
|
-
"Today",
|
65
|
-
"Tomorrow", "Tmrw",
|
66
|
-
"Yesterday", "Yday",
|
67
|
-
"Now",
|
68
|
-
"Morning", "AM", "a.m.",
|
69
|
-
"Afternoon", "PM", "p.m.",
|
70
|
-
"Evening",
|
71
|
-
"Night",
|
72
|
-
"Midnight",
|
73
|
-
"Noon",
|
74
|
-
|
75
|
-
# Seasonal References
|
76
|
-
"Spring",
|
77
|
-
"Summer",
|
78
|
-
"Autumn", "Fall",
|
79
|
-
"Winter",
|
80
|
-
|
81
|
-
# General Time References
|
82
|
-
"Century", "cent.",
|
83
|
-
"Decade",
|
84
|
-
"Millennium",
|
85
|
-
"Quarter", "Q1", "Q2", "Q3", "Q4",
|
86
|
-
"Semester",
|
87
|
-
"Fortnight",
|
88
|
-
"Weekend"
|
89
|
-
]
|
90
|
-
}
|
91
|
-
|
92
|
-
# A mapping of weekdays for each supported language, facilitating calendar-related operations and understanding
|
93
|
-
# within the application.
|
94
|
-
WEEKDAYS = {
|
95
|
-
LanguageEnum.CN: [
|
96
|
-
"周一",
|
97
|
-
"周二",
|
98
|
-
"周三",
|
99
|
-
"周四",
|
100
|
-
"周五",
|
101
|
-
"周六",
|
102
|
-
"周日"
|
103
|
-
],
|
104
|
-
LanguageEnum.EN: [
|
105
|
-
"Monday",
|
106
|
-
"Tuesday",
|
107
|
-
"Wednesday",
|
108
|
-
"Thursday",
|
109
|
-
"Friday",
|
110
|
-
"Saturday",
|
111
|
-
"Sunday",
|
112
|
-
]
|
113
|
-
}
|
114
|
-
|
115
|
-
MONTH_DICT = {
|
116
|
-
LanguageEnum.CN: [
|
117
|
-
"1月",
|
118
|
-
"2月",
|
119
|
-
"3月",
|
120
|
-
"4月",
|
121
|
-
"5月",
|
122
|
-
"6月",
|
123
|
-
"7月",
|
124
|
-
"8月",
|
125
|
-
"9月",
|
126
|
-
"10月",
|
127
|
-
"11月",
|
128
|
-
"12月",
|
129
|
-
],
|
130
|
-
LanguageEnum.EN: [
|
131
|
-
"January",
|
132
|
-
"February",
|
133
|
-
"March",
|
134
|
-
"April",
|
135
|
-
"May",
|
136
|
-
"June",
|
137
|
-
"July",
|
138
|
-
"August",
|
139
|
-
"September",
|
140
|
-
"October",
|
141
|
-
"November",
|
142
|
-
"December",
|
143
|
-
]
|
144
|
-
}
|
145
|
-
|
146
|
-
# Constants for the word 'none' in different languages
|
147
|
-
NONE_WORD = {
|
148
|
-
LanguageEnum.CN: "无",
|
149
|
-
LanguageEnum.EN: "none"
|
150
|
-
}
|
151
|
-
|
152
|
-
# Constants for the word 'repeated' in different languages
|
153
|
-
REPEATED_WORD = {
|
154
|
-
LanguageEnum.CN: "重复",
|
155
|
-
LanguageEnum.EN: "repeated"
|
156
|
-
}
|
157
|
-
|
158
|
-
# Constants for the word 'contradictory' in different languages
|
159
|
-
CONTRADICTORY_WORD = {
|
160
|
-
LanguageEnum.CN: "矛盾",
|
161
|
-
LanguageEnum.EN: "contradiction"
|
162
|
-
}
|
163
|
-
|
164
|
-
# Constants for the phrase 'included' in different languages
|
165
|
-
CONTAINED_WORD = {
|
166
|
-
LanguageEnum.CN: "被包含",
|
167
|
-
LanguageEnum.EN: "contained"
|
168
|
-
}
|
169
|
-
|
170
|
-
# Constants for the symbol ':' in different languages' representations
|
171
|
-
COLON_WORD = {
|
172
|
-
LanguageEnum.CN: ":",
|
173
|
-
LanguageEnum.EN: ":"
|
174
|
-
}
|
175
|
-
|
176
|
-
# Constants for the symbol ',' in different languages' representations
|
177
|
-
COMMA_WORD = {
|
178
|
-
LanguageEnum.CN: ",",
|
179
|
-
LanguageEnum.EN: ","
|
180
|
-
}
|
181
|
-
|
182
|
-
# Default human name placeholders for different languages
|
183
|
-
DEFAULT_HUMAN_NAME = {
|
184
|
-
LanguageEnum.CN: "用户",
|
185
|
-
LanguageEnum.EN: "user"
|
186
|
-
}
|
187
|
-
|
188
|
-
# Mapping of datetime terms from natural language to standardized keys for each supported language
|
189
|
-
DATATIME_KEY_MAP = {
|
190
|
-
LanguageEnum.CN: {
|
191
|
-
"年": "year",
|
192
|
-
"月": "month",
|
193
|
-
"日": "day",
|
194
|
-
"周": "week",
|
195
|
-
"星期几": "weekday",
|
196
|
-
},
|
197
|
-
LanguageEnum.EN: {
|
198
|
-
"Year": "year",
|
199
|
-
"Month": "month",
|
200
|
-
"Day": "day",
|
201
|
-
"Week": "week",
|
202
|
-
"Weekday": "weekday",
|
203
|
-
}
|
204
|
-
}
|
205
|
-
|
206
|
-
# Phrase for indicating inferred time in different languages
|
207
|
-
TIME_INFER_WORD = {
|
208
|
-
LanguageEnum.CN: "推断时间",
|
209
|
-
LanguageEnum.EN: "Inference time"
|
210
|
-
}
|
211
|
-
|
212
|
-
USER_NAME_EXPRESSION = {
|
213
|
-
LanguageEnum.CN: "用户姓名是{name}。",
|
214
|
-
LanguageEnum.EN: "User's name is {name}."
|
215
|
-
}
|
@@ -1,50 +0,0 @@
|
|
1
|
-
from flowllm import C, BaseOp
|
2
|
-
from loguru import logger
|
3
|
-
|
4
|
-
from reme_ai.utils.miner_u_pdf_processor import MinerUPDFProcessor, chunk_pdf_content
|
5
|
-
|
6
|
-
|
7
|
-
@C.register_op()
|
8
|
-
class PDFPreprocessOp(BaseOp):
|
9
|
-
file_path: str = __file__
|
10
|
-
|
11
|
-
def execute(self):
|
12
|
-
"""Process PDF files using MinerU and chunk content"""
|
13
|
-
pdf_path = self.context.get("pdf_path")
|
14
|
-
output_dir = self.context.get("output_dir")
|
15
|
-
|
16
|
-
if not pdf_path:
|
17
|
-
logger.error("No PDF path provided in context")
|
18
|
-
return
|
19
|
-
|
20
|
-
# Process PDF
|
21
|
-
processor = MinerUPDFProcessor(log_level="INFO")
|
22
|
-
|
23
|
-
try:
|
24
|
-
content_list, markdown_content = processor.process_pdf(
|
25
|
-
pdf_path=pdf_path,
|
26
|
-
output_dir=output_dir,
|
27
|
-
method=self.op_params.get("method", "auto"),
|
28
|
-
lang=self.op_params.get("lang"),
|
29
|
-
backend=self.op_params.get("backend", "pipeline")
|
30
|
-
)
|
31
|
-
|
32
|
-
# Create chunks if requested
|
33
|
-
chunks = []
|
34
|
-
if self.op_params.get("create_chunks", True):
|
35
|
-
max_length = self.op_params.get("max_chunk_length", 4000)
|
36
|
-
chunks = chunk_pdf_content(content_list, max_length=max_length)
|
37
|
-
|
38
|
-
# Store results in context
|
39
|
-
self.context.pdf_content_list = content_list
|
40
|
-
self.context.pdf_markdown_content = markdown_content
|
41
|
-
self.context.pdf_chunks = chunks
|
42
|
-
|
43
|
-
logger.info(f"PDF processing completed: {len(content_list)} content blocks, "
|
44
|
-
f"{len(chunks)} chunks, {len(markdown_content)} characters of markdown")
|
45
|
-
|
46
|
-
except Exception as e:
|
47
|
-
logger.error(f"PDF processing failed: {e}")
|
48
|
-
self.context.pdf_content_list = []
|
49
|
-
self.context.pdf_markdown_content = ""
|
50
|
-
self.context.pdf_chunks = []
|