palimpzest 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- palimpzest/constants.py +38 -62
- palimpzest/core/data/iter_dataset.py +5 -5
- palimpzest/core/elements/groupbysig.py +1 -1
- palimpzest/core/elements/records.py +91 -109
- palimpzest/core/lib/schemas.py +23 -0
- palimpzest/core/models.py +3 -3
- palimpzest/prompts/__init__.py +2 -6
- palimpzest/prompts/convert_prompts.py +10 -66
- palimpzest/prompts/critique_and_refine_prompts.py +66 -0
- palimpzest/prompts/filter_prompts.py +8 -46
- palimpzest/prompts/join_prompts.py +12 -75
- palimpzest/prompts/{moa_aggregator_convert_prompts.py → moa_aggregator_prompts.py} +51 -2
- palimpzest/prompts/moa_proposer_prompts.py +87 -0
- palimpzest/prompts/prompt_factory.py +351 -479
- palimpzest/prompts/split_merge_prompts.py +51 -2
- palimpzest/prompts/split_proposer_prompts.py +48 -16
- palimpzest/prompts/utils.py +109 -0
- palimpzest/query/execution/all_sample_execution_strategy.py +1 -1
- palimpzest/query/execution/execution_strategy.py +4 -4
- palimpzest/query/execution/mab_execution_strategy.py +1 -2
- palimpzest/query/execution/parallel_execution_strategy.py +3 -3
- palimpzest/query/execution/single_threaded_execution_strategy.py +8 -8
- palimpzest/query/generators/generators.py +31 -17
- palimpzest/query/operators/__init__.py +15 -2
- palimpzest/query/operators/aggregate.py +21 -19
- palimpzest/query/operators/compute.py +6 -8
- palimpzest/query/operators/convert.py +12 -37
- palimpzest/query/operators/critique_and_refine.py +194 -0
- palimpzest/query/operators/distinct.py +7 -7
- palimpzest/query/operators/filter.py +13 -25
- palimpzest/query/operators/join.py +321 -192
- palimpzest/query/operators/limit.py +4 -4
- palimpzest/query/operators/mixture_of_agents.py +246 -0
- palimpzest/query/operators/physical.py +25 -2
- palimpzest/query/operators/project.py +4 -4
- palimpzest/query/operators/{rag_convert.py → rag.py} +202 -5
- palimpzest/query/operators/retrieve.py +10 -9
- palimpzest/query/operators/scan.py +9 -10
- palimpzest/query/operators/search.py +18 -24
- palimpzest/query/operators/split.py +321 -0
- palimpzest/query/optimizer/__init__.py +12 -8
- palimpzest/query/optimizer/optimizer.py +12 -10
- palimpzest/query/optimizer/rules.py +201 -108
- palimpzest/query/optimizer/tasks.py +18 -6
- palimpzest/validator/validator.py +7 -9
- {palimpzest-0.8.2.dist-info → palimpzest-0.8.3.dist-info}/METADATA +3 -8
- palimpzest-0.8.3.dist-info/RECORD +95 -0
- palimpzest/prompts/critique_and_refine_convert_prompts.py +0 -216
- palimpzest/prompts/moa_proposer_convert_prompts.py +0 -75
- palimpzest/prompts/util_phrases.py +0 -19
- palimpzest/query/operators/critique_and_refine_convert.py +0 -113
- palimpzest/query/operators/mixture_of_agents_convert.py +0 -140
- palimpzest/query/operators/split_convert.py +0 -170
- palimpzest-0.8.2.dist-info/RECORD +0 -95
- {palimpzest-0.8.2.dist-info → palimpzest-0.8.3.dist-info}/WHEEL +0 -0
- {palimpzest-0.8.2.dist-info → palimpzest-0.8.3.dist-info}/licenses/LICENSE +0 -0
- {palimpzest-0.8.2.dist-info → palimpzest-0.8.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
palimpzest/__init__.py,sha256=1PzadDDOVMQJKNEYUH0_tw8tQKUYTT31M0vuzTr2Rqk,1694
|
|
2
|
+
palimpzest/constants.py,sha256=7WO9eJEBgERN3XkheS1lsIzrB3ljhT4jRqtRlQa6_lA,23095
|
|
3
|
+
palimpzest/policy.py,sha256=lIvw_C_rmwCH4LZaeNkAuixl8zw9RAW_JcSWSHPjKyc,11628
|
|
4
|
+
palimpzest/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
palimpzest/agents/compute_agents.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
palimpzest/agents/search_agents.py,sha256=t2QMreB5Ph71aoNk5bBtV-0l8im79z-pMAR3JDAySDw,29418
|
|
7
|
+
palimpzest/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
palimpzest/core/models.py,sha256=VNi49i9xn_FxekyYrGPS1-_C_PaGXL8dz-dqjrIOk8g,42477
|
|
9
|
+
palimpzest/core/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
palimpzest/core/data/context.py,sha256=x1xYyu9qW65dvtK_XayIfv_CgsCEPW6Qe0DTiSf9sjU,16207
|
|
11
|
+
palimpzest/core/data/context_manager.py,sha256=8hAKWD2jhFZgghTu7AYgjkvKDsJUPVxq8g4nG0HWvfo,6150
|
|
12
|
+
palimpzest/core/data/dataset.py,sha256=M7SxPXzHsfj-ljy_P3ckcJNqGf4RwNxtZI02q_tmL2M,28178
|
|
13
|
+
palimpzest/core/data/index_dataset.py,sha256=adO67DgzHhA4lBME0-h4SjXfdz9UcNMSDGXTpUdKbgE,1929
|
|
14
|
+
palimpzest/core/data/iter_dataset.py,sha256=K47ajOXsCZV3WhOuDkw3xfiHzn8mXPU976uN3SjaP2U,20507
|
|
15
|
+
palimpzest/core/elements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
palimpzest/core/elements/filters.py,sha256=fU2x0eWDwfP52_5fUmqJXTuhs4H0vvHtPZLdA3IIw8I,1642
|
|
17
|
+
palimpzest/core/elements/groupbysig.py,sha256=oFH5UkZzcR0msAgfQiRQOOvyJ3HaW4Dwr03h7tVOcrM,2324
|
|
18
|
+
palimpzest/core/elements/records.py,sha256=KNY55cN9OuV9Q6apUaUq9W_WyfNremqFlQjClyWR1PU,18518
|
|
19
|
+
palimpzest/core/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
palimpzest/core/lib/schemas.py,sha256=eFH_Lw3UoXLPk_-5Pds5NPRUXKhILPtCkErB26FyKMo,8983
|
|
21
|
+
palimpzest/prompts/__init__.py,sha256=942kdENfPU5mFjIxYm-FusL0FD6LNhoj6cYoSGiUsCI,1628
|
|
22
|
+
palimpzest/prompts/agent_prompts.py,sha256=CUzBVLBiPSw8OShtKp4VTpQwtrNMtcMglo-IZHMvuDM,17459
|
|
23
|
+
palimpzest/prompts/context_search.py,sha256=s3pti4XNRiIyiWzjVNL_NqmqEc31jzSKMF2SlN0Aaf8,357
|
|
24
|
+
palimpzest/prompts/convert_prompts.py,sha256=KJ6x9MqM63mAPT61v3GFFLdRZB-NKSZYolWXszlhDx8,3122
|
|
25
|
+
palimpzest/prompts/critique_and_refine_prompts.py,sha256=hFApYb3e5NdyL5DhX2haxA0cCyhp1WUJsegQfkeg1zM,2857
|
|
26
|
+
palimpzest/prompts/filter_prompts.py,sha256=D-aY3-th1GzEHrVGbKORVN2R7x7coYGjp8FrEN_1UKs,2338
|
|
27
|
+
palimpzest/prompts/join_prompts.py,sha256=z-y4L1cw1O3I_F9DW6MvqeztdQoKDQawX6nK6vQAkdM,2916
|
|
28
|
+
palimpzest/prompts/moa_aggregator_prompts.py,sha256=b5cz4G2oF86LlHOy8vmtxoMcZ9zaZoppKrURHgzCzNU,5248
|
|
29
|
+
palimpzest/prompts/moa_proposer_prompts.py,sha256=yfZYwmCg-Tg9h0H7PJMEuDYPR45EbYnORmVX6cY2vRQ,3125
|
|
30
|
+
palimpzest/prompts/prompt_factory.py,sha256=32GFAfvWOwRHUsAMRDPEiFzaObRK8FeVfGgkG-QKcYs,44187
|
|
31
|
+
palimpzest/prompts/split_merge_prompts.py,sha256=hX-MThmW4VU7rjgm7gb-bpniEMdj25mtp0o8qBeWvIQ,5573
|
|
32
|
+
palimpzest/prompts/split_proposer_prompts.py,sha256=Ucqwfn4FqFk-b9E024EK4e_3_QndTJjggwiwa1x5CQs,3115
|
|
33
|
+
palimpzest/prompts/utils.py,sha256=iFv4nuFRuON-DEAdO2JI-J84ukV8Ev27YYWPLwfk44A,5655
|
|
34
|
+
palimpzest/prompts/validator.py,sha256=pJTZjlt_OiFM3IFOgsJ0jQdayra8iRVrpqENlXI9tQQ,10532
|
|
35
|
+
palimpzest/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
36
|
+
palimpzest/query/execution/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
palimpzest/query/execution/all_sample_execution_strategy.py,sha256=8a8-eKsndo_edCwIamNgcISLQmTzVSv5vmD6Ogl8a6k,14367
|
|
38
|
+
palimpzest/query/execution/execution_strategy.py,sha256=Lg2anGM8I4WBetqduWwnPFE67sfihHJwsu3fQ_sYYJk,18909
|
|
39
|
+
palimpzest/query/execution/execution_strategy_type.py,sha256=vRQBPCQN5_aoyD3TLIeW3VPo15mqF-5RBvEXkENz9FE,987
|
|
40
|
+
palimpzest/query/execution/mab_execution_strategy.py,sha256=LKAi1RWGsIK7ppjWmH8sbAbHjyLA12y4tBn1ycOt35Q,43554
|
|
41
|
+
palimpzest/query/execution/parallel_execution_strategy.py,sha256=roZZy7wLcmAwm_ecYvqSJanRaiox3OoNPuXxvRZ5TXg,15710
|
|
42
|
+
palimpzest/query/execution/single_threaded_execution_strategy.py,sha256=sESji79ytKxth9Tpm02c34Mltw0YiFn4GL5h0MI5Noo,16255
|
|
43
|
+
palimpzest/query/generators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
palimpzest/query/generators/generators.py,sha256=UldCUEwaiBfpvQDieA-h7SiC8KM76gCABPj-mvqAJus,21500
|
|
45
|
+
palimpzest/query/operators/__init__.py,sha256=yjOdbx3Sm5uVTR5R7fw66F602qBBkguH1ktv48T_JJ8,4264
|
|
46
|
+
palimpzest/query/operators/aggregate.py,sha256=NZ_rmi0YrbNFivbBgKtonrCrK6fZw4h9Pm4lMMI5XVc,11376
|
|
47
|
+
palimpzest/query/operators/compute.py,sha256=X_pWN45smg8L4dV54nOae7dldQGL1nJVlVyJ3ULWSmI,8432
|
|
48
|
+
palimpzest/query/operators/convert.py,sha256=VfrWUFyuZC8fPf7LR7mMfpOjqSfxAuTLUxw-S-pn7hk,16123
|
|
49
|
+
palimpzest/query/operators/critique_and_refine.py,sha256=Q-NhasVoD9meX7g36RPrv3q4R48_8XEU4d3TE46hRJI,8979
|
|
50
|
+
palimpzest/query/operators/distinct.py,sha256=ZTXlIS7IaFRTsWv9RemzCo1JLz25vEma-TB42CV5fJQ,2614
|
|
51
|
+
palimpzest/query/operators/filter.py,sha256=ufREsO2-8CBk4u4fabDBYpEvb806E11EOyW-wuRs4vw,10356
|
|
52
|
+
palimpzest/query/operators/join.py,sha256=79pdA4gEgyXcWirkIRYdWNljrKjDvjtApGXKsroYElA,25591
|
|
53
|
+
palimpzest/query/operators/limit.py,sha256=pdo7WfWY97SW3c-WqZ4SIPw7lHIVbaXPEWqHyK8qkF8,2130
|
|
54
|
+
palimpzest/query/operators/logical.py,sha256=K_dRlNKkda35kQ7gYGsrW9PoFuDPzexpjtDq_FYdhVw,20223
|
|
55
|
+
palimpzest/query/operators/mixture_of_agents.py,sha256=TWdg6XEg2u4TQM4d94gmbYqnK15wC7Q4Cyefp8SA4i8,11547
|
|
56
|
+
palimpzest/query/operators/physical.py,sha256=T32uwehr5t0LXB9wcXZ3Sm41zthmyz86NdRQ9rsqLrE,9505
|
|
57
|
+
palimpzest/query/operators/project.py,sha256=gxbjsHEACCP9wxATH-mw6wOSUi5s13JyWsLqqhAYWXQ,2111
|
|
58
|
+
palimpzest/query/operators/rag.py,sha256=k2vdthQBNb7FEYX7K1UP9fSQfwK09HNwVJu72ExSsCA,20045
|
|
59
|
+
palimpzest/query/operators/retrieve.py,sha256=-OvEWmxwbepGz0w40FpHbqcOHZQ4Bp-MdXFbYTmxdVY,13082
|
|
60
|
+
palimpzest/query/operators/scan.py,sha256=OqCiPRTvTY7SbauNMyFvGT5nRVeRzVsGYSrkoN1Ib_w,7407
|
|
61
|
+
palimpzest/query/operators/search.py,sha256=cQin-Qc9FT7V0Gv3-pxMLbVMjqE6ALe99V0OrQhA6CI,22711
|
|
62
|
+
palimpzest/query/operators/split.py,sha256=oLzwnYb8TNf3XA9TMKEAIw7EIA12wHneaD42BNLIHiI,15043
|
|
63
|
+
palimpzest/query/optimizer/__init__.py,sha256=XsWk_qDh4lvPGhQgsya6D3ZWWHk6UXRUEzHuPHDifx8,2418
|
|
64
|
+
palimpzest/query/optimizer/cost_model.py,sha256=OldPy-TJdfsQbYRoKlb3yWeKbi15jcldTIUS6BTi9T8,12678
|
|
65
|
+
palimpzest/query/optimizer/optimizer.py,sha256=bsH4RhdDncbrGLA9Pq6kZ5CoqjetQm_5Vyl7l48jKpQ,19578
|
|
66
|
+
palimpzest/query/optimizer/optimizer_strategy.py,sha256=9YlNGkqwgX0WaV6y8tKOOHVN8kC8GjDI3DttvGW5SYY,10206
|
|
67
|
+
palimpzest/query/optimizer/optimizer_strategy_type.py,sha256=V-MMHvJdnfZKoUX1xxxwh66q1RjN2FL35IsiT1C62c8,1084
|
|
68
|
+
palimpzest/query/optimizer/plan.py,sha256=VIhN7tWT7EoRE9BKYa1qvvOhX7dEaM-aiobByX0qjzg,22900
|
|
69
|
+
palimpzest/query/optimizer/primitives.py,sha256=jMMVq37y1tWiPU1lSSKQP9OP-mzkpSxSmUeDajRYYOQ,5445
|
|
70
|
+
palimpzest/query/optimizer/rules.py,sha256=rVWIsygEMQmT1_rdHUzsOXC1GYCzLDhiuGjwkszyl-Y,48591
|
|
71
|
+
palimpzest/query/optimizer/tasks.py,sha256=SAoErqxHwoE7nIhLwyH-2YN2nHYG3gR4m4NV7p0U7AE,30028
|
|
72
|
+
palimpzest/query/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
|
+
palimpzest/query/processor/config.py,sha256=vHVsgeBnKigacO0QA7bLf5q8pJhFWA2j9-p_no2bmYo,2366
|
|
74
|
+
palimpzest/query/processor/query_processor.py,sha256=T4ffPbnOX23G8FDITzmM7Iw7DUEDWIHnwl8XLYllgjg,6240
|
|
75
|
+
palimpzest/query/processor/query_processor_factory.py,sha256=6w9R1Y8AOV22X8MUf7g2G5Qb15BGEZAXQKbCQJafWJ0,8048
|
|
76
|
+
palimpzest/schemabuilder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
77
|
+
palimpzest/schemabuilder/schema_builder.py,sha256=QraGp66dcD-ej6Y2mER40o86G9JqlBkL7swkJzjUAIY,7968
|
|
78
|
+
palimpzest/tools/README.md,sha256=56_6LPG80uc0CLVhTBP6I1wgIffNv9cyTr0TmVZqmrM,483
|
|
79
|
+
palimpzest/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
80
|
+
palimpzest/tools/allenpdf.py,sha256=fXMOmSDdSSLXDKAPYYJ8k4egtWEBf_Me9Lq9tM3iyoA,1690
|
|
81
|
+
palimpzest/tools/pdfparser.py,sha256=0DOVUZLxYfqjxM8WNEfYcyiXb1qW9BWVIHEB_B_YhWA,9570
|
|
82
|
+
palimpzest/tools/skema_tools.py,sha256=HXUFpjMhbVxZwKKkATeK-FwtlTCawaCbeP-uHntI1Kg,669
|
|
83
|
+
palimpzest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
|
+
palimpzest/utils/env_helpers.py,sha256=n81KzoJ459pRxo7QmJA7duazwWsfoMGTHc71D2LatFk,334
|
|
85
|
+
palimpzest/utils/hash_helpers.py,sha256=3A8dA7SbXTwnnvZvPVNqqMLlVRhCKyKF_bjNNAu3Exk,334
|
|
86
|
+
palimpzest/utils/model_helpers.py,sha256=X6SlMgD5I5Aj_cxaFaoGaaNvOOqTNZVmjj6zbfn63Yk,2476
|
|
87
|
+
palimpzest/utils/progress.py,sha256=7gucyZr82udMDZitrrkAOSKHZVljE3R2wv9nf5gA5TM,20807
|
|
88
|
+
palimpzest/utils/udfs.py,sha256=LjHic54B1az-rKgNLur0wOpaz2ko_UodjLEJrazkxvY,1854
|
|
89
|
+
palimpzest/validator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
|
+
palimpzest/validator/validator.py,sha256=dbqpSnis-3u6fpVmRMNwBlx5owwyAXH-sktN-eFoZVU,15995
|
|
91
|
+
palimpzest-0.8.3.dist-info/licenses/LICENSE,sha256=5GUlHy9lr-Py9kvV38FF1m3yy3NqM18fefuE9wkWumo,1079
|
|
92
|
+
palimpzest-0.8.3.dist-info/METADATA,sha256=ncTqIyyVX_1nCCF2X4h3JqSAWJ5CtWcvUYIvywiI8ZE,7048
|
|
93
|
+
palimpzest-0.8.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
94
|
+
palimpzest-0.8.3.dist-info/top_level.txt,sha256=raV06dJUgohefUn3ZyJS2uqp_Y76EOLA9Y2e_fxt8Ew,11
|
|
95
|
+
palimpzest-0.8.3.dist-info/RECORD,,
|
|
@@ -1,216 +0,0 @@
|
|
|
1
|
-
"""This file contains prompts for CriticAndRefineConvert operations."""
|
|
2
|
-
|
|
3
|
-
### CRITIQUE PROMPT AND CRITERIA ###
|
|
4
|
-
BASE_CRITIQUE_PROMPT = """You are a helpful assistant tasked with critiquing the output of a model based on a given prompt.
|
|
5
|
-
Below is the original user prompt used to generate the output:
|
|
6
|
-
|
|
7
|
-
ORIGINAL PROMPT:
|
|
8
|
-
<<original-prompt-placeholder>>
|
|
9
|
-
|
|
10
|
-
Here is the output generated by the model:
|
|
11
|
-
|
|
12
|
-
OUTPUT:
|
|
13
|
-
{original_output}
|
|
14
|
-
|
|
15
|
-
Your task is to critique the output based on the following:
|
|
16
|
-
{critique_criteria}
|
|
17
|
-
|
|
18
|
-
{finish_instruction}
|
|
19
|
-
"""
|
|
20
|
-
|
|
21
|
-
# BASE_CRITIQUE_PROMPT = """You are a helpful assistant tasked with critiquing the output of a model based on a given prompt.
|
|
22
|
-
# Below is the original user prompt used to generate the output:
|
|
23
|
-
|
|
24
|
-
# ORIGINAL PROMPT:
|
|
25
|
-
# {user_prompt}
|
|
26
|
-
|
|
27
|
-
# Here is the output generated by the model:
|
|
28
|
-
|
|
29
|
-
# OUTPUT:
|
|
30
|
-
# {original_output}
|
|
31
|
-
|
|
32
|
-
# Your task is to critique the output based on the following:
|
|
33
|
-
# {critique_criteria}
|
|
34
|
-
|
|
35
|
-
# {finish_instruction}
|
|
36
|
-
# """
|
|
37
|
-
|
|
38
|
-
# COT_BOOL_CRITIQUE_CRITERIA = """1. Does the output adhere to the required TRUE or FALSE format?
|
|
39
|
-
# 2. Is the reasoning provided logically sound and well-supported by the context?
|
|
40
|
-
# 3. Are there any errors in applying the filter condition to the given context?
|
|
41
|
-
# """
|
|
42
|
-
|
|
43
|
-
# COT_BOOL_IMAGE_CRITIQUE_CRITERIA = """1. Does the output adhere to the required TRUE or FALSE format?
|
|
44
|
-
# 2. Is the reasoning provided logically sound and well-supported by the image(s) and/or text?
|
|
45
|
-
# 3. Are there any errors in applying the filter condition to the given context?
|
|
46
|
-
# """
|
|
47
|
-
|
|
48
|
-
COT_QA_CRITIQUE_CRITERIA = """1. Does the JSON object adhere to the required format? Highlight any structural issues.
|
|
49
|
-
2. Are the values of the output fields accurate based on the provided context? If any fields are incorrect or missing, provide specific examples.
|
|
50
|
-
3. Are there any logical errors in reasoning used to derive the output? Provide detailed feedback on potential mistakes.
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
COT_QA_IMAGE_CRITIQUE_CRITERIA = """1. Does the JSON object adhere to the required format?
|
|
54
|
-
2. Are the values of the output fields accurate based on the provided image(s) and/or text?
|
|
55
|
-
3. Are there any logical errors in the model's reasoning when extracting information from images and text?
|
|
56
|
-
"""
|
|
57
|
-
|
|
58
|
-
# COT_BO0L_CRITIQUE_FINISH_INSTRUCTION = """Finish your critique with actionable recommendations for improving the model's reasoning and answer format."""
|
|
59
|
-
COT_QA_CRITIQUE_FINISH_INSTRUCTION = """Finish your critique with actionable recommendations for improving the JSON object."""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
### REFINEMENT PROMPT AND CRITERIA ###
|
|
63
|
-
BASE_REFINEMENT_PROMPT = """You are a helpful assistant tasked with refining the output of a model based on a critique.
|
|
64
|
-
Below is the original user prompt used to generate the output:
|
|
65
|
-
|
|
66
|
-
ORIGINAL PROMPT:
|
|
67
|
-
<<original-prompt-placeholder>>
|
|
68
|
-
|
|
69
|
-
Here is the original output generated by the model:
|
|
70
|
-
|
|
71
|
-
OUTPUT:
|
|
72
|
-
{original_output}
|
|
73
|
-
|
|
74
|
-
Here is the critique of the output:
|
|
75
|
-
|
|
76
|
-
CRITIQUE:
|
|
77
|
-
{critique_output}
|
|
78
|
-
|
|
79
|
-
Your task is to refine the original output to address the critique. Ensure that:
|
|
80
|
-
{refinement_criteria}
|
|
81
|
-
|
|
82
|
-
{finish_instruction}
|
|
83
|
-
"""
|
|
84
|
-
|
|
85
|
-
# BASE_REFINEMENT_PROMPT = """You are a helpful assistant tasked with refining the output of a model based on a critique.
|
|
86
|
-
# Below is the original user prompt used to generate the output:
|
|
87
|
-
|
|
88
|
-
# ORIGINAL PROMPT:
|
|
89
|
-
# {user_prompt}
|
|
90
|
-
|
|
91
|
-
# Here is the output generated by the model:
|
|
92
|
-
|
|
93
|
-
# OUTPUT:
|
|
94
|
-
# {original_output}
|
|
95
|
-
|
|
96
|
-
# Here is the critique of the output:
|
|
97
|
-
|
|
98
|
-
# CRITIQUE:
|
|
99
|
-
# {critique_output}
|
|
100
|
-
|
|
101
|
-
# Your task is to refine the original output to address the critique. Ensure that:
|
|
102
|
-
# {refinement_criteria}
|
|
103
|
-
|
|
104
|
-
# {finish_instruction}
|
|
105
|
-
# """
|
|
106
|
-
|
|
107
|
-
# COT_BOOL_REFINEMENT_CRITERIA = """1. The answer adheres to the required TRUE or FALSE format.
|
|
108
|
-
# 2. The reasoning is logically sound and supported by the given context.
|
|
109
|
-
# 3. The filter condition is correctly applied.
|
|
110
|
-
# """
|
|
111
|
-
|
|
112
|
-
# COT_BOOL_IMAGE_REFINEMENT_CRITERIA = """1. The answer adheres to the required TRUE or FALSE format.
|
|
113
|
-
# 2. The reasoning correctly considers the image(s) and/or text provided in the context.
|
|
114
|
-
# 3. The filter condition is properly applied.
|
|
115
|
-
# """
|
|
116
|
-
|
|
117
|
-
COT_QA_REFINEMENT_CRITERIA = """1. The answer adheres to the required JSON format specified in the original prompt.
|
|
118
|
-
2. Correctly derives all values for the output fields based on the provided context.
|
|
119
|
-
3. Resolves any logical errors identified in the critique.
|
|
120
|
-
"""
|
|
121
|
-
|
|
122
|
-
COT_QA_IMAGE_REFINEMENT_CRITERIA = """1. The answer adheres to the required JSON format specified in the original prompt.
|
|
123
|
-
2. Correctly derives all values for the output fields based on the provided image(s) and/or text.
|
|
124
|
-
3. Resolves any logical errors identified in the critique.
|
|
125
|
-
"""
|
|
126
|
-
|
|
127
|
-
# COT_BO0L_REFINEMENT_FINISH_INSTRUCTION = """Return the improved answer."""
|
|
128
|
-
COT_QA_REFINEMENT_FINISH_INSTRUCTION = """Return the refined JSON object as your final answer."""
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
### PROMPTS FOR MOA + CRITIC; WHICH ARE BEING DEFERRED FOR NOW ###
|
|
132
|
-
# COT_MOA_AGG_BASE_SYSTEM_PROMPT_CRITIQUE = """You are a helpful assistant tasked with reviewing the output of a model based on a given prompt.
|
|
133
|
-
# Below is the original user prompt used to generate the output:
|
|
134
|
-
|
|
135
|
-
# USER PROMPT:
|
|
136
|
-
# {user_prompt}
|
|
137
|
-
|
|
138
|
-
# Here is the synthesized JSON object generated by the model:
|
|
139
|
-
|
|
140
|
-
# OUTPUT:
|
|
141
|
-
# {original_output}
|
|
142
|
-
|
|
143
|
-
# Your task is to critique the output based on the following:
|
|
144
|
-
# 1. Does the JSON object adhere to the required format?
|
|
145
|
-
# 2. Does the synthesis appropriately combine responses from multiple models, resolving conflicts where necessary?
|
|
146
|
-
# 3. Are there any biases, inaccuracies, or missing information in the final output?
|
|
147
|
-
|
|
148
|
-
# Finish your critique with actionable recommendations for improving the synthesized response.
|
|
149
|
-
# """
|
|
150
|
-
|
|
151
|
-
# COT_MOA_AGG_BASE_SYSTEM_PROMPT_REFINEMENT = """You are a helpful assistant tasked with refining the output of a model based on a critique.
|
|
152
|
-
# Below is the original user prompt used to generate the output:
|
|
153
|
-
|
|
154
|
-
# USER PROMPT:
|
|
155
|
-
# {user_prompt}
|
|
156
|
-
|
|
157
|
-
# Here is the original synthesized JSON object generated by the model:
|
|
158
|
-
|
|
159
|
-
# ORIGINAL OUTPUT:
|
|
160
|
-
# {original_output}
|
|
161
|
-
|
|
162
|
-
# Here is the critique of the output:
|
|
163
|
-
|
|
164
|
-
# CRITIQUE:
|
|
165
|
-
# {critique_output}
|
|
166
|
-
|
|
167
|
-
# Your task is to refine the synthesized JSON object based on the critique. Ensure that:
|
|
168
|
-
# 1. The JSON object adheres to the required format.
|
|
169
|
-
# 2. The synthesis properly reconciles different model responses, making informed decisions on conflicts.
|
|
170
|
-
# 3. The final output is accurate, unbiased, and complete.
|
|
171
|
-
|
|
172
|
-
# Return the improved JSON object.
|
|
173
|
-
# """
|
|
174
|
-
|
|
175
|
-
# COT_MOA_PROPOSER_BASE_SYSTEM_PROMPT_CRITIQUE = """You are a helpful assistant tasked with reviewing the output of a model based on a given prompt.
|
|
176
|
-
# Below is the original user prompt used to generate the output:
|
|
177
|
-
|
|
178
|
-
# USER PROMPT:
|
|
179
|
-
# {user_prompt}
|
|
180
|
-
|
|
181
|
-
# Here is the response generated by the model:
|
|
182
|
-
|
|
183
|
-
# OUTPUT:
|
|
184
|
-
# {original_output}
|
|
185
|
-
|
|
186
|
-
# Your task is to critique the output based on the following:
|
|
187
|
-
# 1. Is the response well-structured and does it clearly explain each output field?
|
|
188
|
-
# 2. Are all claims supported by the provided context? Identify any instances of hallucination or missing evidence.
|
|
189
|
-
# 3. Does the response cite specific parts of the context when making claims?
|
|
190
|
-
|
|
191
|
-
# Finish your critique with actionable recommendations for improving the response.
|
|
192
|
-
# """
|
|
193
|
-
|
|
194
|
-
# COT_MOA_PROPOSER_BASE_SYSTEM_PROMPT_REFINEMENT = """You are a helpful assistant tasked with refining the output of a model based on a critique.
|
|
195
|
-
# Below is the original user prompt used to generate the output:
|
|
196
|
-
|
|
197
|
-
# USER PROMPT:
|
|
198
|
-
# {user_prompt}
|
|
199
|
-
|
|
200
|
-
# Here is the original response generated by the model:
|
|
201
|
-
|
|
202
|
-
# ORIGINAL OUTPUT:
|
|
203
|
-
# {original_output}
|
|
204
|
-
|
|
205
|
-
# Here is the critique of the output:
|
|
206
|
-
|
|
207
|
-
# CRITIQUE:
|
|
208
|
-
# {critique_output}
|
|
209
|
-
|
|
210
|
-
# Your task is to refine the response based on the critique. Ensure that:
|
|
211
|
-
# 1. The response is well-structured and clearly explains each output field.
|
|
212
|
-
# 2. All claims are directly supported by the provided context.
|
|
213
|
-
# 3. The response explicitly cites relevant parts of the context.
|
|
214
|
-
|
|
215
|
-
# Return the improved response.
|
|
216
|
-
# """
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
"""This file contains prompts for MixtureOfAgentsConvert operations on text inputs."""
|
|
2
|
-
|
|
3
|
-
### BASE PROMPTS ###
|
|
4
|
-
COT_MOA_PROPOSER_BASE_SYSTEM_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
|
|
5
|
-
You will be presented with a context and a set of output fields to generate. Your task is to generate a paragraph or two which describes what you believe is the correct value for each output field.
|
|
6
|
-
Be sure to cite information from the context as evidence of why your answers are correct. Do not hallucinate evidence.
|
|
7
|
-
|
|
8
|
-
You will be provided with a description of each input field and each output field.
|
|
9
|
-
|
|
10
|
-
An example is shown below:
|
|
11
|
-
---
|
|
12
|
-
INPUT FIELDS:
|
|
13
|
-
{example_input_fields}
|
|
14
|
-
|
|
15
|
-
OUTPUT FIELDS:
|
|
16
|
-
{example_output_fields}
|
|
17
|
-
|
|
18
|
-
CONTEXT:
|
|
19
|
-
{example_context}{image_disclaimer}
|
|
20
|
-
|
|
21
|
-
Let's think step-by-step in order to answer the question.
|
|
22
|
-
|
|
23
|
-
ANSWER: {example_answer}
|
|
24
|
-
---
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
COT_MOA_PROPOSER_BASE_USER_PROMPT = """You are a helpful assistant whose job is to {job_instruction}.
|
|
28
|
-
You will be presented with a context and a set of output fields to generate. Your task is to generate a paragraph or two which describes what you believe is the correct value for each output field.
|
|
29
|
-
Be sure to cite information from the context as evidence of why your answers are correct. Do not hallucinate evidence.
|
|
30
|
-
{desc_section}
|
|
31
|
-
You will be provided with a description of each input field and each output field.
|
|
32
|
-
---
|
|
33
|
-
INPUT FIELDS:
|
|
34
|
-
{input_fields_desc}
|
|
35
|
-
|
|
36
|
-
OUTPUT FIELDS:
|
|
37
|
-
{output_fields_desc}
|
|
38
|
-
|
|
39
|
-
CONTEXT:
|
|
40
|
-
{context}<<image-placeholder>>
|
|
41
|
-
|
|
42
|
-
Let's think step-by-step in order to answer the question.
|
|
43
|
-
|
|
44
|
-
ANSWER: """
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
### TEMPLATE INPUTS ###
|
|
48
|
-
COT_MOA_PROPOSER_JOB_INSTRUCTION = "produce an answer to a question"
|
|
49
|
-
COT_MOA_PROPOSER_IMAGE_JOB_INSTRUCTION = "analyze input image(s) and/or text in order to produce an answer to a question"
|
|
50
|
-
|
|
51
|
-
COT_MOA_PROPOSER_EXAMPLE_INPUT_FIELDS = """- text: a text passage describing a scientist
|
|
52
|
-
- birthday: the scientist's birthday"""
|
|
53
|
-
COT_MOA_PROPOSER_IMAGE_EXAMPLE_INPUT_FIELDS = """- image: an image of a scene
|
|
54
|
-
- photographer: the photographer of the image"""
|
|
55
|
-
|
|
56
|
-
COT_MOA_PROPOSER_EXAMPLE_OUTPUT_FIELDS = """- name: the name of the scientist
|
|
57
|
-
- birth_year: the year the scientist was born"""
|
|
58
|
-
COT_MOA_PROPOSER_IMAGE_EXAMPLE_OUTPUT_FIELDS = """- dog_in_image: true if a dog is in the image and false otherwise
|
|
59
|
-
- person_in_image: true if a person is in the image and false otherwise"""
|
|
60
|
-
|
|
61
|
-
COT_MOA_PROPOSER_EXAMPLE_CONTEXT = """{{
|
|
62
|
-
"text": "Augusta Ada King, Countess of Lovelace, also known as Ada Lovelace, was an English mathematician and writer chiefly known for her work on Charles Babbage's proposed mechanical general-purpose computer, the Analytical Engine. She was the first to recognise that the machine had applications beyond pure calculation.",
|
|
63
|
-
"birthday": "December 10, 1815"
|
|
64
|
-
}}"""
|
|
65
|
-
COT_MOA_PROPOSER_IMAGE_EXAMPLE_CONTEXT = """{{
|
|
66
|
-
"image": <bytes>,
|
|
67
|
-
"photographer": "CameraEnthusiast1"
|
|
68
|
-
}}"""
|
|
69
|
-
|
|
70
|
-
COT_MOA_PROPOSER_IMAGE_DISCLAIMER = """
|
|
71
|
-
\n<image content provided here; assume in this example the image shows a dog and a cat playing>
|
|
72
|
-
"""
|
|
73
|
-
|
|
74
|
-
COT_MOA_PROPOSER_EXAMPLE_ANSWER = """the text passage mentions the scientist's name as "Augusta Ada King, Countess of Lovelace, also known as Ada Lovelace" and the scientist's birthday as "December 10, 1815". Therefore, the name of the scientist is "Augusta Ada King" and the birth year is 1815."""
|
|
75
|
-
COT_MOA_PROPOSER_IMAGE_EXAMPLE_ANSWER = """The image shows a dog playing with a cat, so there is a dog in the image. There is no person in the image."""
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
"""This file contains utility phrases which are templated into many of our prompts."""
|
|
2
|
-
|
|
3
|
-
### FORMATTING INSTRUCTIONS ###
|
|
4
|
-
ONE_TO_ONE_OUTPUT_FORMAT_INSTRUCTION = "Remember, your answer must be a valid JSON dictionary. The dictionary should only have the specified output fields."
|
|
5
|
-
ONE_TO_MANY_OUTPUT_FORMAT_INSTRUCTION = "Remember, your answer must be a valid JSON list of dictionaries. The list may contain one or more dictionaries, and each dictionary should only have the specified output fields."
|
|
6
|
-
|
|
7
|
-
### REASONING INSTRUCTION FOR IMAGE PROMPTS ###
|
|
8
|
-
COT_REASONING_INSTRUCTION = """Let's think step-by-step in order to answer the question.
|
|
9
|
-
|
|
10
|
-
REASONING: """
|
|
11
|
-
|
|
12
|
-
COT_ANSWER_INSTRUCTION = """Let's think step-by-step in order to answer the question.
|
|
13
|
-
|
|
14
|
-
ANSWER: """
|
|
15
|
-
|
|
16
|
-
DESC_SECTION = """
|
|
17
|
-
The user has additionally provided you with this description of the task you need to perform:
|
|
18
|
-
{desc}
|
|
19
|
-
"""
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import Any
|
|
4
|
-
|
|
5
|
-
from pydantic.fields import FieldInfo
|
|
6
|
-
|
|
7
|
-
from palimpzest.constants import MODEL_CARDS, Model, PromptStrategy
|
|
8
|
-
from palimpzest.core.elements.records import DataRecord
|
|
9
|
-
from palimpzest.core.models import GenerationStats, OperatorCostEstimates
|
|
10
|
-
from palimpzest.query.generators.generators import Generator
|
|
11
|
-
from palimpzest.query.operators.convert import LLMConvert
|
|
12
|
-
|
|
13
|
-
# TYPE DEFINITIONS
|
|
14
|
-
FieldName = str
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class CriticAndRefineConvert(LLMConvert):
|
|
18
|
-
|
|
19
|
-
def __init__(
|
|
20
|
-
self,
|
|
21
|
-
critic_model: Model,
|
|
22
|
-
refine_model: Model,
|
|
23
|
-
*args,
|
|
24
|
-
**kwargs,
|
|
25
|
-
):
|
|
26
|
-
super().__init__(*args, **kwargs)
|
|
27
|
-
self.critic_model = critic_model
|
|
28
|
-
self.refine_model = refine_model
|
|
29
|
-
|
|
30
|
-
if self.prompt_strategy == PromptStrategy.COT_QA:
|
|
31
|
-
self.critic_prompt_strategy = PromptStrategy.COT_QA_CRITIC
|
|
32
|
-
self.refinement_prompt_strategy = PromptStrategy.COT_QA_REFINE
|
|
33
|
-
elif self.prompt_strategy == PromptStrategy.COT_QA_IMAGE:
|
|
34
|
-
self.critic_prompt_strategy = PromptStrategy.COT_QA_IMAGE_CRITIC
|
|
35
|
-
self.refinement_prompt_strategy = PromptStrategy.COT_QA_IMAGE_REFINE
|
|
36
|
-
else:
|
|
37
|
-
raise ValueError(f"Unsupported prompt strategy: {self.prompt_strategy}")
|
|
38
|
-
|
|
39
|
-
# create generators
|
|
40
|
-
self.critic_generator = Generator(self.critic_model, self.critic_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
|
|
41
|
-
self.refine_generator = Generator(self.refine_model, self.refinement_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
|
|
42
|
-
|
|
43
|
-
def __str__(self):
|
|
44
|
-
op = super().__str__()
|
|
45
|
-
op += f" Critic Model: {self.critic_model}\n"
|
|
46
|
-
op += f" Critic Prompt Strategy: {self.critic_prompt_strategy}\n"
|
|
47
|
-
op += f" Refine Model: {self.refine_model}\n"
|
|
48
|
-
op += f" Refinement Prompt Strategy: {self.refinement_prompt_strategy}\n"
|
|
49
|
-
return op
|
|
50
|
-
|
|
51
|
-
def get_id_params(self):
|
|
52
|
-
id_params = super().get_id_params()
|
|
53
|
-
id_params = {
|
|
54
|
-
"critic_model": self.critic_model.value,
|
|
55
|
-
"refine_model": self.refine_model.value,
|
|
56
|
-
**id_params,
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
return id_params
|
|
60
|
-
|
|
61
|
-
def get_op_params(self):
|
|
62
|
-
op_params = super().get_op_params()
|
|
63
|
-
op_params = {
|
|
64
|
-
"critic_model": self.critic_model,
|
|
65
|
-
"refine_model": self.refine_model,
|
|
66
|
-
**op_params,
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
return op_params
|
|
70
|
-
|
|
71
|
-
def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates) -> OperatorCostEstimates:
    """
    Estimate cost and time as roughly three LLMConverts (initial generation with
    `self.model`, critique with `self.critic_model`, refinement with
    `self.refine_model`), and estimate quality as the refine model's quality.
    In practice this naive estimate is overwritten by the CostModel once a few
    instances of the operator have executed.
    """
    # rough estimate: triple the single-call estimate rather than summing
    # per-model estimates; the naive estimators will need a revamp eventually
    estimates = 3 * super().naive_cost_estimates(source_op_cost_estimates)

    # pin quality (and its bounds) to the refine model's overall score,
    # since its output is what the operator ultimately returns
    refine_quality = MODEL_CARDS[self.refine_model.value]["overall"] / 100.0
    estimates.quality = refine_quality
    estimates.quality_lower_bound = refine_quality
    estimates.quality_upper_bound = refine_quality

    return estimates
|
|
90
|
-
|
|
91
|
-
def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo]) -> tuple[dict[FieldName, list[Any]], GenerationStats]:
    """Generate field answers, critique them, then refine; return the refined
    answers together with the combined generation stats of all three calls."""
    # NOTE: reasoning extraction here should be updated when the `abacus`
    # branch's changes are merged
    base_kwargs = {"project_cols": self.get_input_fields(), "output_schema": self.output_schema}

    # step 1: initial generation with the operator's base model
    field_answers, reasoning, base_stats, base_messages = self.generator(candidate, fields, **base_kwargs)
    first_pass_output = f"REASONING: {reasoning}\nANSWER: {field_answers}\n"

    # step 2: critique the first pass (free-form text, so no JSON parsing)
    critic_kwargs = {"original_output": first_pass_output, "original_messages": base_messages, **base_kwargs}
    _, critique_reasoning, critic_stats, _ = self.critic_generator(candidate, fields, json_output=False, **critic_kwargs)
    critique_output = f"CRITIQUE: {critique_reasoning}\n"

    # step 3: refine the answer conditioned on the critique
    refine_kwargs = {"critique_output": critique_output, **critic_kwargs}
    field_answers, _, refine_stats, _ = self.refine_generator(candidate, fields, **refine_kwargs)

    return field_answers, base_stats + critic_stats + refine_stats
|
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from pydantic.fields import FieldInfo
|
|
4
|
-
|
|
5
|
-
from palimpzest.constants import MODEL_CARDS, Model, PromptStrategy
|
|
6
|
-
from palimpzest.core.elements.records import DataRecord
|
|
7
|
-
from palimpzest.core.models import GenerationStats, OperatorCostEstimates
|
|
8
|
-
from palimpzest.query.generators.generators import Generator
|
|
9
|
-
from palimpzest.query.operators.convert import LLMConvert
|
|
10
|
-
|
|
11
|
-
# TYPE DEFINITIONS
|
|
12
|
-
FieldName = str
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class MixtureOfAgentsConvert(LLMConvert):
    """Convert operator implementing the Mixture-of-Agents pattern: several
    proposer models (each invoked at its own temperature) draft free-text
    answers, and a single aggregator model synthesizes them into the final
    field answers.
    """

    def __init__(
        self,
        proposer_models: list[Model],
        temperatures: list[float],
        aggregator_model: Model,
        proposer_prompt_strategy: PromptStrategy = PromptStrategy.COT_MOA_PROPOSER,
        aggregator_prompt_strategy: PromptStrategy = PromptStrategy.COT_MOA_AGG,
        *args,
        **kwargs,
    ):
        # the base class' single model / prompt strategy do not apply to this operator
        kwargs["model"] = None
        kwargs["prompt_strategy"] = None
        super().__init__(*args, **kwargs)

        # sort (model, temperature) pairs jointly so each model keeps its temperature
        sorted_proposers, sorted_temps = zip(*[(m, t) for m, t in sorted(zip(proposer_models, temperatures), key=lambda pair: pair[0])])
        self.proposer_models = list(sorted_proposers)
        self.temperatures = list(sorted_temps)
        self.aggregator_model = aggregator_model
        self.proposer_prompt_strategy = proposer_prompt_strategy
        self.aggregator_prompt_strategy = aggregator_prompt_strategy

        # create generators
        # BUGFIX: iterate the *sorted* self.proposer_models here; the original
        # iterated the unsorted `proposer_models` parameter, so
        # zip(self.proposer_generators, self.temperatures) in convert() paired
        # generators with the wrong temperatures whenever sorting reordered models
        self.proposer_generators = [
            Generator(model, self.proposer_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)
            for model in self.proposer_models
        ]
        self.aggregator_generator = Generator(aggregator_model, self.aggregator_prompt_strategy, self.reasoning_effort, self.api_base, self.cardinality, self.desc, self.verbose)

    def __str__(self):
        """Render the base operator description plus the MoA configuration."""
        op = super().__str__()
        op += f" Proposer Models: {self.proposer_models}\n"
        op += f" Temperatures: {self.temperatures}\n"
        op += f" Aggregator Model: {self.aggregator_model}\n"
        op += f" Proposer Prompt Strategy: {self.proposer_prompt_strategy.value}\n"
        op += f" Aggregator Prompt Strategy: {self.aggregator_prompt_strategy.value}\n"
        return op

    def get_id_params(self):
        """Return identifying parameters (model names, temperatures, and prompt
        strategies) merged with the parent operator's id parameters."""
        id_params = super().get_id_params()
        id_params = {
            "proposer_models": [model.value for model in self.proposer_models],
            "temperatures": self.temperatures,
            "aggregator_model": self.aggregator_model.value,
            "proposer_prompt_strategy": self.proposer_prompt_strategy.value,
            "aggregator_prompt_strategy": self.aggregator_prompt_strategy.value,
            **id_params,
        }
        return id_params

    def get_op_params(self):
        """Return the constructor parameters needed to re-create this operator,
        merged with the parent operator's parameters."""
        op_params = super().get_op_params()
        op_params = {
            "proposer_models": self.proposer_models,
            "temperatures": self.temperatures,
            "aggregator_model": self.aggregator_model,
            "proposer_prompt_strategy": self.proposer_prompt_strategy,
            "aggregator_prompt_strategy": self.aggregator_prompt_strategy,
            **op_params,
        }
        return op_params

    def is_image_conversion(self) -> bool:
        # whether this convert processes images is determined by the proposer strategy
        return self.proposer_prompt_strategy.is_image_prompt()

    def naive_cost_estimates(self, source_op_cost_estimates: OperatorCostEstimates) -> OperatorCostEstimates:
        """
        Currently, we are using multiple proposer models with different temperatures to synthesize
        answers, which are then aggregated and summarized by a single aggregator model. Thus, we
        roughly expect to incur the cost and time of an LLMConvert * (len(proposer_models) + 1).
        In practice, this naive quality estimate will be overwritten by the CostModel's estimate
        once it executes a few instances of the operator.
        """
        # temporarily set self.model so that super().naive_cost_estimates(...) can compute an estimate
        self.model = self.proposer_models[0]

        # get naive cost estimates for single LLM call and scale it by number of LLMs used in MoA
        naive_op_cost_estimates = super().naive_cost_estimates(source_op_cost_estimates)
        naive_op_cost_estimates.time_per_record *= (len(self.proposer_models) + 1)
        naive_op_cost_estimates.time_per_record_lower_bound = naive_op_cost_estimates.time_per_record
        naive_op_cost_estimates.time_per_record_upper_bound = naive_op_cost_estimates.time_per_record
        naive_op_cost_estimates.cost_per_record *= (len(self.proposer_models) + 1)
        naive_op_cost_estimates.cost_per_record_lower_bound = naive_op_cost_estimates.cost_per_record
        naive_op_cost_estimates.cost_per_record_upper_bound = naive_op_cost_estimates.cost_per_record

        # for naive setting, estimate quality as mean of all model qualities
        model_qualities = [
            MODEL_CARDS[model.value]["overall"] / 100.0
            for model in self.proposer_models + [self.aggregator_model]
        ]
        naive_op_cost_estimates.quality = sum(model_qualities) / (len(self.proposer_models) + 1)
        naive_op_cost_estimates.quality_lower_bound = naive_op_cost_estimates.quality
        naive_op_cost_estimates.quality_upper_bound = naive_op_cost_estimates.quality

        # reset self.model to be None
        self.model = None

        return naive_op_cost_estimates

    def convert(self, candidate: DataRecord, fields: dict[str, FieldInfo]) -> tuple[dict[str, list], GenerationStats]:
        """Run each proposer model (at its paired temperature) to draft reasoning,
        then call the aggregator to produce the final field answers; return the
        answers with the combined generation stats of all calls."""
        # get input fields
        input_fields = self.get_input_fields()

        # execute proposer models in sequence, collecting free-text reasoning
        # (json_output=False: proposer output is prose consumed by the aggregator)
        proposer_model_final_answers, proposer_model_generation_stats = [], []
        for proposer_generator, temperature in zip(self.proposer_generators, self.temperatures):
            gen_kwargs = {"project_cols": input_fields, "output_schema": self.output_schema, "temperature": temperature}
            _, reasoning, generation_stats, _ = proposer_generator(candidate, fields, json_output=False, **gen_kwargs)
            proposer_text = f"REASONING:{reasoning}\n"
            proposer_model_final_answers.append(proposer_text)
            proposer_model_generation_stats.append(generation_stats)

        # call the aggregator with every proposer's response
        gen_kwargs = {
            "project_cols": input_fields,
            "output_schema": self.output_schema,
            "model_responses": proposer_model_final_answers,
        }
        field_answers, _, aggregator_gen_stats, _ = self.aggregator_generator(candidate, fields, **gen_kwargs)

        # compute the total generation stats
        generation_stats = sum(proposer_model_generation_stats) + aggregator_gen_stats

        return field_answers, generation_stats
|