palimpzest 0.6.4__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- palimpzest/__init__.py +5 -0
- palimpzest/constants.py +110 -43
- palimpzest/core/__init__.py +0 -78
- palimpzest/core/data/dataclasses.py +382 -44
- palimpzest/core/elements/filters.py +7 -3
- palimpzest/core/elements/index.py +70 -0
- palimpzest/core/elements/records.py +33 -11
- palimpzest/core/lib/fields.py +1 -0
- palimpzest/core/lib/schemas.py +4 -3
- palimpzest/prompts/moa_proposer_convert_prompts.py +0 -4
- palimpzest/prompts/prompt_factory.py +44 -7
- palimpzest/prompts/split_merge_prompts.py +56 -0
- palimpzest/prompts/split_proposer_prompts.py +55 -0
- palimpzest/query/execution/execution_strategy.py +435 -53
- palimpzest/query/execution/execution_strategy_type.py +20 -0
- palimpzest/query/execution/mab_execution_strategy.py +532 -0
- palimpzest/query/execution/parallel_execution_strategy.py +143 -172
- palimpzest/query/execution/random_sampling_execution_strategy.py +240 -0
- palimpzest/query/execution/single_threaded_execution_strategy.py +173 -203
- palimpzest/query/generators/api_client_factory.py +31 -0
- palimpzest/query/generators/generators.py +256 -76
- palimpzest/query/operators/__init__.py +1 -2
- palimpzest/query/operators/code_synthesis_convert.py +33 -18
- palimpzest/query/operators/convert.py +30 -97
- palimpzest/query/operators/critique_and_refine_convert.py +5 -6
- palimpzest/query/operators/filter.py +7 -10
- palimpzest/query/operators/logical.py +54 -10
- palimpzest/query/operators/map.py +130 -0
- palimpzest/query/operators/mixture_of_agents_convert.py +6 -6
- palimpzest/query/operators/physical.py +3 -12
- palimpzest/query/operators/rag_convert.py +66 -18
- palimpzest/query/operators/retrieve.py +230 -34
- palimpzest/query/operators/scan.py +5 -2
- palimpzest/query/operators/split_convert.py +169 -0
- palimpzest/query/operators/token_reduction_convert.py +8 -14
- palimpzest/query/optimizer/__init__.py +4 -16
- palimpzest/query/optimizer/cost_model.py +73 -266
- palimpzest/query/optimizer/optimizer.py +87 -58
- palimpzest/query/optimizer/optimizer_strategy.py +18 -97
- palimpzest/query/optimizer/optimizer_strategy_type.py +37 -0
- palimpzest/query/optimizer/plan.py +2 -3
- palimpzest/query/optimizer/primitives.py +5 -3
- palimpzest/query/optimizer/rules.py +336 -172
- palimpzest/query/optimizer/tasks.py +30 -100
- palimpzest/query/processor/config.py +38 -22
- palimpzest/query/processor/nosentinel_processor.py +16 -520
- palimpzest/query/processor/processing_strategy_type.py +28 -0
- palimpzest/query/processor/query_processor.py +38 -206
- palimpzest/query/processor/query_processor_factory.py +117 -130
- palimpzest/query/processor/sentinel_processor.py +90 -0
- palimpzest/query/processor/streaming_processor.py +25 -32
- palimpzest/sets.py +88 -41
- palimpzest/utils/model_helpers.py +8 -7
- palimpzest/utils/progress.py +368 -152
- palimpzest/utils/token_reduction_helpers.py +1 -3
- {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info}/METADATA +19 -9
- palimpzest-0.7.1.dist-info/RECORD +96 -0
- {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info}/WHEEL +1 -1
- palimpzest/query/processor/mab_sentinel_processor.py +0 -884
- palimpzest/query/processor/random_sampling_sentinel_processor.py +0 -639
- palimpzest/utils/index_helpers.py +0 -6
- palimpzest-0.6.4.dist-info/RECORD +0 -87
- {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info/licenses}/LICENSE +0 -0
- {palimpzest-0.6.4.dist-info → palimpzest-0.7.1.dist-info}/top_level.txt +0 -0
palimpzest/__init__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
1
3
|
from palimpzest.constants import Cardinality
|
|
2
4
|
from palimpzest.core.data.datareaders import DataReader
|
|
3
5
|
from palimpzest.policy import (
|
|
@@ -14,6 +16,9 @@ from palimpzest.policy import (
|
|
|
14
16
|
from palimpzest.query.processor.config import QueryProcessorConfig
|
|
15
17
|
from palimpzest.sets import Dataset
|
|
16
18
|
|
|
19
|
+
# Initialize the root logger
|
|
20
|
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
21
|
+
|
|
17
22
|
__all__ = [
|
|
18
23
|
# constants
|
|
19
24
|
"Cardinality",
|
palimpzest/constants.py
CHANGED
|
@@ -10,23 +10,36 @@ class Model(str, Enum):
|
|
|
10
10
|
which requires invoking an LLM. It does NOT specify whether the model need be executed
|
|
11
11
|
remotely or locally (if applicable).
|
|
12
12
|
"""
|
|
13
|
-
LLAMA3 = "meta-llama/Llama-3-8b-chat-hf"
|
|
14
|
-
|
|
13
|
+
# LLAMA3 = "meta-llama/Llama-3-8b-chat-hf"
|
|
14
|
+
LLAMA3 = "meta-llama/Llama-3.3-70B-Instruct-Turbo"
|
|
15
|
+
LLAMA3_V = "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo"
|
|
15
16
|
MIXTRAL = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
|
17
|
+
DEEPSEEK = "deepseek-ai/DeepSeek-V3"
|
|
16
18
|
GPT_4o = "gpt-4o-2024-08-06"
|
|
17
19
|
GPT_4o_V = "gpt-4o-2024-08-06"
|
|
18
20
|
GPT_4o_MINI = "gpt-4o-mini-2024-07-18"
|
|
19
21
|
GPT_4o_MINI_V = "gpt-4o-mini-2024-07-18"
|
|
22
|
+
TEXT_EMBEDDING_3_SMALL = "text-embedding-3-small"
|
|
23
|
+
CLIP_VIT_B_32 = "clip-ViT-B-32"
|
|
20
24
|
|
|
21
25
|
def __repr__(self):
|
|
22
26
|
return f"{self.name}"
|
|
23
27
|
|
|
24
28
|
|
|
29
|
+
class APIClient(str, Enum):
|
|
30
|
+
"""
|
|
31
|
+
APIClient describes the API client to be used when invoking an LLM.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
OPENAI = "openai"
|
|
35
|
+
TOGETHER = "together"
|
|
36
|
+
|
|
25
37
|
class PromptStrategy(str, Enum):
|
|
26
38
|
"""
|
|
27
39
|
PromptStrategy describes the prompting technique to be used by a Generator when
|
|
28
40
|
performing some task with a specified Model.
|
|
29
41
|
"""
|
|
42
|
+
|
|
30
43
|
# Chain-of-Thought Boolean Prompt Strategies
|
|
31
44
|
COT_BOOL = "chain-of-thought-bool"
|
|
32
45
|
# COT_BOOL_CRITIC = "chain-of-thought-bool-critic"
|
|
@@ -52,14 +65,18 @@ class PromptStrategy(str, Enum):
|
|
|
52
65
|
COT_MOA_PROPOSER_IMAGE = "chain-of-thought-mixture-of-agents-proposer-image"
|
|
53
66
|
COT_MOA_AGG = "chain-of-thought-mixture-of-agents-aggregation"
|
|
54
67
|
|
|
68
|
+
# Split Convert Prompt Strategies
|
|
69
|
+
SPLIT_PROPOSER = "split-proposer"
|
|
70
|
+
SPLIT_MERGER = "split-merger"
|
|
71
|
+
|
|
55
72
|
def is_image_prompt(self):
|
|
56
73
|
return "image" in self.value
|
|
57
74
|
|
|
58
|
-
def
|
|
59
|
-
return "
|
|
75
|
+
def is_bool_prompt(self):
|
|
76
|
+
return "bool" in self.value
|
|
60
77
|
|
|
61
|
-
def
|
|
62
|
-
return "
|
|
78
|
+
def is_convert_prompt(self):
|
|
79
|
+
return "bool" not in self.value
|
|
63
80
|
|
|
64
81
|
def is_critic_prompt(self):
|
|
65
82
|
return "critic" in self.value
|
|
@@ -73,6 +90,11 @@ class PromptStrategy(str, Enum):
|
|
|
73
90
|
def is_moa_aggregator_prompt(self):
|
|
74
91
|
return "mixture-of-agents-aggregation" in self.value
|
|
75
92
|
|
|
93
|
+
def is_split_proposer_prompt(self):
|
|
94
|
+
return "split-proposer" in self.value
|
|
95
|
+
|
|
96
|
+
def is_split_merger_prompt(self):
|
|
97
|
+
return "split-merger" in self.value
|
|
76
98
|
|
|
77
99
|
class AggFunc(str, Enum):
|
|
78
100
|
COUNT = "count"
|
|
@@ -93,10 +115,12 @@ class Cardinality(str, Enum):
|
|
|
93
115
|
return member
|
|
94
116
|
return cls.ONE_TO_ONE
|
|
95
117
|
|
|
118
|
+
|
|
96
119
|
class PickOutputStrategy(str, Enum):
|
|
97
120
|
CHAMPION = "champion"
|
|
98
121
|
ENSEMBLE = "ensemble"
|
|
99
122
|
|
|
123
|
+
|
|
100
124
|
IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff"]
|
|
101
125
|
PDF_EXTENSIONS = [".pdf"]
|
|
102
126
|
XLS_EXTENSIONS = [".xls", ".xlsx"]
|
|
@@ -111,11 +135,6 @@ DEFAULT_PDF_PROCESSOR = "pypdf"
|
|
|
111
135
|
# character limit for various IDs
|
|
112
136
|
MAX_ID_CHARS = 10
|
|
113
137
|
|
|
114
|
-
# retry LLM executions 2^x * (multiplier) for up to 10 seconds and at most 4 times
|
|
115
|
-
RETRY_MULTIPLIER = 2
|
|
116
|
-
RETRY_MAX_SECS = 10
|
|
117
|
-
RETRY_MAX_ATTEMPTS = 1
|
|
118
|
-
|
|
119
138
|
# maximum number of rows to display in a table
|
|
120
139
|
MAX_ROWS = 5
|
|
121
140
|
|
|
@@ -201,25 +220,45 @@ LOG_LLM_OUTPUT = False
|
|
|
201
220
|
# values more precisely:
|
|
202
221
|
# - https://artificialanalysis.ai/models/llama-3-1-instruct-8b
|
|
203
222
|
#
|
|
204
|
-
LLAMA3_8B_MODEL_CARD = {
|
|
223
|
+
# LLAMA3_8B_MODEL_CARD = {
|
|
224
|
+
# ##### Cost in USD #####
|
|
225
|
+
# "usd_per_input_token": 0.18 / 1E6,
|
|
226
|
+
# "usd_per_output_token": 0.18 / 1E6,
|
|
227
|
+
# ##### Time #####
|
|
228
|
+
# "seconds_per_output_token": 0.0061,
|
|
229
|
+
# ##### Agg. Benchmark #####
|
|
230
|
+
# "overall": 71.0,
|
|
231
|
+
# ##### Code #####
|
|
232
|
+
# "code": 64.0,
|
|
233
|
+
# }
|
|
234
|
+
LLAMA3_3_70B_INSTRUCT_MODEL_CARD = {
|
|
205
235
|
##### Cost in USD #####
|
|
206
|
-
"usd_per_input_token": 0.
|
|
207
|
-
"usd_per_output_token": 0.
|
|
236
|
+
"usd_per_input_token": 0.88 / 1e6,
|
|
237
|
+
"usd_per_output_token": 0.88 / 1e6,
|
|
208
238
|
##### Time #####
|
|
209
|
-
"seconds_per_output_token": 0.
|
|
239
|
+
"seconds_per_output_token": 0.0139,
|
|
210
240
|
##### Agg. Benchmark #####
|
|
211
|
-
"overall":
|
|
241
|
+
"overall": 86.0,
|
|
212
242
|
##### Code #####
|
|
213
|
-
"code":
|
|
243
|
+
"code": 88.4,
|
|
214
244
|
}
|
|
215
|
-
|
|
245
|
+
# LLAMA3_2_11B_V_MODEL_CARD = {
|
|
246
|
+
# ##### Cost in USD #####
|
|
247
|
+
# "usd_per_input_token": 0.18 / 1E6,
|
|
248
|
+
# "usd_per_output_token": 0.18 / 1E6,
|
|
249
|
+
# ##### Time #####
|
|
250
|
+
# "seconds_per_output_token": 0.0061,
|
|
251
|
+
# ##### Agg. Benchmark #####
|
|
252
|
+
# "overall": 71.0,
|
|
253
|
+
# }
|
|
254
|
+
LLAMA3_2_90B_V_MODEL_CARD = {
|
|
216
255
|
##### Cost in USD #####
|
|
217
|
-
"usd_per_input_token":
|
|
218
|
-
"usd_per_output_token":
|
|
256
|
+
"usd_per_input_token": 1.2 / 1e6,
|
|
257
|
+
"usd_per_output_token": 1.2 / 1e6,
|
|
219
258
|
##### Time #####
|
|
220
|
-
"seconds_per_output_token": 0.
|
|
259
|
+
"seconds_per_output_token": 0.0222,
|
|
221
260
|
##### Agg. Benchmark #####
|
|
222
|
-
"overall":
|
|
261
|
+
"overall": 84.0,
|
|
223
262
|
}
|
|
224
263
|
MIXTRAL_8X_7B_MODEL_CARD = {
|
|
225
264
|
##### Cost in USD #####
|
|
@@ -232,10 +271,21 @@ MIXTRAL_8X_7B_MODEL_CARD = {
|
|
|
232
271
|
##### Code #####
|
|
233
272
|
"code": 40.0,
|
|
234
273
|
}
|
|
274
|
+
DEEPSEEK_V3_MODEL_CARD = {
|
|
275
|
+
##### Cost in USD #####
|
|
276
|
+
"usd_per_input_token": 1.25 / 1E6,
|
|
277
|
+
"usd_per_output_token": 1.25 / 1E6,
|
|
278
|
+
##### Time #####
|
|
279
|
+
"seconds_per_output_token": 0.0769,
|
|
280
|
+
##### Agg. Benchmark #####
|
|
281
|
+
"overall": 87.0,
|
|
282
|
+
##### Code #####
|
|
283
|
+
"code": 92.0,
|
|
284
|
+
}
|
|
235
285
|
GPT_4o_MODEL_CARD = {
|
|
236
286
|
##### Cost in USD #####
|
|
237
|
-
"usd_per_input_token": 2.5 /
|
|
238
|
-
"usd_per_output_token": 10.0 /
|
|
287
|
+
"usd_per_input_token": 2.5 / 1e6,
|
|
288
|
+
"usd_per_output_token": 10.0 / 1e6,
|
|
239
289
|
##### Time #####
|
|
240
290
|
"seconds_per_output_token": 0.0079,
|
|
241
291
|
##### Agg. Benchmark #####
|
|
@@ -246,8 +296,8 @@ GPT_4o_MODEL_CARD = {
|
|
|
246
296
|
GPT_4o_V_MODEL_CARD = {
|
|
247
297
|
# NOTE: it is unclear if the same ($ / token) costs can be applied, or if we have to calculate this ourselves
|
|
248
298
|
##### Cost in USD #####
|
|
249
|
-
"usd_per_input_token": 2.5 /
|
|
250
|
-
"usd_per_output_token": 10.0 /
|
|
299
|
+
"usd_per_input_token": 2.5 / 1e6,
|
|
300
|
+
"usd_per_output_token": 10.0 / 1e6,
|
|
251
301
|
##### Time #####
|
|
252
302
|
"seconds_per_output_token": 0.0079,
|
|
253
303
|
##### Agg. Benchmark #####
|
|
@@ -255,8 +305,8 @@ GPT_4o_V_MODEL_CARD = {
|
|
|
255
305
|
}
|
|
256
306
|
GPT_4o_MINI_MODEL_CARD = {
|
|
257
307
|
##### Cost in USD #####
|
|
258
|
-
"usd_per_input_token": 0.15 /
|
|
259
|
-
"usd_per_output_token": 0.6 /
|
|
308
|
+
"usd_per_input_token": 0.15 / 1e6,
|
|
309
|
+
"usd_per_output_token": 0.6 / 1e6,
|
|
260
310
|
##### Time #####
|
|
261
311
|
"seconds_per_output_token": 0.0098,
|
|
262
312
|
##### Agg. Benchmark #####
|
|
@@ -267,24 +317,44 @@ GPT_4o_MINI_MODEL_CARD = {
|
|
|
267
317
|
GPT_4o_MINI_V_MODEL_CARD = {
|
|
268
318
|
# NOTE: it is unclear if the same ($ / token) costs can be applied, or if we have to calculate this ourselves
|
|
269
319
|
##### Cost in USD #####
|
|
270
|
-
"usd_per_input_token": 0.15 /
|
|
271
|
-
"usd_per_output_token": 0.6 /
|
|
320
|
+
"usd_per_input_token": 0.15 / 1e6,
|
|
321
|
+
"usd_per_output_token": 0.6 / 1e6,
|
|
272
322
|
##### Time #####
|
|
273
323
|
"seconds_per_output_token": 0.0098,
|
|
274
324
|
##### Agg. Benchmark #####
|
|
275
325
|
"overall": 82.0,
|
|
276
326
|
}
|
|
327
|
+
TEXT_EMBEDDING_3_SMALL_MODEL_CARD = {
|
|
328
|
+
##### Cost in USD #####
|
|
329
|
+
"usd_per_input_token": 0.02 / 1e6,
|
|
330
|
+
"usd_per_output_token": None,
|
|
331
|
+
##### Time #####
|
|
332
|
+
"seconds_per_output_token": 0.0098, # NOTE: just copying GPT_4o_MINI_MODEL_CARD for now
|
|
333
|
+
##### Agg. Benchmark #####
|
|
334
|
+
"overall": 82.0, # NOTE: just copying GPT_4o_MINI_MODEL_CARD for now
|
|
335
|
+
}
|
|
336
|
+
CLIP_VIT_B_32_MODEL_CARD = {
|
|
337
|
+
##### Cost in USD #####
|
|
338
|
+
"usd_per_input_token": 0.00,
|
|
339
|
+
"usd_per_output_token": None,
|
|
340
|
+
##### Time #####
|
|
341
|
+
"seconds_per_output_token": 0.0098, # NOTE: just copying TEXT_EMBEDDING_3_SMALL_MODEL_CARD for now
|
|
342
|
+
##### Agg. Benchmark #####
|
|
343
|
+
"overall": 63.3, # NOTE: ImageNet top-1 accuracy
|
|
344
|
+
}
|
|
277
345
|
|
|
278
346
|
|
|
279
347
|
MODEL_CARDS = {
|
|
280
|
-
Model.LLAMA3.value:
|
|
281
|
-
Model.LLAMA3_V.value:
|
|
348
|
+
Model.LLAMA3.value: LLAMA3_3_70B_INSTRUCT_MODEL_CARD,
|
|
349
|
+
Model.LLAMA3_V.value: LLAMA3_2_90B_V_MODEL_CARD,
|
|
350
|
+
Model.DEEPSEEK.value: DEEPSEEK_V3_MODEL_CARD,
|
|
282
351
|
Model.MIXTRAL.value: MIXTRAL_8X_7B_MODEL_CARD,
|
|
283
352
|
Model.GPT_4o.value: GPT_4o_MODEL_CARD,
|
|
284
353
|
Model.GPT_4o_V.value: GPT_4o_V_MODEL_CARD,
|
|
285
354
|
Model.GPT_4o_MINI.value: GPT_4o_MINI_MODEL_CARD,
|
|
286
355
|
Model.GPT_4o_MINI_V.value: GPT_4o_MINI_V_MODEL_CARD,
|
|
287
|
-
|
|
356
|
+
Model.TEXT_EMBEDDING_3_SMALL.value: TEXT_EMBEDDING_3_SMALL_MODEL_CARD,
|
|
357
|
+
Model.CLIP_VIT_B_32.value: CLIP_VIT_B_32_MODEL_CARD,
|
|
288
358
|
###
|
|
289
359
|
# Model.GPT_3_5.value: GPT_3_5_MODEL_CARD,
|
|
290
360
|
# Model.GPT_4.value: GPT_4_MODEL_CARD,
|
|
@@ -294,9 +364,6 @@ MODEL_CARDS = {
|
|
|
294
364
|
}
|
|
295
365
|
|
|
296
366
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
367
|
###### DEPRECATED ######
|
|
301
368
|
# # NOTE: seconds_per_output_token is based on `gpt-3.5-turbo-1106`
|
|
302
369
|
# GPT_3_5_MODEL_CARD = {
|
|
@@ -317,7 +384,7 @@ MODEL_CARDS = {
|
|
|
317
384
|
# ### "DROP": 64.1, # 3-shot
|
|
318
385
|
# ##### Code #####
|
|
319
386
|
# "code": 48.1,
|
|
320
|
-
# ### "HumanEval": 48.1,^ # 0-shot
|
|
387
|
+
# ### "HumanEval": 48.1,^ # 0-shot
|
|
321
388
|
# ##### Math #####
|
|
322
389
|
# "math": 57.1,
|
|
323
390
|
# ### "GSM8K": 57.1,^ # 5-shot
|
|
@@ -364,10 +431,10 @@ MODEL_CARDS = {
|
|
|
364
431
|
|
|
365
432
|
# GEMINI_1_MODEL_CARD = {
|
|
366
433
|
# ##### Cost in USD #####
|
|
367
|
-
# "usd_per_input_token": 125 / 1E8, # Gemini is free but rate limited for now. Pricing will be updated
|
|
434
|
+
# "usd_per_input_token": 125 / 1E8, # Gemini is free but rate limited for now. Pricing will be updated
|
|
368
435
|
# "usd_per_output_token": 375 / 1E9,
|
|
369
436
|
# ##### Time #####
|
|
370
|
-
# "seconds_per_output_token": 0.042 / 10.0, # TODO:
|
|
437
|
+
# "seconds_per_output_token": 0.042 / 10.0, # TODO:
|
|
371
438
|
# ##### Agg. Benchmark #####
|
|
372
439
|
# "overall": 65.0, # 90.0 TODO: we are using the free version of Gemini which is substantially worse than its paid version; I'm manually revising it's quality below that of Mixtral
|
|
373
440
|
# ##### Commonsense Reasoning #####
|
|
@@ -379,7 +446,7 @@ MODEL_CARDS = {
|
|
|
379
446
|
# ##### Code #####
|
|
380
447
|
# "code": 74.4,
|
|
381
448
|
# # "HumanEval": 74.4, # 0-shot (IT)*
|
|
382
|
-
# # "Natural2Code": 74.9, # 0-shot
|
|
449
|
+
# # "Natural2Code": 74.9, # 0-shot
|
|
383
450
|
# ##### Math #####
|
|
384
451
|
# "math": 94.4,
|
|
385
452
|
# # "GSM8K": 94.4, # maj1@32
|
|
@@ -388,10 +455,10 @@ MODEL_CARDS = {
|
|
|
388
455
|
|
|
389
456
|
# GEMINI_1V_MODEL_CARD = {
|
|
390
457
|
# ##### Cost in USD #####
|
|
391
|
-
# "usd_per_input_token": 25 / 1E6, # Gemini is free but rate limited for now. Pricing will be updated
|
|
458
|
+
# "usd_per_input_token": 25 / 1E6, # Gemini is free but rate limited for now. Pricing will be updated
|
|
392
459
|
# "usd_per_output_token": 375 / 1E9,
|
|
393
460
|
# ##### Time #####
|
|
394
|
-
# "seconds_per_output_token": 0.042, # / 10.0, # TODO:
|
|
461
|
+
# "seconds_per_output_token": 0.042, # / 10.0, # TODO:
|
|
395
462
|
# ##### Agg. Benchmark #####
|
|
396
463
|
# "overall": 65.0, # 90.0, TODO: see note above in Gemini_1 model card
|
|
397
464
|
# ##### Commonsense Reasoning #####
|
|
@@ -403,7 +470,7 @@ MODEL_CARDS = {
|
|
|
403
470
|
# ##### Code #####
|
|
404
471
|
# "code": 74.4,
|
|
405
472
|
# # "HumanEval": 74.4, # 0-shot (IT)*
|
|
406
|
-
# # "Natural2Code": 74.9, # 0-shot
|
|
473
|
+
# # "Natural2Code": 74.9, # 0-shot
|
|
407
474
|
# ##### Math #####
|
|
408
475
|
# "math": 94.4,
|
|
409
476
|
# # "GSM8K": 94.4, # maj1@32
|
palimpzest/core/__init__.py
CHANGED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
from palimpzest.core.data.datareaders import (
|
|
2
|
-
DataReader,
|
|
3
|
-
DirectoryReader,
|
|
4
|
-
FileReader,
|
|
5
|
-
HTMLFileDirectoryReader,
|
|
6
|
-
ImageFileDirectoryReader,
|
|
7
|
-
MemoryReader,
|
|
8
|
-
PDFFileDirectoryReader,
|
|
9
|
-
TextFileDirectoryReader,
|
|
10
|
-
XLSFileDirectoryReader,
|
|
11
|
-
)
|
|
12
|
-
from palimpzest.core.elements.records import DataRecord, DataRecordSet
|
|
13
|
-
from palimpzest.core.lib.fields import (
|
|
14
|
-
BooleanField,
|
|
15
|
-
BytesField,
|
|
16
|
-
CallableField,
|
|
17
|
-
Field,
|
|
18
|
-
ListField,
|
|
19
|
-
NumericField,
|
|
20
|
-
StringField,
|
|
21
|
-
)
|
|
22
|
-
from palimpzest.core.lib.schemas import (
|
|
23
|
-
URL,
|
|
24
|
-
Download,
|
|
25
|
-
EquationImage,
|
|
26
|
-
File,
|
|
27
|
-
ImageFile,
|
|
28
|
-
Number,
|
|
29
|
-
OperatorDerivedSchema,
|
|
30
|
-
PDFFile,
|
|
31
|
-
PlotImage,
|
|
32
|
-
RawJSONObject,
|
|
33
|
-
Schema,
|
|
34
|
-
Table,
|
|
35
|
-
TextFile,
|
|
36
|
-
WebPage,
|
|
37
|
-
XLSFile,
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
__all__ = [
|
|
41
|
-
# fields
|
|
42
|
-
"BooleanField",
|
|
43
|
-
"BytesField",
|
|
44
|
-
"CallableField",
|
|
45
|
-
"Field",
|
|
46
|
-
"ListField",
|
|
47
|
-
"NumericField",
|
|
48
|
-
"StringField",
|
|
49
|
-
# schemas
|
|
50
|
-
"URL",
|
|
51
|
-
"Download",
|
|
52
|
-
"EquationImage",
|
|
53
|
-
"File",
|
|
54
|
-
"ImageFile",
|
|
55
|
-
"Number",
|
|
56
|
-
"OperatorDerivedSchema",
|
|
57
|
-
"PDFFile",
|
|
58
|
-
"PlotImage",
|
|
59
|
-
"RawJSONObject",
|
|
60
|
-
"Schema",
|
|
61
|
-
"Table",
|
|
62
|
-
"TextFile",
|
|
63
|
-
"WebPage",
|
|
64
|
-
"XLSFile",
|
|
65
|
-
# datareaders
|
|
66
|
-
"DataReader",
|
|
67
|
-
"DirectoryReader",
|
|
68
|
-
"FileReader",
|
|
69
|
-
"HTMLFileDirectoryReader",
|
|
70
|
-
"ImageFileDirectoryReader",
|
|
71
|
-
"MemoryReader",
|
|
72
|
-
"PDFFileDirectoryReader",
|
|
73
|
-
"TextFileDirectoryReader",
|
|
74
|
-
"XLSFileDirectoryReader",
|
|
75
|
-
# records
|
|
76
|
-
"DataRecord",
|
|
77
|
-
"DataRecordSet",
|
|
78
|
-
]
|