parallex 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parallex/ai/output_processor.py +12 -6
- parallex/ai/uploader.py +33 -6
- parallex/models/page_response.py +1 -1
- parallex/models/prompt_response.py +1 -1
- parallex/parallex.py +15 -6
- {parallex-0.3.4.dist-info → parallex-0.4.0.dist-info}/METADATA +1 -1
- {parallex-0.3.4.dist-info → parallex-0.4.0.dist-info}/RECORD +9 -9
- {parallex-0.3.4.dist-info → parallex-0.4.0.dist-info}/LICENSE +0 -0
- {parallex-0.3.4.dist-info → parallex-0.4.0.dist-info}/WHEEL +0 -0
parallex/ai/output_processor.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
import json
|
2
|
-
from typing import TypeVar, Callable
|
2
|
+
from typing import TypeVar, Callable, Optional
|
3
|
+
|
4
|
+
from pydantic import BaseModel
|
3
5
|
|
4
6
|
from parallex.ai.open_ai_client import OpenAIClient
|
5
7
|
from parallex.models.page_response import PageResponse
|
@@ -8,11 +10,12 @@ from parallex.utils.constants import CUSTOM_ID_DELINEATOR
|
|
8
10
|
|
9
11
|
|
10
12
|
async def process_images_output(
|
11
|
-
client: OpenAIClient, output_file_id: str
|
13
|
+
client: OpenAIClient, output_file_id: str, model: Optional[type[BaseModel]] = None
|
12
14
|
) -> list[PageResponse]:
|
13
15
|
return await _process_output(
|
14
16
|
client,
|
15
17
|
output_file_id,
|
18
|
+
model,
|
16
19
|
lambda content, identifier: PageResponse(
|
17
20
|
output_content=content, page_number=int(identifier)
|
18
21
|
),
|
@@ -20,12 +23,13 @@ async def process_images_output(
|
|
20
23
|
|
21
24
|
|
22
25
|
async def process_prompts_output(
|
23
|
-
client: OpenAIClient, output_file_id: str
|
26
|
+
client: OpenAIClient, output_file_id: str, model: Optional[type[BaseModel]] = None
|
24
27
|
) -> list[PromptResponse]:
|
25
28
|
"""Gets content from completed Batch to create PromptResponse with LLM answers to given prompts"""
|
26
29
|
return await _process_output(
|
27
30
|
client,
|
28
31
|
output_file_id,
|
32
|
+
model,
|
29
33
|
lambda content, identifier: PromptResponse(
|
30
34
|
output_content=content, prompt_index=int(identifier)
|
31
35
|
),
|
@@ -38,6 +42,7 @@ ResponseType = TypeVar("ResponseType")
|
|
38
42
|
async def _process_output(
|
39
43
|
client: OpenAIClient,
|
40
44
|
output_file_id: str,
|
45
|
+
model: Optional[type[BaseModel]],
|
41
46
|
response_builder: Callable[[str, str], ResponseType],
|
42
47
|
) -> list[ResponseType]:
|
43
48
|
file_response = await client.retrieve_file(output_file_id)
|
@@ -48,9 +53,10 @@ async def _process_output(
|
|
48
53
|
json_response = json.loads(raw_response)
|
49
54
|
custom_id = json_response["custom_id"]
|
50
55
|
identifier = custom_id.split(CUSTOM_ID_DELINEATOR)[1].split(".")[0]
|
51
|
-
output_content = json_response["response"]["body"]["choices"][0]["message"][
|
52
|
-
"content"
|
53
|
-
]
|
56
|
+
output_content = json_response["response"]["body"]["choices"][0]["message"]["content"]
|
57
|
+
if model:
|
58
|
+
json_data = json.loads(output_content)
|
59
|
+
output_content = model(**json_data)
|
54
60
|
response = response_builder(output_content, identifier)
|
55
61
|
responses.append(response)
|
56
62
|
|
parallex/ai/uploader.py
CHANGED
@@ -1,8 +1,12 @@
|
|
1
1
|
import base64
|
2
2
|
import json
|
3
3
|
import os
|
4
|
+
from typing import Optional
|
4
5
|
from uuid import UUID
|
5
6
|
|
7
|
+
from openai.lib._pydantic import to_strict_json_schema
|
8
|
+
from pydantic import BaseModel
|
9
|
+
|
6
10
|
from parallex.ai.open_ai_client import OpenAIClient
|
7
11
|
from parallex.file_management.utils import file_in_temp_dir
|
8
12
|
from parallex.models.batch_file import BatchFile
|
@@ -17,6 +21,7 @@ async def upload_images_for_processing(
|
|
17
21
|
image_files: list[ImageFile],
|
18
22
|
temp_directory: str,
|
19
23
|
prompt_text: str,
|
24
|
+
model: Optional[type[BaseModel]] = None,
|
20
25
|
) -> list[BatchFile]:
|
21
26
|
"""Base64 encodes image, converts to expected jsonl format and uploads"""
|
22
27
|
trace_id = image_files[0].trace_id
|
@@ -43,7 +48,7 @@ async def upload_images_for_processing(
|
|
43
48
|
prompt_custom_id = (
|
44
49
|
f"{image_file.trace_id}{CUSTOM_ID_DELINEATOR}{image_file.page_number}.jsonl"
|
45
50
|
)
|
46
|
-
jsonl = _image_jsonl_format(prompt_custom_id, base64_encoded_image, prompt_text)
|
51
|
+
jsonl = _image_jsonl_format(prompt_custom_id, base64_encoded_image, prompt_text, model)
|
47
52
|
with open(upload_file_location, "a") as jsonl_file:
|
48
53
|
jsonl_file.write(json.dumps(jsonl) + "\n")
|
49
54
|
batch_file = await _create_batch_file(client, trace_id, upload_file_location)
|
@@ -52,7 +57,10 @@ async def upload_images_for_processing(
|
|
52
57
|
|
53
58
|
|
54
59
|
async def upload_prompts_for_processing(
|
55
|
-
client: OpenAIClient, prompts: list[str], temp_directory: str, trace_id: UUID
|
60
|
+
client: OpenAIClient,
|
61
|
+
prompts: list[str], temp_directory: str,
|
62
|
+
trace_id: UUID,
|
63
|
+
model: Optional[type[BaseModel]] = None
|
56
64
|
) -> list[BatchFile]:
|
57
65
|
"""Creates jsonl file and uploads for processing"""
|
58
66
|
current_index = 0
|
@@ -73,7 +81,7 @@ async def upload_prompts_for_processing(
|
|
73
81
|
)
|
74
82
|
|
75
83
|
prompt_custom_id = f"{trace_id}{CUSTOM_ID_DELINEATOR}{index}.jsonl"
|
76
|
-
jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
|
84
|
+
jsonl = _simple_jsonl_format(prompt_custom_id, prompt, model)
|
77
85
|
with open(upload_file_location, "a") as jsonl_file:
|
78
86
|
jsonl_file.write(json.dumps(jsonl) + "\n")
|
79
87
|
batch_file = await _create_batch_file(client, trace_id, upload_file_location)
|
@@ -119,8 +127,20 @@ async def _create_batch_file(
|
|
119
127
|
)
|
120
128
|
|
121
129
|
|
122
|
-
def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str) -> dict:
|
130
|
+
def _response_format(model: type[BaseModel]) -> dict:
|
131
|
+
schema = to_strict_json_schema(model)
|
123
132
|
return {
|
133
|
+
"type": "json_schema",
|
134
|
+
"json_schema": {
|
135
|
+
"name": model.__name__,
|
136
|
+
"strict": True,
|
137
|
+
"schema": schema
|
138
|
+
}
|
139
|
+
}
|
140
|
+
|
141
|
+
|
142
|
+
def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str, model: Optional[type[BaseModel]]) -> dict:
|
143
|
+
payload = {
|
124
144
|
"custom_id": prompt_custom_id,
|
125
145
|
"method": "POST",
|
126
146
|
"url": "/chat/completions",
|
@@ -130,10 +150,13 @@ def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str) -> dict:
|
|
130
150
|
"temperature": 0.0, # TODO make configurable
|
131
151
|
},
|
132
152
|
}
|
153
|
+
if model is not None:
|
154
|
+
payload["body"]["response_format"] = _response_format(model)
|
155
|
+
return payload
|
133
156
|
|
134
157
|
|
135
|
-
def _image_jsonl_format(prompt_custom_id: str, encoded_image: str, prompt_text: str):
|
136
|
-
return {
|
158
|
+
def _image_jsonl_format(prompt_custom_id: str, encoded_image: str, prompt_text: str, model: Optional[type[BaseModel]] = None) -> dict:
|
159
|
+
payload = {
|
137
160
|
"custom_id": prompt_custom_id,
|
138
161
|
"method": "POST",
|
139
162
|
"url": "/chat/completions",
|
@@ -154,5 +177,9 @@ def _image_jsonl_format(prompt_custom_id: str, encoded_image: str, prompt_text:
|
|
154
177
|
}
|
155
178
|
],
|
156
179
|
"max_tokens": 2000,
|
180
|
+
"response_format": {"type": "json_object"}
|
157
181
|
},
|
158
182
|
}
|
183
|
+
if model is not None:
|
184
|
+
payload["body"]["response_format"] = _response_format(model)
|
185
|
+
return payload
|
parallex/models/page_response.py
CHANGED
@@ -2,5 +2,5 @@ from pydantic import BaseModel, Field
|
|
2
2
|
|
3
3
|
|
4
4
|
class PageResponse(BaseModel):
|
5
|
-
output_content: str = Field(description="Markdown generated for the page")
|
5
|
+
output_content: str | BaseModel = Field(description="Markdown generated for the page")
|
6
6
|
page_number: int = Field(description="Page number of the associated PDF")
|
parallex/models/prompt_response.py
CHANGED
@@ -2,5 +2,5 @@ from pydantic import BaseModel, Field
|
|
2
2
|
|
3
3
|
|
4
4
|
class PromptResponse(BaseModel):
|
5
|
-
output_content: str = Field(description="Response from the model")
|
5
|
+
output_content: str | BaseModel = Field(description="Response from the model")
|
6
6
|
prompt_index: int = Field(description="Index corresponding to the given prompts")
|
parallex/parallex.py
CHANGED
@@ -4,6 +4,8 @@ import uuid
|
|
4
4
|
from typing import Callable, Optional
|
5
5
|
from uuid import UUID
|
6
6
|
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
7
9
|
from parallex.ai.batch_processor import wait_for_batch_completion, create_batch
|
8
10
|
from parallex.ai.open_ai_client import OpenAIClient
|
9
11
|
from parallex.ai.output_processor import process_images_output, process_prompts_output
|
@@ -32,6 +34,7 @@ async def parallex(
|
|
32
34
|
concurrency: Optional[int] = 20,
|
33
35
|
prompt_text: Optional[str] = DEFAULT_PROMPT,
|
34
36
|
log_level: Optional[str] = "ERROR",
|
37
|
+
response_model: Optional[type[BaseModel]] = None,
|
35
38
|
) -> ParallexCallableOutput:
|
36
39
|
setup_logger(log_level)
|
37
40
|
remote_file_handler = RemoteFileHandler()
|
@@ -43,6 +46,7 @@ async def parallex(
|
|
43
46
|
post_process_callable=post_process_callable,
|
44
47
|
concurrency=concurrency,
|
45
48
|
prompt_text=prompt_text,
|
49
|
+
model=response_model
|
46
50
|
)
|
47
51
|
except Exception as e:
|
48
52
|
logger.error(f"Error occurred: {e}")
|
@@ -57,6 +61,7 @@ async def parallex_simple_prompts(
|
|
57
61
|
post_process_callable: Optional[Callable[..., None]] = None,
|
58
62
|
log_level: Optional[str] = "ERROR",
|
59
63
|
concurrency: Optional[int] = 20,
|
64
|
+
response_model: Optional[type[BaseModel]] = None,
|
60
65
|
) -> ParallexPromptsCallableOutput:
|
61
66
|
setup_logger(log_level)
|
62
67
|
remote_file_handler = RemoteFileHandler()
|
@@ -67,6 +72,7 @@ async def parallex_simple_prompts(
|
|
67
72
|
prompts=prompts,
|
68
73
|
post_process_callable=post_process_callable,
|
69
74
|
concurrency=concurrency,
|
75
|
+
model=response_model,
|
70
76
|
)
|
71
77
|
except Exception as e:
|
72
78
|
logger.error(f"Error occurred: {e}")
|
@@ -80,6 +86,7 @@ async def _prompts_execute(
|
|
80
86
|
prompts: list[str],
|
81
87
|
post_process_callable: Optional[Callable[..., None]] = None,
|
82
88
|
concurrency: Optional[int] = 20,
|
89
|
+
model: Optional[type[BaseModel]] = None,
|
83
90
|
):
|
84
91
|
with tempfile.TemporaryDirectory() as temp_directory:
|
85
92
|
trace_id = uuid.uuid4()
|
@@ -88,6 +95,7 @@ async def _prompts_execute(
|
|
88
95
|
prompts=prompts,
|
89
96
|
temp_directory=temp_directory,
|
90
97
|
trace_id=trace_id,
|
98
|
+
model=model,
|
91
99
|
)
|
92
100
|
start_batch_semaphore = asyncio.Semaphore(concurrency)
|
93
101
|
start_batch_tasks = []
|
@@ -110,7 +118,7 @@ async def _prompts_execute(
|
|
110
118
|
f"waiting for batch to complete - {batch.id} - {batch.trace_id}"
|
111
119
|
)
|
112
120
|
prompt_task = asyncio.create_task(
|
113
|
-
_wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore)
|
121
|
+
_wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore, model=model)
|
114
122
|
)
|
115
123
|
prompt_tasks.append(prompt_task)
|
116
124
|
prompt_response_groups = await asyncio.gather(*prompt_tasks)
|
@@ -134,6 +142,7 @@ async def _execute(
|
|
134
142
|
post_process_callable: Optional[Callable[..., None]] = None,
|
135
143
|
concurrency: Optional[int] = 20,
|
136
144
|
prompt_text: Optional[str] = DEFAULT_PROMPT,
|
145
|
+
model: Optional[type[BaseModel]] = None,
|
137
146
|
) -> ParallexCallableOutput:
|
138
147
|
with tempfile.TemporaryDirectory() as temp_directory:
|
139
148
|
raw_file = await add_file_to_temp_directory(
|
@@ -169,7 +178,7 @@ async def _execute(
|
|
169
178
|
for batch in batch_jobs:
|
170
179
|
page_task = asyncio.create_task(
|
171
180
|
_wait_and_create_pages(
|
172
|
-
batch=batch, client=open_ai_client, semaphore=process_semaphore
|
181
|
+
batch=batch, client=open_ai_client, semaphore=process_semaphore, model=model
|
173
182
|
)
|
174
183
|
)
|
175
184
|
pages_tasks.append(page_task)
|
@@ -192,27 +201,27 @@ async def _execute(
|
|
192
201
|
|
193
202
|
|
194
203
|
async def _wait_and_create_pages(
|
195
|
-
batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
|
204
|
+
batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore, model: Optional[type[BaseModel]] = None
|
196
205
|
):
|
197
206
|
async with semaphore:
|
198
207
|
logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
|
199
208
|
output_file_id = await wait_for_batch_completion(client=client, batch=batch)
|
200
209
|
logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
|
201
210
|
page_responses = await process_images_output(
|
202
|
-
client=client, output_file_id=output_file_id
|
211
|
+
client=client, output_file_id=output_file_id, model=model,
|
203
212
|
)
|
204
213
|
return page_responses
|
205
214
|
|
206
215
|
|
207
216
|
async def _wait_and_create_prompt_responses(
|
208
|
-
batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
|
217
|
+
batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore, model: Optional[type[BaseModel]] = None
|
209
218
|
):
|
210
219
|
async with semaphore:
|
211
220
|
logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
|
212
221
|
output_file_id = await wait_for_batch_completion(client=client, batch=batch)
|
213
222
|
logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
|
214
223
|
prompt_responses = await process_prompts_output(
|
215
|
-
client=client, output_file_id=output_file_id
|
224
|
+
client=client, output_file_id=output_file_id, model=model,
|
216
225
|
)
|
217
226
|
return prompt_responses
|
218
227
|
|
{parallex-0.3.4.dist-info → parallex-0.4.0.dist-info}/RECORD
@@ -1,24 +1,24 @@
|
|
1
1
|
parallex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
2
|
parallex/ai/batch_processor.py,sha256=O5q_jaIU0VI93p7Riq4aZ_qUiN9Omxp5GOfn0IqEYgo,1361
|
3
3
|
parallex/ai/open_ai_client.py,sha256=TRH78oYod_EWpp3hjEh097OT7hwsQmtv44_j3X9Frxo,2047
|
4
|
-
parallex/ai/output_processor.py,sha256=
|
5
|
-
parallex/ai/uploader.py,sha256=
|
4
|
+
parallex/ai/output_processor.py,sha256=kd50DwB2txhzz4_MPYl97bPOtLMl0KV2UP_eFmUtq34,2087
|
5
|
+
parallex/ai/uploader.py,sha256=FKleSK8GWextqpUUAthvTtxGHSwN-aYF127t1YmGOcw,6375
|
6
6
|
parallex/file_management/converter.py,sha256=Rj-93LXNl2gCY-XUOCZv7DdCNI2-GyRpS5FobnTqwzo,1111
|
7
7
|
parallex/file_management/file_finder.py,sha256=BPvrkxZlwOYmRXzzS138wGTsVzuhDIKfQZn0CISUj3o,1598
|
8
8
|
parallex/file_management/remote_file_handler.py,sha256=jsI9NhOrKQR8K3yo536lGplVBGis9XY0G4dRpumgWFM,213
|
9
9
|
parallex/file_management/utils.py,sha256=WMdXd9UOFbJDHnL2IWfDXyyD2jhwnGtpCVI_npiSlIk,98
|
10
10
|
parallex/models/batch_file.py,sha256=JwARFB48sMOTN-wf7J5YbsWIac2rxXnZ4fBABFESA0M,405
|
11
11
|
parallex/models/image_file.py,sha256=LjQne2b6rIDWpQpdYT41KXNDWpg5kv9bkM1SCx6jnAI,402
|
12
|
-
parallex/models/page_response.py,sha256=
|
12
|
+
parallex/models/page_response.py,sha256=uqVdHXoEWX3NVvr0Y2_izSA1cpw3EXFZRe1HmI4ypLk,240
|
13
13
|
parallex/models/parallex_callable_output.py,sha256=CkJKA8mwsc5olNnG1K6nrWUu4xTkJvp8bp3SSPQEX5c,465
|
14
14
|
parallex/models/parallex_prompts_callable_output.py,sha256=IlNX9627_E8aXWQ-vDBuv2-9jMFXqn4LFBbShPzxoc4,421
|
15
|
-
parallex/models/prompt_response.py,sha256=
|
15
|
+
parallex/models/prompt_response.py,sha256=2Zmnwlj8Ou2VgEHmi1VZrlnv5XRzw5VLMEkpQ1VelQQ,242
|
16
16
|
parallex/models/raw_file.py,sha256=Nlv6u_jlDCXDgU2_Ff7DRbDCx27pB1NZugNhEoaBMQU,483
|
17
17
|
parallex/models/upload_batch.py,sha256=jrnds9ryXg9drL4TF8TGimMVTCDfKaWsBzFv_ed0i88,2068
|
18
|
-
parallex/parallex.py,sha256=
|
18
|
+
parallex/parallex.py,sha256=JogDmjB-HdsauCis6hyfSBF_tQi2IdmXfltK72roi28,9322
|
19
19
|
parallex/utils/constants.py,sha256=508ieZLZ5kse0T4_QyNJp57Aq0DMNFjjyFlsKa0xtek,366
|
20
20
|
parallex/utils/logger.py,sha256=i3ZZ7YTUmhUStbvVME67F9ffnkLOv5ijm7wVUyJT8Ys,440
|
21
|
-
parallex-0.
|
22
|
-
parallex-0.
|
23
|
-
parallex-0.
|
24
|
-
parallex-0.
|
21
|
+
parallex-0.4.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
|
22
|
+
parallex-0.4.0.dist-info/METADATA,sha256=Hdq1xbDWVVPhR-61O88E9Glv7rn3LzKfz72--rzJovo,4461
|
23
|
+
parallex-0.4.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
24
|
+
parallex-0.4.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|