parallex 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parallex/ai/uploader.py +53 -16
- parallex/parallex.py +42 -13
- {parallex-0.2.1.dist-info → parallex-0.3.0.dist-info}/METADATA +1 -1
- {parallex-0.2.1.dist-info → parallex-0.3.0.dist-info}/RECORD +6 -6
- {parallex-0.2.1.dist-info → parallex-0.3.0.dist-info}/LICENSE +0 -0
- {parallex-0.2.1.dist-info → parallex-0.3.0.dist-info}/WHEEL +0 -0
parallex/ai/uploader.py
CHANGED
@@ -9,7 +9,7 @@ from parallex.models.batch_file import BatchFile
|
|
9
9
|
from parallex.models.image_file import ImageFile
|
10
10
|
from parallex.utils.constants import CUSTOM_ID_DELINEATOR
|
11
11
|
|
12
|
-
MAX_FILE_SIZE =
|
12
|
+
MAX_FILE_SIZE = 180 * 1024 * 1024 # 180 MB in bytes. Limit for Azure is 200MB.
|
13
13
|
|
14
14
|
|
15
15
|
async def upload_images_for_processing(
|
@@ -23,22 +23,18 @@ async def upload_images_for_processing(
|
|
23
23
|
current_index = 0
|
24
24
|
batch_files = []
|
25
25
|
upload_file_location = file_in_temp_dir(
|
26
|
-
directory=temp_directory, file_name=f"
|
26
|
+
directory=temp_directory, file_name=f"{trace_id}-{current_index}.jsonl"
|
27
27
|
)
|
28
28
|
|
29
29
|
for image_file in image_files:
|
30
|
-
if (
|
31
|
-
os.path.exists(upload_file_location)
|
32
|
-
and os.path.getsize(upload_file_location) > MAX_FILE_SIZE
|
33
|
-
):
|
30
|
+
if await _approaching_file_size_limit(upload_file_location):
|
34
31
|
"""When approaching upload file limit, upload and start new file"""
|
35
32
|
batch_file = await _create_batch_file(
|
36
33
|
client, trace_id, upload_file_location
|
37
34
|
)
|
38
35
|
batch_files.append(batch_file)
|
39
|
-
|
40
|
-
|
41
|
-
directory=temp_directory, file_name=f"{trace_id}-{current_index}.jsonl"
|
36
|
+
upload_file_location = await _increment_batch_file_index(
|
37
|
+
current_index, temp_directory, trace_id
|
42
38
|
)
|
43
39
|
|
44
40
|
with open(image_file.path, "rb") as image:
|
@@ -57,21 +53,62 @@ async def upload_images_for_processing(
|
|
57
53
|
|
58
54
|
async def upload_prompts_for_processing(
|
59
55
|
client: OpenAIClient, prompts: list[str], temp_directory: str, trace_id: UUID
|
60
|
-
) -> BatchFile:
|
56
|
+
) -> list[BatchFile]:
|
61
57
|
"""Creates jsonl file and uploads for processing"""
|
62
|
-
|
63
|
-
|
58
|
+
current_index = 0
|
59
|
+
batch_files = []
|
60
|
+
|
61
|
+
upload_file_location = await set_file_location(
|
62
|
+
current_index, temp_directory, trace_id
|
64
63
|
)
|
65
64
|
for index, prompt in enumerate(prompts):
|
65
|
+
if await _approaching_file_size_limit(upload_file_location):
|
66
|
+
"""When approaching upload file limit, upload and start new file"""
|
67
|
+
batch_file = await _create_batch_file(
|
68
|
+
client, trace_id, upload_file_location
|
69
|
+
)
|
70
|
+
batch_files.append(batch_file)
|
71
|
+
upload_file_location = await _increment_batch_file_index(
|
72
|
+
current_index, temp_directory, trace_id
|
73
|
+
)
|
74
|
+
|
66
75
|
prompt_custom_id = f"{trace_id}{CUSTOM_ID_DELINEATOR}{index}.jsonl"
|
67
76
|
jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
|
68
77
|
with open(upload_file_location, "a") as jsonl_file:
|
69
78
|
jsonl_file.write(json.dumps(jsonl) + "\n")
|
70
|
-
|
71
|
-
|
79
|
+
batch_file = await _create_batch_file(client, trace_id, upload_file_location)
|
80
|
+
batch_files.append(batch_file)
|
81
|
+
return batch_files
|
82
|
+
|
83
|
+
|
84
|
+
async def set_file_location(
|
85
|
+
current_index: int, temp_directory: str, trace_id: UUID
|
86
|
+
) -> str:
|
87
|
+
return file_in_temp_dir(
|
88
|
+
directory=temp_directory, file_name=f"{trace_id}-{current_index}.jsonl"
|
89
|
+
)
|
90
|
+
|
91
|
+
|
92
|
+
async def _approaching_file_size_limit(upload_file_location: str) -> bool:
|
93
|
+
return (
|
94
|
+
os.path.exists(upload_file_location)
|
95
|
+
and os.path.getsize(upload_file_location) > MAX_FILE_SIZE
|
96
|
+
)
|
97
|
+
|
98
|
+
|
99
|
+
async def _increment_batch_file_index(
|
100
|
+
current_index: int, temp_directory: str, trace_id: UUID
|
101
|
+
) -> str:
|
102
|
+
current_index += 1
|
103
|
+
upload_file_location = await set_file_location(
|
104
|
+
current_index, temp_directory, trace_id
|
105
|
+
)
|
106
|
+
return upload_file_location
|
72
107
|
|
73
108
|
|
74
|
-
async def _create_batch_file(
|
109
|
+
async def _create_batch_file(
|
110
|
+
client: OpenAIClient, trace_id: UUID, upload_file_location: str
|
111
|
+
) -> BatchFile:
|
75
112
|
file_response = await client.upload(upload_file_location)
|
76
113
|
return BatchFile(
|
77
114
|
id=file_response.id,
|
@@ -82,7 +119,7 @@ async def _create_batch_file(client, trace_id, upload_file_location):
|
|
82
119
|
)
|
83
120
|
|
84
121
|
|
85
|
-
def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str):
|
122
|
+
def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str) -> dict:
|
86
123
|
return {
|
87
124
|
"custom_id": prompt_custom_id,
|
88
125
|
"method": "POST",
|
parallex/parallex.py
CHANGED
@@ -55,6 +55,7 @@ async def parallex_simple_prompts(
|
|
55
55
|
prompts: list[str],
|
56
56
|
post_process_callable: Optional[Callable[..., None]] = None,
|
57
57
|
log_level: Optional[str] = "ERROR",
|
58
|
+
concurrency: Optional[int] = 20,
|
58
59
|
) -> ParallexPromptsCallableOutput:
|
59
60
|
setup_logger(log_level)
|
60
61
|
remote_file_handler = RemoteFileHandler()
|
@@ -64,6 +65,7 @@ async def parallex_simple_prompts(
|
|
64
65
|
open_ai_client=open_ai_client,
|
65
66
|
prompts=prompts,
|
66
67
|
post_process_callable=post_process_callable,
|
68
|
+
concurrency=concurrency,
|
67
69
|
)
|
68
70
|
except Exception as e:
|
69
71
|
logger.error(f"Error occurred: {e}")
|
@@ -75,27 +77,54 @@ async def _prompts_execute(
|
|
75
77
|
open_ai_client: OpenAIClient,
|
76
78
|
prompts: list[str],
|
77
79
|
post_process_callable: Optional[Callable[..., None]] = None,
|
80
|
+
concurrency: Optional[int] = 20,
|
78
81
|
):
|
79
82
|
with tempfile.TemporaryDirectory() as temp_directory:
|
80
83
|
trace_id = uuid.uuid4()
|
81
|
-
|
84
|
+
batch_files = await upload_prompts_for_processing(
|
82
85
|
client=open_ai_client,
|
83
86
|
prompts=prompts,
|
84
87
|
temp_directory=temp_directory,
|
85
88
|
trace_id=trace_id,
|
86
89
|
)
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
90
|
+
start_batch_semaphore = asyncio.Semaphore(concurrency)
|
91
|
+
start_batch_tasks = []
|
92
|
+
for file in batch_files:
|
93
|
+
batch_task = asyncio.create_task(
|
94
|
+
_create_batch_jobs(
|
95
|
+
batch_file=file,
|
96
|
+
client=open_ai_client,
|
97
|
+
trace_id=trace_id,
|
98
|
+
semaphore=start_batch_semaphore,
|
99
|
+
)
|
100
|
+
)
|
101
|
+
start_batch_tasks.append(batch_task)
|
102
|
+
batch_jobs = await asyncio.gather(*start_batch_tasks)
|
103
|
+
|
104
|
+
prompt_tasks = []
|
105
|
+
for batch in batch_jobs:
|
106
|
+
logger.info(
|
107
|
+
f"waiting for batch to complete - {batch.id} - {batch.trace_id}"
|
108
|
+
)
|
109
|
+
page_task = asyncio.create_task(
|
110
|
+
await wait_for_batch_completion(client=open_ai_client, batch=batch)
|
111
|
+
)
|
112
|
+
prompt_tasks.append(page_task)
|
113
|
+
|
114
|
+
output_file_ids = await asyncio.gather(*prompt_tasks)
|
115
|
+
|
116
|
+
prompts_output = []
|
117
|
+
for output_file_id in output_file_ids:
|
118
|
+
logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
|
119
|
+
prompts_output.append(
|
120
|
+
await process_prompts_output(
|
121
|
+
client=open_ai_client, output_file_id=output_file_id
|
122
|
+
)
|
123
|
+
)
|
124
|
+
|
125
|
+
flat_prompts = [page for batch in prompts_output for page in batch]
|
126
|
+
|
127
|
+
sorted_responses = sorted(flat_prompts, key=lambda x: x.prompt_index)
|
99
128
|
callable_output = ParallexPromptsCallableOutput(
|
100
129
|
original_prompts=prompts,
|
101
130
|
trace_id=trace_id,
|
@@ -2,7 +2,7 @@ parallex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
parallex/ai/batch_processor.py,sha256=O5q_jaIU0VI93p7Riq4aZ_qUiN9Omxp5GOfn0IqEYgo,1361
|
3
3
|
parallex/ai/open_ai_client.py,sha256=TRH78oYod_EWpp3hjEh097OT7hwsQmtv44_j3X9Frxo,2047
|
4
4
|
parallex/ai/output_processor.py,sha256=Rwp8dkLo4xsqooeBh3Xv-uGVbJMG1JQkwyxdUoOs2tQ,1800
|
5
|
-
parallex/ai/uploader.py,sha256=
|
5
|
+
parallex/ai/uploader.py,sha256=9GvrzuaQAxqRiYN5dUHWjFeIFXezH0Y7ARnzBkEHbL0,5451
|
6
6
|
parallex/file_management/converter.py,sha256=Rj-93LXNl2gCY-XUOCZv7DdCNI2-GyRpS5FobnTqwzo,1111
|
7
7
|
parallex/file_management/file_finder.py,sha256=BPvrkxZlwOYmRXzzS138wGTsVzuhDIKfQZn0CISUj3o,1598
|
8
8
|
parallex/file_management/remote_file_handler.py,sha256=jsI9NhOrKQR8K3yo536lGplVBGis9XY0G4dRpumgWFM,213
|
@@ -15,10 +15,10 @@ parallex/models/parallex_prompts_callable_output.py,sha256=IlNX9627_E8aXWQ-vDBuv
|
|
15
15
|
parallex/models/prompt_response.py,sha256=LcctuyqwiTHWrZHSahwauMaSBsin5Ws6fQRAzGXTsAA,230
|
16
16
|
parallex/models/raw_file.py,sha256=Nlv6u_jlDCXDgU2_Ff7DRbDCx27pB1NZugNhEoaBMQU,483
|
17
17
|
parallex/models/upload_batch.py,sha256=jrnds9ryXg9drL4TF8TGimMVTCDfKaWsBzFv_ed0i88,2068
|
18
|
-
parallex/parallex.py,sha256=
|
18
|
+
parallex/parallex.py,sha256=7YFKnKOkFHoTC7CCHhrXG1JTxprbvw0QkNGOEPYJbvQ,8500
|
19
19
|
parallex/utils/constants.py,sha256=508ieZLZ5kse0T4_QyNJp57Aq0DMNFjjyFlsKa0xtek,366
|
20
20
|
parallex/utils/logger.py,sha256=i3ZZ7YTUmhUStbvVME67F9ffnkLOv5ijm7wVUyJT8Ys,440
|
21
|
-
parallex-0.2.1.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
|
22
|
-
parallex-0.
|
23
|
-
parallex-0.2.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
24
|
-
parallex-0.2.1.dist-info/RECORD,,
|
21
|
+
parallex-0.3.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
|
22
|
+
parallex-0.3.0.dist-info/METADATA,sha256=hIIhGrV5PE-E-lkWf-kBE3QBPevKSVRHkw0hUx_iqik,4461
|
23
|
+
parallex-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
24
|
+
parallex-0.3.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|