parallex-0.3.0-py3-none-any.whl → parallex-0.3.2-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
parallex/ai/open_ai_client.py CHANGED
@@ -18,6 +18,7 @@ class OpenAIClient:
18
18
  azure_endpoint=os.getenv("AZURE_API_BASE"),
19
19
  api_key=os.getenv("AZURE_API_KEY"),
20
20
  api_version=os.getenv("AZURE_API_VERSION"),
21
+ timeout=3000,
21
22
  )
22
23
 
23
24
  async def upload(self, file_path: str) -> FileObject:
parallex/ai/uploader.py CHANGED
@@ -76,8 +76,8 @@ async def upload_prompts_for_processing(
76
76
  jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
77
77
  with open(upload_file_location, "a") as jsonl_file:
78
78
  jsonl_file.write(json.dumps(jsonl) + "\n")
79
- batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
- batch_files.append(batch_file)
79
+ batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
+ batch_files.append(batch_file)
81
81
  return batch_files
82
82
 
83
83
 
parallex/parallex.py CHANGED
@@ -46,6 +46,7 @@ async def parallex(
46
46
  )
47
47
  except Exception as e:
48
48
  logger.error(f"Error occurred: {e}")
49
+ raise e
49
50
  finally:
50
51
  await _delete_associated_files(open_ai_client, remote_file_handler)
51
52
 
@@ -69,6 +70,7 @@ async def parallex_simple_prompts(
69
70
  )
70
71
  except Exception as e:
71
72
  logger.error(f"Error occurred: {e}")
73
+ raise e
72
74
  finally:
73
75
  await _delete_associated_files(open_ai_client, remote_file_handler)
74
76
 
@@ -101,30 +103,21 @@ async def _prompts_execute(
101
103
  start_batch_tasks.append(batch_task)
102
104
  batch_jobs = await asyncio.gather(*start_batch_tasks)
103
105
 
106
+ process_semaphore = asyncio.Semaphore(concurrency)
104
107
  prompt_tasks = []
105
108
  for batch in batch_jobs:
106
109
  logger.info(
107
110
  f"waiting for batch to complete - {batch.id} - {batch.trace_id}"
108
111
  )
109
- page_task = asyncio.create_task(
110
- await wait_for_batch_completion(client=open_ai_client, batch=batch)
111
- )
112
- prompt_tasks.append(page_task)
113
-
114
- output_file_ids = await asyncio.gather(*prompt_tasks)
115
-
116
- prompts_output = []
117
- for output_file_id in output_file_ids:
118
- logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
119
- prompts_output.append(
120
- await process_prompts_output(
121
- client=open_ai_client, output_file_id=output_file_id
122
- )
112
+ prompt_task = asyncio.create_task(
113
+ await _wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore)
123
114
  )
115
+ prompt_tasks.append(prompt_task)
116
+ prompt_response_groups = await asyncio.gather(*prompt_tasks)
124
117
 
125
- flat_prompts = [page for batch in prompts_output for page in batch]
118
+ flat_responses = [response for batch in prompt_response_groups for response in batch]
126
119
 
127
- sorted_responses = sorted(flat_prompts, key=lambda x: x.prompt_index)
120
+ sorted_responses = sorted(flat_responses, key=lambda x: x.prompt_index)
128
121
  callable_output = ParallexPromptsCallableOutput(
129
122
  original_prompts=prompts,
130
123
  trace_id=trace_id,
@@ -211,6 +204,19 @@ async def _wait_and_create_pages(
211
204
  return page_responses
212
205
 
213
206
 
207
+ async def _wait_and_create_prompt_responses(
208
+ batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
209
+ ):
210
+ async with semaphore:
211
+ logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
212
+ output_file_id = await wait_for_batch_completion(client=client, batch=batch)
213
+ logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
214
+ prompt_responses = await process_prompts_output(
215
+ client=client, output_file_id=output_file_id
216
+ )
217
+ return prompt_responses
218
+
219
+
214
220
  async def _create_batch_jobs(
215
221
  batch_file: BatchFile,
216
222
  client: OpenAIClient,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parallex
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: PDF to markdown using Azure OpenAI batch processing
5
5
  Home-page: https://github.com/Summed-AI/parallex
6
6
  Author: Jeff Hostetler
@@ -1,8 +1,8 @@
1
1
  parallex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  parallex/ai/batch_processor.py,sha256=O5q_jaIU0VI93p7Riq4aZ_qUiN9Omxp5GOfn0IqEYgo,1361
3
- parallex/ai/open_ai_client.py,sha256=TRH78oYod_EWpp3hjEh097OT7hwsQmtv44_j3X9Frxo,2047
3
+ parallex/ai/open_ai_client.py,sha256=CT8q5XEcMr24RlAOaj3-OezruZLczwPtmUafPQWDYXc,2073
4
4
  parallex/ai/output_processor.py,sha256=Rwp8dkLo4xsqooeBh3Xv-uGVbJMG1JQkwyxdUoOs2tQ,1800
5
- parallex/ai/uploader.py,sha256=9GvrzuaQAxqRiYN5dUHWjFeIFXezH0Y7ARnzBkEHbL0,5451
5
+ parallex/ai/uploader.py,sha256=72SZ3KYQSQL1GI0eCUvRDgDxWhz5vLTU_dRhN7cZg84,5443
6
6
  parallex/file_management/converter.py,sha256=Rj-93LXNl2gCY-XUOCZv7DdCNI2-GyRpS5FobnTqwzo,1111
7
7
  parallex/file_management/file_finder.py,sha256=BPvrkxZlwOYmRXzzS138wGTsVzuhDIKfQZn0CISUj3o,1598
8
8
  parallex/file_management/remote_file_handler.py,sha256=jsI9NhOrKQR8K3yo536lGplVBGis9XY0G4dRpumgWFM,213
@@ -15,10 +15,10 @@ parallex/models/parallex_prompts_callable_output.py,sha256=IlNX9627_E8aXWQ-vDBuv
15
15
  parallex/models/prompt_response.py,sha256=LcctuyqwiTHWrZHSahwauMaSBsin5Ws6fQRAzGXTsAA,230
16
16
  parallex/models/raw_file.py,sha256=Nlv6u_jlDCXDgU2_Ff7DRbDCx27pB1NZugNhEoaBMQU,483
17
17
  parallex/models/upload_batch.py,sha256=jrnds9ryXg9drL4TF8TGimMVTCDfKaWsBzFv_ed0i88,2068
18
- parallex/parallex.py,sha256=7YFKnKOkFHoTC7CCHhrXG1JTxprbvw0QkNGOEPYJbvQ,8500
18
+ parallex/parallex.py,sha256=SqZBu1QFtSfomJFezteURlsMspvozb3ph4YwjDYXn88,8870
19
19
  parallex/utils/constants.py,sha256=508ieZLZ5kse0T4_QyNJp57Aq0DMNFjjyFlsKa0xtek,366
20
20
  parallex/utils/logger.py,sha256=i3ZZ7YTUmhUStbvVME67F9ffnkLOv5ijm7wVUyJT8Ys,440
21
- parallex-0.3.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
- parallex-0.3.0.dist-info/METADATA,sha256=hIIhGrV5PE-E-lkWf-kBE3QBPevKSVRHkw0hUx_iqik,4461
23
- parallex-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
- parallex-0.3.0.dist-info/RECORD,,
21
+ parallex-0.3.2.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
+ parallex-0.3.2.dist-info/METADATA,sha256=6LWYbikRUS1jpMPfDXZau1L8y0oNN987KC0CnwBADYs,4461
23
+ parallex-0.3.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
+ parallex-0.3.2.dist-info/RECORD,,