parallex 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,7 @@ class OpenAIClient:
18
18
  azure_endpoint=os.getenv("AZURE_API_BASE"),
19
19
  api_key=os.getenv("AZURE_API_KEY"),
20
20
  api_version=os.getenv("AZURE_API_VERSION"),
21
+ timeout=3000,
21
22
  )
22
23
 
23
24
  async def upload(self, file_path: str) -> FileObject:
parallex/ai/uploader.py CHANGED
@@ -76,8 +76,8 @@ async def upload_prompts_for_processing(
76
76
  jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
77
77
  with open(upload_file_location, "a") as jsonl_file:
78
78
  jsonl_file.write(json.dumps(jsonl) + "\n")
79
- batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
- batch_files.append(batch_file)
79
+ batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
+ batch_files.append(batch_file)
81
81
  return batch_files
82
82
 
83
83
 
parallex/parallex.py CHANGED
@@ -46,6 +46,7 @@ async def parallex(
46
46
  )
47
47
  except Exception as e:
48
48
  logger.error(f"Error occurred: {e}")
49
+ raise e
49
50
  finally:
50
51
  await _delete_associated_files(open_ai_client, remote_file_handler)
51
52
 
@@ -69,6 +70,7 @@ async def parallex_simple_prompts(
69
70
  )
70
71
  except Exception as e:
71
72
  logger.error(f"Error occurred: {e}")
73
+ raise e
72
74
  finally:
73
75
  await _delete_associated_files(open_ai_client, remote_file_handler)
74
76
 
@@ -101,30 +103,21 @@ async def _prompts_execute(
101
103
  start_batch_tasks.append(batch_task)
102
104
  batch_jobs = await asyncio.gather(*start_batch_tasks)
103
105
 
106
+ process_semaphore = asyncio.Semaphore(concurrency)
104
107
  prompt_tasks = []
105
108
  for batch in batch_jobs:
106
109
  logger.info(
107
110
  f"waiting for batch to complete - {batch.id} - {batch.trace_id}"
108
111
  )
109
- page_task = asyncio.create_task(
110
- await wait_for_batch_completion(client=open_ai_client, batch=batch)
111
- )
112
- prompt_tasks.append(page_task)
113
-
114
- output_file_ids = await asyncio.gather(*prompt_tasks)
115
-
116
- prompts_output = []
117
- for output_file_id in output_file_ids:
118
- logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
119
- prompts_output.append(
120
- await process_prompts_output(
121
- client=open_ai_client, output_file_id=output_file_id
122
- )
112
+ prompt_task = asyncio.create_task(
113
+ await _wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore)
123
114
  )
115
+ prompt_tasks.append(prompt_task)
116
+ prompt_response_groups = await asyncio.gather(*prompt_tasks)
124
117
 
125
- flat_prompts = [page for batch in prompts_output for page in batch]
118
+ flat_responses = [response for batch in prompt_response_groups for response in batch]
126
119
 
127
- sorted_responses = sorted(flat_prompts, key=lambda x: x.prompt_index)
120
+ sorted_responses = sorted(flat_responses, key=lambda x: x.prompt_index)
128
121
  callable_output = ParallexPromptsCallableOutput(
129
122
  original_prompts=prompts,
130
123
  trace_id=trace_id,
@@ -211,6 +204,19 @@ async def _wait_and_create_pages(
211
204
  return page_responses
212
205
 
213
206
 
207
+ async def _wait_and_create_prompt_responses(
208
+ batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
209
+ ):
210
+ async with semaphore:
211
+ logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
212
+ output_file_id = await wait_for_batch_completion(client=client, batch=batch)
213
+ logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
214
+ prompt_responses = await process_prompts_output(
215
+ client=client, output_file_id=output_file_id
216
+ )
217
+ return prompt_responses
218
+
219
+
214
220
  async def _create_batch_jobs(
215
221
  batch_file: BatchFile,
216
222
  client: OpenAIClient,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parallex
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: PDF to markdown using Azure OpenAI batch processing
5
5
  Home-page: https://github.com/Summed-AI/parallex
6
6
  Author: Jeff Hostetler
@@ -1,8 +1,8 @@
1
1
  parallex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  parallex/ai/batch_processor.py,sha256=O5q_jaIU0VI93p7Riq4aZ_qUiN9Omxp5GOfn0IqEYgo,1361
3
- parallex/ai/open_ai_client.py,sha256=TRH78oYod_EWpp3hjEh097OT7hwsQmtv44_j3X9Frxo,2047
3
+ parallex/ai/open_ai_client.py,sha256=CT8q5XEcMr24RlAOaj3-OezruZLczwPtmUafPQWDYXc,2073
4
4
  parallex/ai/output_processor.py,sha256=Rwp8dkLo4xsqooeBh3Xv-uGVbJMG1JQkwyxdUoOs2tQ,1800
5
- parallex/ai/uploader.py,sha256=9GvrzuaQAxqRiYN5dUHWjFeIFXezH0Y7ARnzBkEHbL0,5451
5
+ parallex/ai/uploader.py,sha256=72SZ3KYQSQL1GI0eCUvRDgDxWhz5vLTU_dRhN7cZg84,5443
6
6
  parallex/file_management/converter.py,sha256=Rj-93LXNl2gCY-XUOCZv7DdCNI2-GyRpS5FobnTqwzo,1111
7
7
  parallex/file_management/file_finder.py,sha256=BPvrkxZlwOYmRXzzS138wGTsVzuhDIKfQZn0CISUj3o,1598
8
8
  parallex/file_management/remote_file_handler.py,sha256=jsI9NhOrKQR8K3yo536lGplVBGis9XY0G4dRpumgWFM,213
@@ -15,10 +15,10 @@ parallex/models/parallex_prompts_callable_output.py,sha256=IlNX9627_E8aXWQ-vDBuv
15
15
  parallex/models/prompt_response.py,sha256=LcctuyqwiTHWrZHSahwauMaSBsin5Ws6fQRAzGXTsAA,230
16
16
  parallex/models/raw_file.py,sha256=Nlv6u_jlDCXDgU2_Ff7DRbDCx27pB1NZugNhEoaBMQU,483
17
17
  parallex/models/upload_batch.py,sha256=jrnds9ryXg9drL4TF8TGimMVTCDfKaWsBzFv_ed0i88,2068
18
- parallex/parallex.py,sha256=7YFKnKOkFHoTC7CCHhrXG1JTxprbvw0QkNGOEPYJbvQ,8500
18
+ parallex/parallex.py,sha256=SqZBu1QFtSfomJFezteURlsMspvozb3ph4YwjDYXn88,8870
19
19
  parallex/utils/constants.py,sha256=508ieZLZ5kse0T4_QyNJp57Aq0DMNFjjyFlsKa0xtek,366
20
20
  parallex/utils/logger.py,sha256=i3ZZ7YTUmhUStbvVME67F9ffnkLOv5ijm7wVUyJT8Ys,440
21
- parallex-0.3.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
- parallex-0.3.0.dist-info/METADATA,sha256=hIIhGrV5PE-E-lkWf-kBE3QBPevKSVRHkw0hUx_iqik,4461
23
- parallex-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
- parallex-0.3.0.dist-info/RECORD,,
21
+ parallex-0.3.2.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
+ parallex-0.3.2.dist-info/METADATA,sha256=6LWYbikRUS1jpMPfDXZau1L8y0oNN987KC0CnwBADYs,4461
23
+ parallex-0.3.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
+ parallex-0.3.2.dist-info/RECORD,,