parallex 0.3.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
parallex/ai/uploader.py CHANGED
@@ -76,8 +76,8 @@ async def upload_prompts_for_processing(
76
76
  jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
77
77
  with open(upload_file_location, "a") as jsonl_file:
78
78
  jsonl_file.write(json.dumps(jsonl) + "\n")
79
- batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
- batch_files.append(batch_file)
79
+ batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
+ batch_files.append(batch_file)
81
81
  return batch_files
82
82
 
83
83
 
parallex/parallex.py CHANGED
@@ -46,6 +46,7 @@ async def parallex(
46
46
  )
47
47
  except Exception as e:
48
48
  logger.error(f"Error occurred: {e}")
49
+ raise e
49
50
  finally:
50
51
  await _delete_associated_files(open_ai_client, remote_file_handler)
51
52
 
@@ -69,6 +70,7 @@ async def parallex_simple_prompts(
69
70
  )
70
71
  except Exception as e:
71
72
  logger.error(f"Error occurred: {e}")
73
+ raise e
72
74
  finally:
73
75
  await _delete_associated_files(open_ai_client, remote_file_handler)
74
76
 
@@ -101,30 +103,18 @@ async def _prompts_execute(
101
103
  start_batch_tasks.append(batch_task)
102
104
  batch_jobs = await asyncio.gather(*start_batch_tasks)
103
105
 
106
+ process_semaphore = asyncio.Semaphore(concurrency)
104
107
  prompt_tasks = []
105
108
  for batch in batch_jobs:
106
- logger.info(
107
- f"waiting for batch to complete - {batch.id} - {batch.trace_id}"
108
- )
109
- page_task = asyncio.create_task(
110
- await wait_for_batch_completion(client=open_ai_client, batch=batch)
109
+ prompt_task = asyncio.create_task(
110
+ _wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore)
111
111
  )
112
- prompt_tasks.append(page_task)
112
+ prompt_tasks.append(prompt_task)
113
+ prompt_response_groups = await asyncio.gather(*prompt_tasks)
113
114
 
114
- output_file_ids = await asyncio.gather(*prompt_tasks)
115
+ flat_responses = [response for batch in prompt_response_groups for response in batch]
115
116
 
116
- prompts_output = []
117
- for output_file_id in output_file_ids:
118
- logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
119
- prompts_output.append(
120
- await process_prompts_output(
121
- client=open_ai_client, output_file_id=output_file_id
122
- )
123
- )
124
-
125
- flat_prompts = [page for batch in prompts_output for page in batch]
126
-
127
- sorted_responses = sorted(flat_prompts, key=lambda x: x.prompt_index)
117
+ sorted_responses = sorted(flat_responses, key=lambda x: x.prompt_index)
128
118
  callable_output = ParallexPromptsCallableOutput(
129
119
  original_prompts=prompts,
130
120
  trace_id=trace_id,
@@ -184,7 +174,7 @@ async def _execute(
184
174
 
185
175
  pages = [page for batch_pages in page_groups for page in batch_pages]
186
176
  logger.info(f"pages done. total pages- {len(pages)} - {trace_id}")
187
- sorted_pages = sorted(pages, key=lambda x: x.page_number)
177
+ sorted_pages = sorted(pages, key=lambda x: x.prompt_index)
188
178
 
189
179
  # TODO add combined version of MD to output / save to file system
190
180
  callable_output = ParallexCallableOutput(
@@ -211,6 +201,19 @@ async def _wait_and_create_pages(
211
201
  return page_responses
212
202
 
213
203
 
204
+ async def _wait_and_create_prompt_responses(
205
+ batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
206
+ ):
207
+ async with semaphore:
208
+ logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
209
+ output_file_id = await wait_for_batch_completion(client=client, batch=batch)
210
+ logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
211
+ prompt_responses = await process_prompts_output(
212
+ client=client, output_file_id=output_file_id
213
+ )
214
+ return prompt_responses
215
+
216
+
214
217
  async def _create_batch_jobs(
215
218
  batch_file: BatchFile,
216
219
  client: OpenAIClient,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parallex
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: PDF to markdown using Azure OpenAI batch processing
5
5
  Home-page: https://github.com/Summed-AI/parallex
6
6
  Author: Jeff Hostetler
@@ -2,7 +2,7 @@ parallex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  parallex/ai/batch_processor.py,sha256=O5q_jaIU0VI93p7Riq4aZ_qUiN9Omxp5GOfn0IqEYgo,1361
3
3
  parallex/ai/open_ai_client.py,sha256=TRH78oYod_EWpp3hjEh097OT7hwsQmtv44_j3X9Frxo,2047
4
4
  parallex/ai/output_processor.py,sha256=Rwp8dkLo4xsqooeBh3Xv-uGVbJMG1JQkwyxdUoOs2tQ,1800
5
- parallex/ai/uploader.py,sha256=9GvrzuaQAxqRiYN5dUHWjFeIFXezH0Y7ARnzBkEHbL0,5451
5
+ parallex/ai/uploader.py,sha256=72SZ3KYQSQL1GI0eCUvRDgDxWhz5vLTU_dRhN7cZg84,5443
6
6
  parallex/file_management/converter.py,sha256=Rj-93LXNl2gCY-XUOCZv7DdCNI2-GyRpS5FobnTqwzo,1111
7
7
  parallex/file_management/file_finder.py,sha256=BPvrkxZlwOYmRXzzS138wGTsVzuhDIKfQZn0CISUj3o,1598
8
8
  parallex/file_management/remote_file_handler.py,sha256=jsI9NhOrKQR8K3yo536lGplVBGis9XY0G4dRpumgWFM,213
@@ -15,10 +15,10 @@ parallex/models/parallex_prompts_callable_output.py,sha256=IlNX9627_E8aXWQ-vDBuv
15
15
  parallex/models/prompt_response.py,sha256=LcctuyqwiTHWrZHSahwauMaSBsin5Ws6fQRAzGXTsAA,230
16
16
  parallex/models/raw_file.py,sha256=Nlv6u_jlDCXDgU2_Ff7DRbDCx27pB1NZugNhEoaBMQU,483
17
17
  parallex/models/upload_batch.py,sha256=jrnds9ryXg9drL4TF8TGimMVTCDfKaWsBzFv_ed0i88,2068
18
- parallex/parallex.py,sha256=7YFKnKOkFHoTC7CCHhrXG1JTxprbvw0QkNGOEPYJbvQ,8500
18
+ parallex/parallex.py,sha256=30DKfec3uHCfyoLj0-wMaSltHn3kLsnW0kJsfFFgktk,8745
19
19
  parallex/utils/constants.py,sha256=508ieZLZ5kse0T4_QyNJp57Aq0DMNFjjyFlsKa0xtek,366
20
20
  parallex/utils/logger.py,sha256=i3ZZ7YTUmhUStbvVME67F9ffnkLOv5ijm7wVUyJT8Ys,440
21
- parallex-0.3.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
- parallex-0.3.0.dist-info/METADATA,sha256=hIIhGrV5PE-E-lkWf-kBE3QBPevKSVRHkw0hUx_iqik,4461
23
- parallex-0.3.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
- parallex-0.3.0.dist-info/RECORD,,
21
+ parallex-0.3.1.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
+ parallex-0.3.1.dist-info/METADATA,sha256=PBeHVqfrMvtGgny4FlkkyYt6nY-Ngbk8Z-6rEhukX_Q,4461
23
+ parallex-0.3.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
+ parallex-0.3.1.dist-info/RECORD,,