parallex 0.3.0__tar.gz → 0.3.1__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (24) hide show
  1. {parallex-0.3.0 → parallex-0.3.1}/PKG-INFO +1 -1
  2. {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/uploader.py +2 -2
  3. {parallex-0.3.0 → parallex-0.3.1}/parallex/parallex.py +23 -20
  4. {parallex-0.3.0 → parallex-0.3.1}/pyproject.toml +1 -1
  5. {parallex-0.3.0 → parallex-0.3.1}/LICENSE +0 -0
  6. {parallex-0.3.0 → parallex-0.3.1}/README.md +0 -0
  7. {parallex-0.3.0 → parallex-0.3.1}/parallex/__init__.py +0 -0
  8. {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/batch_processor.py +0 -0
  9. {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/open_ai_client.py +0 -0
  10. {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/output_processor.py +0 -0
  11. {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/converter.py +0 -0
  12. {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/file_finder.py +0 -0
  13. {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/remote_file_handler.py +0 -0
  14. {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/utils.py +0 -0
  15. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/batch_file.py +0 -0
  16. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/image_file.py +0 -0
  17. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/page_response.py +0 -0
  18. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/parallex_callable_output.py +0 -0
  19. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/parallex_prompts_callable_output.py +0 -0
  20. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/prompt_response.py +0 -0
  21. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/raw_file.py +0 -0
  22. {parallex-0.3.0 → parallex-0.3.1}/parallex/models/upload_batch.py +0 -0
  23. {parallex-0.3.0 → parallex-0.3.1}/parallex/utils/constants.py +0 -0
  24. {parallex-0.3.0 → parallex-0.3.1}/parallex/utils/logger.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parallex
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: PDF to markdown using Azure OpenAI batch processing
5
5
  Home-page: https://github.com/Summed-AI/parallex
6
6
  Author: Jeff Hostetler
@@ -76,8 +76,8 @@ async def upload_prompts_for_processing(
76
76
  jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
77
77
  with open(upload_file_location, "a") as jsonl_file:
78
78
  jsonl_file.write(json.dumps(jsonl) + "\n")
79
- batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
- batch_files.append(batch_file)
79
+ batch_file = await _create_batch_file(client, trace_id, upload_file_location)
80
+ batch_files.append(batch_file)
81
81
  return batch_files
82
82
 
83
83
 
@@ -46,6 +46,7 @@ async def parallex(
46
46
  )
47
47
  except Exception as e:
48
48
  logger.error(f"Error occurred: {e}")
49
+ raise e
49
50
  finally:
50
51
  await _delete_associated_files(open_ai_client, remote_file_handler)
51
52
 
@@ -69,6 +70,7 @@ async def parallex_simple_prompts(
69
70
  )
70
71
  except Exception as e:
71
72
  logger.error(f"Error occurred: {e}")
73
+ raise e
72
74
  finally:
73
75
  await _delete_associated_files(open_ai_client, remote_file_handler)
74
76
 
@@ -101,30 +103,18 @@ async def _prompts_execute(
101
103
  start_batch_tasks.append(batch_task)
102
104
  batch_jobs = await asyncio.gather(*start_batch_tasks)
103
105
 
106
+ process_semaphore = asyncio.Semaphore(concurrency)
104
107
  prompt_tasks = []
105
108
  for batch in batch_jobs:
106
- logger.info(
107
- f"waiting for batch to complete - {batch.id} - {batch.trace_id}"
108
- )
109
- page_task = asyncio.create_task(
110
- await wait_for_batch_completion(client=open_ai_client, batch=batch)
109
+ prompt_task = asyncio.create_task(
110
+ _wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore)
111
111
  )
112
- prompt_tasks.append(page_task)
112
+ prompt_tasks.append(prompt_task)
113
+ prompt_response_groups = await asyncio.gather(*prompt_tasks)
113
114
 
114
- output_file_ids = await asyncio.gather(*prompt_tasks)
115
+ flat_responses = [response for batch in prompt_response_groups for response in batch]
115
116
 
116
- prompts_output = []
117
- for output_file_id in output_file_ids:
118
- logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
119
- prompts_output.append(
120
- await process_prompts_output(
121
- client=open_ai_client, output_file_id=output_file_id
122
- )
123
- )
124
-
125
- flat_prompts = [page for batch in prompts_output for page in batch]
126
-
127
- sorted_responses = sorted(flat_prompts, key=lambda x: x.prompt_index)
117
+ sorted_responses = sorted(flat_responses, key=lambda x: x.prompt_index)
128
118
  callable_output = ParallexPromptsCallableOutput(
129
119
  original_prompts=prompts,
130
120
  trace_id=trace_id,
@@ -184,7 +174,7 @@ async def _execute(
184
174
 
185
175
  pages = [page for batch_pages in page_groups for page in batch_pages]
186
176
  logger.info(f"pages done. total pages- {len(pages)} - {trace_id}")
187
- sorted_pages = sorted(pages, key=lambda x: x.page_number)
177
+ sorted_pages = sorted(pages, key=lambda x: x.prompt_index)
188
178
 
189
179
  # TODO add combined version of MD to output / save to file system
190
180
  callable_output = ParallexCallableOutput(
@@ -211,6 +201,19 @@ async def _wait_and_create_pages(
211
201
  return page_responses
212
202
 
213
203
 
204
+ async def _wait_and_create_prompt_responses(
205
+ batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
206
+ ):
207
+ async with semaphore:
208
+ logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
209
+ output_file_id = await wait_for_batch_completion(client=client, batch=batch)
210
+ logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
211
+ prompt_responses = await process_prompts_output(
212
+ client=client, output_file_id=output_file_id
213
+ )
214
+ return prompt_responses
215
+
216
+
214
217
  async def _create_batch_jobs(
215
218
  batch_file: BatchFile,
216
219
  client: OpenAIClient,
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "parallex"
3
- version = "0.3.0"
3
+ version = "0.3.1"
4
4
  description = "PDF to markdown using Azure OpenAI batch processing"
5
5
  authors = ["Jeff Hostetler <jeff@summed.ai>", "Kevin Bao <kevin@summed.ai>"]
6
6
  repository = "https://github.com/Summed-AI/parallex"
File without changes
File without changes
File without changes