parallex 0.3.0__tar.gz → 0.3.1__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {parallex-0.3.0 → parallex-0.3.1}/PKG-INFO +1 -1
- {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/uploader.py +2 -2
- {parallex-0.3.0 → parallex-0.3.1}/parallex/parallex.py +23 -20
- {parallex-0.3.0 → parallex-0.3.1}/pyproject.toml +1 -1
- {parallex-0.3.0 → parallex-0.3.1}/LICENSE +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/README.md +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/__init__.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/batch_processor.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/open_ai_client.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/ai/output_processor.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/converter.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/file_finder.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/remote_file_handler.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/file_management/utils.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/batch_file.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/image_file.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/page_response.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/parallex_callable_output.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/parallex_prompts_callable_output.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/prompt_response.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/raw_file.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/models/upload_batch.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/utils/constants.py +0 -0
- {parallex-0.3.0 → parallex-0.3.1}/parallex/utils/logger.py +0 -0
@@ -76,8 +76,8 @@ async def upload_prompts_for_processing(
|
|
76
76
|
jsonl = _simple_jsonl_format(prompt_custom_id, prompt)
|
77
77
|
with open(upload_file_location, "a") as jsonl_file:
|
78
78
|
jsonl_file.write(json.dumps(jsonl) + "\n")
|
79
|
-
|
80
|
-
|
79
|
+
batch_file = await _create_batch_file(client, trace_id, upload_file_location)
|
80
|
+
batch_files.append(batch_file)
|
81
81
|
return batch_files
|
82
82
|
|
83
83
|
|
@@ -46,6 +46,7 @@ async def parallex(
|
|
46
46
|
)
|
47
47
|
except Exception as e:
|
48
48
|
logger.error(f"Error occurred: {e}")
|
49
|
+
raise e
|
49
50
|
finally:
|
50
51
|
await _delete_associated_files(open_ai_client, remote_file_handler)
|
51
52
|
|
@@ -69,6 +70,7 @@ async def parallex_simple_prompts(
|
|
69
70
|
)
|
70
71
|
except Exception as e:
|
71
72
|
logger.error(f"Error occurred: {e}")
|
73
|
+
raise e
|
72
74
|
finally:
|
73
75
|
await _delete_associated_files(open_ai_client, remote_file_handler)
|
74
76
|
|
@@ -101,30 +103,18 @@ async def _prompts_execute(
|
|
101
103
|
start_batch_tasks.append(batch_task)
|
102
104
|
batch_jobs = await asyncio.gather(*start_batch_tasks)
|
103
105
|
|
106
|
+
process_semaphore = asyncio.Semaphore(concurrency)
|
104
107
|
prompt_tasks = []
|
105
108
|
for batch in batch_jobs:
|
106
|
-
|
107
|
-
|
108
|
-
)
|
109
|
-
page_task = asyncio.create_task(
|
110
|
-
await wait_for_batch_completion(client=open_ai_client, batch=batch)
|
109
|
+
prompt_task = asyncio.create_task(
|
110
|
+
_wait_and_create_prompt_responses(batch=batch, client=open_ai_client, semaphore=process_semaphore)
|
111
111
|
)
|
112
|
-
prompt_tasks.append(
|
112
|
+
prompt_tasks.append(prompt_task)
|
113
|
+
prompt_response_groups = await asyncio.gather(*prompt_tasks)
|
113
114
|
|
114
|
-
|
115
|
+
flat_responses = [response for batch in prompt_response_groups for response in batch]
|
115
116
|
|
116
|
-
|
117
|
-
for output_file_id in output_file_ids:
|
118
|
-
logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
|
119
|
-
prompts_output.append(
|
120
|
-
await process_prompts_output(
|
121
|
-
client=open_ai_client, output_file_id=output_file_id
|
122
|
-
)
|
123
|
-
)
|
124
|
-
|
125
|
-
flat_prompts = [page for batch in prompts_output for page in batch]
|
126
|
-
|
127
|
-
sorted_responses = sorted(flat_prompts, key=lambda x: x.prompt_index)
|
117
|
+
sorted_responses = sorted(flat_responses, key=lambda x: x.prompt_index)
|
128
118
|
callable_output = ParallexPromptsCallableOutput(
|
129
119
|
original_prompts=prompts,
|
130
120
|
trace_id=trace_id,
|
@@ -184,7 +174,7 @@ async def _execute(
|
|
184
174
|
|
185
175
|
pages = [page for batch_pages in page_groups for page in batch_pages]
|
186
176
|
logger.info(f"pages done. total pages- {len(pages)} - {trace_id}")
|
187
|
-
sorted_pages = sorted(pages, key=lambda x: x.
|
177
|
+
sorted_pages = sorted(pages, key=lambda x: x.prompt_index)
|
188
178
|
|
189
179
|
# TODO add combined version of MD to output / save to file system
|
190
180
|
callable_output = ParallexCallableOutput(
|
@@ -211,6 +201,19 @@ async def _wait_and_create_pages(
|
|
211
201
|
return page_responses
|
212
202
|
|
213
203
|
|
204
|
+
async def _wait_and_create_prompt_responses(
|
205
|
+
batch: UploadBatch, client: OpenAIClient, semaphore: asyncio.Semaphore
|
206
|
+
):
|
207
|
+
async with semaphore:
|
208
|
+
logger.info(f"waiting for batch to complete - {batch.id} - {batch.trace_id}")
|
209
|
+
output_file_id = await wait_for_batch_completion(client=client, batch=batch)
|
210
|
+
logger.info(f"batch completed - {batch.id} - {batch.trace_id}")
|
211
|
+
prompt_responses = await process_prompts_output(
|
212
|
+
client=client, output_file_id=output_file_id
|
213
|
+
)
|
214
|
+
return prompt_responses
|
215
|
+
|
216
|
+
|
214
217
|
async def _create_batch_jobs(
|
215
218
|
batch_file: BatchFile,
|
216
219
|
client: OpenAIClient,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "parallex"
|
3
|
-
version = "0.3.0"
|
3
|
+
version = "0.3.1"
|
4
4
|
description = "PDF to markdown using Azure OpenAI batch processing"
|
5
5
|
authors = ["Jeff Hostetler <jeff@summed.ai>", "Kevin Bao <kevin@summed.ai>"]
|
6
6
|
repository = "https://github.com/Summed-AI/parallex"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|