parallex 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,14 +10,19 @@ from parallex.utils.logger import logger
10
10
 
11
11
  # Exceptions for missing keys, etc
12
12
  class OpenAIClient:
13
- def __init__(self, model: str, remote_file_handler: RemoteFileHandler):
14
- self.model = model
13
+ def __init__(
14
+ self,
15
+ remote_file_handler: RemoteFileHandler,
16
+ azure_endpoint_env_name: str,
17
+ azure_api_key_env_name: str,
18
+ azure_api_version_env_name: str,
19
+ ):
15
20
  self.file_handler = remote_file_handler
16
21
 
17
22
  self._client = AsyncAzureOpenAI(
18
- azure_endpoint=os.getenv("AZURE_API_BASE"),
19
- api_key=os.getenv("AZURE_API_KEY"),
20
- api_version=os.getenv("AZURE_API_VERSION"),
23
+ azure_endpoint=os.getenv(azure_endpoint_env_name),
24
+ api_key=os.getenv(azure_api_key_env_name),
25
+ api_version=os.getenv(azure_api_version_env_name),
21
26
  )
22
27
 
23
28
  async def upload(self, file_path: str) -> FileObject:
parallex/ai/uploader.py CHANGED
@@ -21,6 +21,7 @@ async def upload_images_for_processing(
21
21
  image_files: list[ImageFile],
22
22
  temp_directory: str,
23
23
  prompt_text: str,
24
+ azure_api_deployment_env_name: str,
24
25
  model: Optional[type[BaseModel]] = None,
25
26
  ) -> list[BatchFile]:
26
27
  """Base64 encodes image, converts to expected jsonl format and uploads"""
@@ -48,7 +49,13 @@ async def upload_images_for_processing(
48
49
  prompt_custom_id = (
49
50
  f"{image_file.trace_id}{CUSTOM_ID_DELINEATOR}{image_file.page_number}.jsonl"
50
51
  )
51
- jsonl = _image_jsonl_format(prompt_custom_id, base64_encoded_image, prompt_text, model)
52
+ jsonl = _image_jsonl_format(
53
+ prompt_custom_id,
54
+ base64_encoded_image,
55
+ prompt_text,
56
+ azure_api_deployment_env_name,
57
+ model
58
+ )
52
59
  with open(upload_file_location, "a") as jsonl_file:
53
60
  jsonl_file.write(json.dumps(jsonl) + "\n")
54
61
  batch_file = await _create_batch_file(client, trace_id, upload_file_location)
@@ -60,7 +67,8 @@ async def upload_prompts_for_processing(
60
67
  client: OpenAIClient,
61
68
  prompts: list[str], temp_directory: str,
62
69
  trace_id: UUID,
63
- model: Optional[type[BaseModel]] = None
70
+ azure_api_deployment_env_name: str,
71
+ model: Optional[type[BaseModel]] = None,
64
72
  ) -> list[BatchFile]:
65
73
  """Creates jsonl file and uploads for processing"""
66
74
  current_index = 0
@@ -81,7 +89,12 @@ async def upload_prompts_for_processing(
81
89
  )
82
90
 
83
91
  prompt_custom_id = f"{trace_id}{CUSTOM_ID_DELINEATOR}{index}.jsonl"
84
- jsonl = _simple_jsonl_format(prompt_custom_id, prompt, model)
92
+ jsonl = _simple_jsonl_format(
93
+ prompt_custom_id,
94
+ prompt,
95
+ azure_api_deployment_env_name,
96
+ model
97
+ )
85
98
  with open(upload_file_location, "a") as jsonl_file:
86
99
  jsonl_file.write(json.dumps(jsonl) + "\n")
87
100
  batch_file = await _create_batch_file(client, trace_id, upload_file_location)
@@ -139,13 +152,18 @@ def _response_format(model: type[BaseModel]) -> dict:
139
152
  }
140
153
 
141
154
 
142
- def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str, model: Optional[type[BaseModel]]) -> dict:
155
+ def _simple_jsonl_format(
156
+ prompt_custom_id: str,
157
+ prompt_text: str,
158
+ azure_api_deployment_env_name: str,
159
+ model: Optional[type[BaseModel]]
160
+ ) -> dict:
143
161
  payload = {
144
162
  "custom_id": prompt_custom_id,
145
163
  "method": "POST",
146
164
  "url": "/chat/completions",
147
165
  "body": {
148
- "model": os.getenv("AZURE_API_DEPLOYMENT"),
166
+ "model": os.getenv(azure_api_deployment_env_name),
149
167
  "messages": [{"role": "user", "content": prompt_text}],
150
168
  "temperature": 0.0, # TODO make configurable
151
169
  },
@@ -155,13 +173,19 @@ def _simple_jsonl_format(prompt_custom_id: str, prompt_text: str, model: Optiona
155
173
  return payload
156
174
 
157
175
 
158
- def _image_jsonl_format(prompt_custom_id: str, encoded_image: str, prompt_text: str, model: Optional[type[BaseModel]] = None) -> dict:
176
+ def _image_jsonl_format(
177
+ prompt_custom_id: str,
178
+ encoded_image: str,
179
+ prompt_text: str,
180
+ azure_api_deployment_env_name: str,
181
+ model: Optional[type[BaseModel]] = None
182
+ ) -> dict:
159
183
  payload = {
160
184
  "custom_id": prompt_custom_id,
161
185
  "method": "POST",
162
186
  "url": "/chat/completions",
163
187
  "body": {
164
- "model": os.getenv("AZURE_API_DEPLOYMENT"),
188
+ "model": os.getenv(azure_api_deployment_env_name),
165
189
  "messages": [
166
190
  {
167
191
  "role": "user",
parallex/parallex.py CHANGED
@@ -35,10 +35,19 @@ async def parallex(
35
35
  prompt_text: Optional[str] = DEFAULT_PROMPT,
36
36
  log_level: Optional[str] = "ERROR",
37
37
  response_model: Optional[type[BaseModel]] = None,
38
+ azure_endpoint_env_name: Optional[str] = "AZURE_API_BASE",
39
+ azure_api_key_env_name: Optional[str] = "AZURE_API_KEY",
40
+ azure_api_version_env_name: Optional[str] = "AZURE_API_VERSION",
41
+ azure_api_deployment_env_name: Optional[str] = "AZURE_API_DEPLOYMENT",
38
42
  ) -> ParallexCallableOutput:
39
43
  setup_logger(log_level)
40
44
  remote_file_handler = RemoteFileHandler()
41
- open_ai_client = OpenAIClient(model=model, remote_file_handler=remote_file_handler)
45
+ open_ai_client = OpenAIClient(
46
+ remote_file_handler=remote_file_handler,
47
+ azure_endpoint_env_name=azure_endpoint_env_name,
48
+ azure_api_key_env_name=azure_api_key_env_name,
49
+ azure_api_version_env_name=azure_api_version_env_name,
50
+ )
42
51
  try:
43
52
  return await _execute(
44
53
  open_ai_client=open_ai_client,
@@ -46,6 +55,7 @@ async def parallex(
46
55
  post_process_callable=post_process_callable,
47
56
  concurrency=concurrency,
48
57
  prompt_text=prompt_text,
58
+ azure_api_deployment_env_name=azure_api_deployment_env_name,
49
59
  model=response_model
50
60
  )
51
61
  except Exception as e:
@@ -56,16 +66,24 @@ async def parallex(
56
66
 
57
67
 
58
68
  async def parallex_simple_prompts(
59
- model: str,
60
69
  prompts: list[str],
61
70
  post_process_callable: Optional[Callable[..., None]] = None,
62
71
  log_level: Optional[str] = "ERROR",
63
72
  concurrency: Optional[int] = 20,
64
73
  response_model: Optional[type[BaseModel]] = None,
74
+ azure_endpoint_env_name: Optional[str] = "AZURE_API_BASE",
75
+ azure_api_key_env_name: Optional[str] = "AZURE_API_KEY",
76
+ azure_api_version_env_name: Optional[str] = "AZURE_API_VERSION",
77
+ azure_api_deployment_env_name: Optional[str] = "AZURE_API_DEPLOYMENT",
65
78
  ) -> ParallexPromptsCallableOutput:
66
79
  setup_logger(log_level)
67
80
  remote_file_handler = RemoteFileHandler()
68
- open_ai_client = OpenAIClient(model=model, remote_file_handler=remote_file_handler)
81
+ open_ai_client = OpenAIClient(
82
+ remote_file_handler=remote_file_handler,
83
+ azure_endpoint_env_name=azure_endpoint_env_name,
84
+ azure_api_key_env_name=azure_api_key_env_name,
85
+ azure_api_version_env_name=azure_api_version_env_name,
86
+ )
69
87
  try:
70
88
  return await _prompts_execute(
71
89
  open_ai_client=open_ai_client,
@@ -73,6 +91,7 @@ async def parallex_simple_prompts(
73
91
  post_process_callable=post_process_callable,
74
92
  concurrency=concurrency,
75
93
  model=response_model,
94
+ azure_api_deployment_env_name=azure_api_deployment_env_name
76
95
  )
77
96
  except Exception as e:
78
97
  logger.error(f"Error occurred: {e}")
@@ -84,6 +103,7 @@ async def parallex_simple_prompts(
84
103
  async def _prompts_execute(
85
104
  open_ai_client: OpenAIClient,
86
105
  prompts: list[str],
106
+ azure_api_deployment_env_name: str,
87
107
  post_process_callable: Optional[Callable[..., None]] = None,
88
108
  concurrency: Optional[int] = 20,
89
109
  model: Optional[type[BaseModel]] = None,
@@ -95,6 +115,7 @@ async def _prompts_execute(
95
115
  prompts=prompts,
96
116
  temp_directory=temp_directory,
97
117
  trace_id=trace_id,
118
+ azure_api_deployment_env_name=azure_api_deployment_env_name,
98
119
  model=model,
99
120
  )
100
121
  start_batch_semaphore = asyncio.Semaphore(concurrency)
@@ -139,6 +160,7 @@ async def _prompts_execute(
139
160
  async def _execute(
140
161
  open_ai_client: OpenAIClient,
141
162
  pdf_source_url: str,
163
+ azure_api_deployment_env_name: str,
142
164
  post_process_callable: Optional[Callable[..., None]] = None,
143
165
  concurrency: Optional[int] = 20,
144
166
  prompt_text: Optional[str] = DEFAULT_PROMPT,
@@ -158,6 +180,8 @@ async def _execute(
158
180
  image_files=image_files,
159
181
  temp_directory=temp_directory,
160
182
  prompt_text=prompt_text,
183
+ model=model,
184
+ azure_api_deployment_env_name=azure_api_deployment_env_name
161
185
  )
162
186
  start_batch_semaphore = asyncio.Semaphore(concurrency)
163
187
  start_batch_tasks = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parallex
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: PDF to markdown using Azure OpenAI batch processing
5
5
  Home-page: https://github.com/Summed-AI/parallex
6
6
  Author: Jeff Hostetler
@@ -1,8 +1,8 @@
1
1
  parallex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  parallex/ai/batch_processor.py,sha256=O5q_jaIU0VI93p7Riq4aZ_qUiN9Omxp5GOfn0IqEYgo,1361
3
- parallex/ai/open_ai_client.py,sha256=TRH78oYod_EWpp3hjEh097OT7hwsQmtv44_j3X9Frxo,2047
3
+ parallex/ai/open_ai_client.py,sha256=Nkl8F4EaHQbkNtgOXjLAtynWr10w9Q1Ym3TDisjtIiw,2168
4
4
  parallex/ai/output_processor.py,sha256=kd50DwB2txhzz4_MPYl97bPOtLMl0KV2UP_eFmUtq34,2087
5
- parallex/ai/uploader.py,sha256=FKleSK8GWextqpUUAthvTtxGHSwN-aYF127t1YmGOcw,6375
5
+ parallex/ai/uploader.py,sha256=t_R-3FMX3OVo90EQRCGL0VqBn3vKKn5iUe3qoWVIbMM,6772
6
6
  parallex/file_management/converter.py,sha256=Rj-93LXNl2gCY-XUOCZv7DdCNI2-GyRpS5FobnTqwzo,1111
7
7
  parallex/file_management/file_finder.py,sha256=BPvrkxZlwOYmRXzzS138wGTsVzuhDIKfQZn0CISUj3o,1598
8
8
  parallex/file_management/remote_file_handler.py,sha256=jsI9NhOrKQR8K3yo536lGplVBGis9XY0G4dRpumgWFM,213
@@ -15,10 +15,10 @@ parallex/models/parallex_prompts_callable_output.py,sha256=IlNX9627_E8aXWQ-vDBuv
15
15
  parallex/models/prompt_response.py,sha256=2Zmnwlj8Ou2VgEHmi1VZrlnv5XRzw5VLMEkpQ1VelQQ,242
16
16
  parallex/models/raw_file.py,sha256=Nlv6u_jlDCXDgU2_Ff7DRbDCx27pB1NZugNhEoaBMQU,483
17
17
  parallex/models/upload_batch.py,sha256=jrnds9ryXg9drL4TF8TGimMVTCDfKaWsBzFv_ed0i88,2068
18
- parallex/parallex.py,sha256=JogDmjB-HdsauCis6hyfSBF_tQi2IdmXfltK72roi28,9322
18
+ parallex/parallex.py,sha256=uP36YPJkWhaSgfrXcOLprea2W-9ZwQ-MXmU7liE-aKk,10591
19
19
  parallex/utils/constants.py,sha256=508ieZLZ5kse0T4_QyNJp57Aq0DMNFjjyFlsKa0xtek,366
20
20
  parallex/utils/logger.py,sha256=i3ZZ7YTUmhUStbvVME67F9ffnkLOv5ijm7wVUyJT8Ys,440
21
- parallex-0.4.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
- parallex-0.4.0.dist-info/METADATA,sha256=Hdq1xbDWVVPhR-61O88E9Glv7rn3LzKfz72--rzJovo,4461
23
- parallex-0.4.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
- parallex-0.4.0.dist-info/RECORD,,
21
+ parallex-0.5.0.dist-info/LICENSE,sha256=wPwCqGrisXnEcpaUxSO79C2mdOUTbtjhLjyy8mVW6p8,1046
22
+ parallex-0.5.0.dist-info/METADATA,sha256=0Mm0BYWvEGpYr5SNBkQw2qLjI2TGKWRVUDVw_e8XyMo,4461
23
+ parallex-0.5.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
+ parallex-0.5.0.dist-info/RECORD,,