llama-cloud 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff covers the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release: this version of llama-cloud might be problematic.

Files changed (38)
  1. llama_cloud/__init__.py +64 -0
  2. llama_cloud/client.py +3 -0
  3. llama_cloud/resources/__init__.py +22 -1
  4. llama_cloud/resources/data_sinks/client.py +12 -6
  5. llama_cloud/resources/embedding_model_configs/__init__.py +23 -0
  6. llama_cloud/resources/embedding_model_configs/client.py +360 -0
  7. llama_cloud/resources/embedding_model_configs/types/__init__.py +23 -0
  8. llama_cloud/resources/embedding_model_configs/types/embedding_model_config_create_embedding_config.py +89 -0
  9. llama_cloud/resources/files/__init__.py +2 -2
  10. llama_cloud/resources/files/client.py +265 -34
  11. llama_cloud/resources/files/types/__init__.py +2 -1
  12. llama_cloud/resources/files/types/file_create_from_url_resource_info_value.py +7 -0
  13. llama_cloud/resources/organizations/client.py +65 -0
  14. llama_cloud/resources/parsing/client.py +157 -0
  15. llama_cloud/resources/pipelines/client.py +177 -14
  16. llama_cloud/resources/projects/client.py +71 -0
  17. llama_cloud/types/__init__.py +48 -0
  18. llama_cloud/types/base.py +29 -0
  19. llama_cloud/types/cloud_one_drive_data_source.py +1 -0
  20. llama_cloud/types/cloud_postgres_vector_store.py +1 -1
  21. llama_cloud/types/cloud_sharepoint_data_source.py +1 -0
  22. llama_cloud/types/embedding_model_config.py +43 -0
  23. llama_cloud/types/embedding_model_config_embedding_config.py +89 -0
  24. llama_cloud/types/embedding_model_config_update.py +33 -0
  25. llama_cloud/types/embedding_model_config_update_embedding_config.py +89 -0
  26. llama_cloud/types/interval_usage_and_plan.py +36 -0
  27. llama_cloud/types/llama_parse_parameters.py +10 -0
  28. llama_cloud/types/markdown_node_parser.py +2 -1
  29. llama_cloud/types/paginated_list_pipeline_files_response.py +35 -0
  30. llama_cloud/types/pipeline.py +1 -0
  31. llama_cloud/types/pipeline_create.py +1 -0
  32. llama_cloud/types/pipeline_file.py +1 -0
  33. llama_cloud/types/plan.py +40 -0
  34. llama_cloud/types/usage.py +41 -0
  35. {llama_cloud-0.1.3.dist-info → llama_cloud-0.1.5.dist-info}/METADATA +1 -2
  36. {llama_cloud-0.1.3.dist-info → llama_cloud-0.1.5.dist-info}/RECORD +38 -25
  37. {llama_cloud-0.1.3.dist-info → llama_cloud-0.1.5.dist-info}/WHEEL +1 -1
  38. {llama_cloud-0.1.3.dist-info → llama_cloud-0.1.5.dist-info}/LICENSE +0 -0
llama_cloud/resources/organizations/client.py

@@ -10,6 +10,7 @@ from ...core.jsonable_encoder import jsonable_encoder
 from ...core.remove_none_from_dict import remove_none_from_dict
 from ...errors.unprocessable_entity_error import UnprocessableEntityError
 from ...types.http_validation_error import HttpValidationError
+from ...types.interval_usage_and_plan import IntervalUsageAndPlan
 from ...types.organization import Organization
 from ...types.organization_create import OrganizationCreate
 from ...types.project import Project
@@ -298,6 +299,38 @@ class OrganizationsClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    def get_organization_usage(self, organization_id: typing.Optional[str]) -> IntervalUsageAndPlan:
+        """
+        Get usage for a project
+
+        Parameters:
+            - organization_id: typing.Optional[str].
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.organizations.get_organization_usage()
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", f"api/v1/organizations/{organization_id}/usage"
+            ),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(IntervalUsageAndPlan, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def list_organization_users(self, organization_id: str) -> typing.List[UserOrganization]:
         """
         Get all users in an organization.
@@ -959,6 +992,38 @@ class AsyncOrganizationsClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    async def get_organization_usage(self, organization_id: typing.Optional[str]) -> IntervalUsageAndPlan:
+        """
+        Get usage for a project
+
+        Parameters:
+            - organization_id: typing.Optional[str].
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.organizations.get_organization_usage()
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", f"api/v1/organizations/{organization_id}/usage"
+            ),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(IntervalUsageAndPlan, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def list_organization_users(self, organization_id: str) -> typing.List[UserOrganization]:
         """
         Get all users in an organization.
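
Taken together, these two hunks add an organization-level usage endpoint to both the sync and async clients. A minimal sketch of calling it, assuming only what the diff shows (the token and organization ID below are placeholders, and the fields of IntervalUsageAndPlan are not spelled out here, so only the model object itself is printed):

import os

from llama_cloud.client import LlamaCloud

# Placeholder values; substitute a real API token and organization ID.
client = LlamaCloud(token=os.environ["LLAMA_CLOUD_API_KEY"])

usage_and_plan = client.organizations.get_organization_usage(
    "YOUR_ORGANIZATION_ID",
)
print(usage_and_plan)  # an IntervalUsageAndPlan pydantic model

One quirk worth noting: organization_id is typed Optional but has no default in the signature, so the zero-argument call shown in the generated docstring would raise a TypeError; pass the ID explicitly, as above.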
llama_cloud/resources/parsing/client.py

@@ -7,6 +7,7 @@ from json.decoder import JSONDecodeError
 from ...core.api_error import ApiError
 from ...core.client_wrapper import AsyncClientWrapper, SyncClientWrapper
 from ...core.jsonable_encoder import jsonable_encoder
+from ...core.remove_none_from_dict import remove_none_from_dict
 from ...errors.unprocessable_entity_error import UnprocessableEntityError
 from ...types.http_validation_error import HttpValidationError
 from ...types.llama_parse_supported_file_extensions import LlamaParseSupportedFileExtensions
@@ -101,6 +102,8 @@ class ParsingClient:
     def upload_file(
         self,
         *,
+        project_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = None,
         language: typing.List[ParserLanguages],
         parsing_instruction: str,
         skip_diagonal_text: bool,
@@ -112,6 +115,7 @@ class ParsingClient:
         continuous_mode: bool,
         gpt_4_o_api_key: str,
         do_not_unroll_columns: bool,
+        guess_xlsx_sheet_name: bool,
         page_separator: str,
         bounding_box: str,
         target_pages: str,
@@ -126,18 +130,30 @@ class ParsingClient:
         disable_ocr: bool,
         annotate_links: bool,
         disable_reconstruction: bool,
+        disable_image_extraction: bool,
         input_s_3_path: str,
         output_s_3_path_prefix: str,
         azure_openai_deployment_name: str,
         azure_openai_endpoint: str,
         azure_openai_api_version: str,
         azure_openai_key: str,
+        auto_mode: bool,
+        auto_mode_trigger_on_regexp_in_page: str,
+        auto_mode_trigger_on_text_in_page: str,
+        auto_mode_trigger_on_table_in_page: bool,
+        auto_mode_trigger_on_image_in_page: bool,
         file: typing.Optional[str] = OMIT,
+        input_url: str,
+        http_proxy: str,
     ) -> ParsingJob:
         """
         Upload a file to s3 and create a job. return a job id
 
         Parameters:
+            - project_id: typing.Optional[str].
+
+            - organization_id: typing.Optional[str].
+
             - language: typing.List[ParserLanguages].
 
             - parsing_instruction: str.
@@ -160,6 +176,8 @@ class ParsingClient:
 
             - do_not_unroll_columns: bool.
 
+            - guess_xlsx_sheet_name: bool.
+
             - page_separator: str.
 
             - bounding_box: str.
@@ -188,6 +206,8 @@ class ParsingClient:
 
             - disable_reconstruction: bool.
 
+            - disable_image_extraction: bool.
+
             - input_s_3_path: str.
 
             - output_s_3_path_prefix: str.
@@ -200,7 +220,21 @@ class ParsingClient:
 
             - azure_openai_key: str.
 
+            - auto_mode: bool.
+
+            - auto_mode_trigger_on_regexp_in_page: str.
+
+            - auto_mode_trigger_on_text_in_page: str.
+
+            - auto_mode_trigger_on_table_in_page: bool.
+
+            - auto_mode_trigger_on_image_in_page: bool.
+
             - file: typing.Optional[str].
+
+            - input_url: str.
+
+            - http_proxy: str.
         """
         _request: typing.Dict[str, typing.Any] = {
             "language": language,
@@ -214,6 +248,7 @@ class ParsingClient:
             "continuous_mode": continuous_mode,
             "gpt4o_api_key": gpt_4_o_api_key,
             "do_not_unroll_columns": do_not_unroll_columns,
+            "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
             "page_separator": page_separator,
             "bounding_box": bounding_box,
             "target_pages": target_pages,
@@ -228,18 +263,27 @@ class ParsingClient:
             "disable_ocr": disable_ocr,
             "annotate_links": annotate_links,
             "disable_reconstruction": disable_reconstruction,
+            "disable_image_extraction": disable_image_extraction,
             "input_s3_path": input_s_3_path,
             "output_s3_path_prefix": output_s_3_path_prefix,
             "azure_openai_deployment_name": azure_openai_deployment_name,
             "azure_openai_endpoint": azure_openai_endpoint,
             "azure_openai_api_version": azure_openai_api_version,
             "azure_openai_key": azure_openai_key,
+            "auto_mode": auto_mode,
+            "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
+            "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
+            "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
+            "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
+            "input_url": input_url,
+            "http_proxy": http_proxy,
         }
         if file is not OMIT:
             _request["file"] = file
         _response = self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
+            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
             json=jsonable_encoder(_request),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
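
Seen end to end, the upload changes add project/organization scoping as query parameters and several new body fields. For reference, the same request surface can be exercised directly over HTTP; a hedged sketch using httpx, assuming https://api.cloud.llamaindex.ai as the base URL (get_base_url()'s value is not shown in this diff) and showing only the fields added in 0.1.5 (a real call through the generated client must supply every required keyword argument in the signature above):

import os

import httpx

# Hypothetical base URL; the diff itself does not pin this down.
BASE_URL = "https://api.cloud.llamaindex.ai"

response = httpx.post(
    f"{BASE_URL}/api/v1/parsing/upload",
    # New in 0.1.5: project/organization scoping via query parameters.
    params={"project_id": "YOUR_PROJECT_ID"},
    json={
        # Only fields added in this release are shown; the generated
        # client sends the full set of body fields listed above.
        "input_url": "https://example.com/report.pdf",
        "http_proxy": "",
        "guess_xlsx_sheet_name": True,
        "disable_image_extraction": False,
        "auto_mode": True,
        "auto_mode_trigger_on_table_in_page": True,
    },
    headers={"Authorization": f"Bearer {os.environ['LLAMA_CLOUD_API_KEY']}"},
    timeout=60,
)
response.raise_for_status()
print(response.json())  # a ParsingJob payload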
@@ -256,6 +300,7 @@ class ParsingClient:
 
     def usage(self) -> ParsingUsage:
         """
+        DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
         Get parsing usage for user
 
         ---
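
The deprecation note points existing callers at the per-organization and per-project usage routes. Only the organization-level replacement is visible in this diff's client code, so a migration sketch under that assumption (the organization ID is a placeholder):

import os

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token=os.environ["LLAMA_CLOUD_API_KEY"])

# Before: account-wide parsing usage (now deprecated).
legacy_usage = client.parsing.usage()

# After: organization-scoped usage via the endpoint added in this release.
# The project-level route (/projects/{project_id}/usage) is added in
# projects/client.py, but its wrapper method is not shown in these hunks.
current_usage = client.organizations.get_organization_usage("YOUR_ORGANIZATION_ID")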
@@ -412,6 +457,40 @@ class ParsingClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    def get_job_raw_xlsx_result(self, job_id: str) -> typing.Any:
+        """
+        Get a job by id
+
+        Parameters:
+            - job_id: str.
+        ---
+        from llama_cloud.client import LlamaCloud
+
+        client = LlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        client.parsing.get_job_raw_xlsx_result(
+            job_id="string",
+        )
+        """
+        _response = self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/xlsx"
+            ),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(typing.Any, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
         """
         Get a job by id
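
Because the new raw-XLSX accessor returns typing.Any parsed from the response JSON, the payload shape is not pinned down by the client. A cautious usage sketch (the job ID is a placeholder):

import os

from llama_cloud.client import LlamaCloud

client = LlamaCloud(token=os.environ["LLAMA_CLOUD_API_KEY"])

# Fetch the raw XLSX result for a completed parsing job. The route is
# api/v1/parsing/job/{job_id}/result/raw/xlsx; the client decodes the body
# as JSON, so inspect the value before assuming any particular structure.
raw_result = client.parsing.get_job_raw_xlsx_result(job_id="YOUR_JOB_ID")
print(type(raw_result), raw_result)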
@@ -682,6 +761,8 @@ class AsyncParsingClient:
     async def upload_file(
         self,
         *,
+        project_id: typing.Optional[str] = None,
+        organization_id: typing.Optional[str] = None,
         language: typing.List[ParserLanguages],
         parsing_instruction: str,
         skip_diagonal_text: bool,
@@ -693,6 +774,7 @@ class AsyncParsingClient:
         continuous_mode: bool,
         gpt_4_o_api_key: str,
         do_not_unroll_columns: bool,
+        guess_xlsx_sheet_name: bool,
         page_separator: str,
         bounding_box: str,
         target_pages: str,
@@ -707,18 +789,30 @@ class AsyncParsingClient:
         disable_ocr: bool,
         annotate_links: bool,
         disable_reconstruction: bool,
+        disable_image_extraction: bool,
         input_s_3_path: str,
         output_s_3_path_prefix: str,
         azure_openai_deployment_name: str,
         azure_openai_endpoint: str,
         azure_openai_api_version: str,
         azure_openai_key: str,
+        auto_mode: bool,
+        auto_mode_trigger_on_regexp_in_page: str,
+        auto_mode_trigger_on_text_in_page: str,
+        auto_mode_trigger_on_table_in_page: bool,
+        auto_mode_trigger_on_image_in_page: bool,
         file: typing.Optional[str] = OMIT,
+        input_url: str,
+        http_proxy: str,
     ) -> ParsingJob:
         """
         Upload a file to s3 and create a job. return a job id
 
         Parameters:
+            - project_id: typing.Optional[str].
+
+            - organization_id: typing.Optional[str].
+
             - language: typing.List[ParserLanguages].
 
             - parsing_instruction: str.
@@ -741,6 +835,8 @@ class AsyncParsingClient:
 
             - do_not_unroll_columns: bool.
 
+            - guess_xlsx_sheet_name: bool.
+
             - page_separator: str.
 
             - bounding_box: str.
@@ -769,6 +865,8 @@ class AsyncParsingClient:
 
             - disable_reconstruction: bool.
 
+            - disable_image_extraction: bool.
+
             - input_s_3_path: str.
 
             - output_s_3_path_prefix: str.
@@ -781,7 +879,21 @@ class AsyncParsingClient:
 
             - azure_openai_key: str.
 
+            - auto_mode: bool.
+
+            - auto_mode_trigger_on_regexp_in_page: str.
+
+            - auto_mode_trigger_on_text_in_page: str.
+
+            - auto_mode_trigger_on_table_in_page: bool.
+
+            - auto_mode_trigger_on_image_in_page: bool.
+
             - file: typing.Optional[str].
+
+            - input_url: str.
+
+            - http_proxy: str.
         """
         _request: typing.Dict[str, typing.Any] = {
             "language": language,
@@ -795,6 +907,7 @@ class AsyncParsingClient:
             "continuous_mode": continuous_mode,
             "gpt4o_api_key": gpt_4_o_api_key,
             "do_not_unroll_columns": do_not_unroll_columns,
+            "guess_xlsx_sheet_name": guess_xlsx_sheet_name,
             "page_separator": page_separator,
             "bounding_box": bounding_box,
             "target_pages": target_pages,
@@ -809,18 +922,27 @@ class AsyncParsingClient:
             "disable_ocr": disable_ocr,
             "annotate_links": annotate_links,
             "disable_reconstruction": disable_reconstruction,
+            "disable_image_extraction": disable_image_extraction,
             "input_s3_path": input_s_3_path,
             "output_s3_path_prefix": output_s_3_path_prefix,
             "azure_openai_deployment_name": azure_openai_deployment_name,
             "azure_openai_endpoint": azure_openai_endpoint,
             "azure_openai_api_version": azure_openai_api_version,
             "azure_openai_key": azure_openai_key,
+            "auto_mode": auto_mode,
+            "auto_mode_trigger_on_regexp_in_page": auto_mode_trigger_on_regexp_in_page,
+            "auto_mode_trigger_on_text_in_page": auto_mode_trigger_on_text_in_page,
+            "auto_mode_trigger_on_table_in_page": auto_mode_trigger_on_table_in_page,
+            "auto_mode_trigger_on_image_in_page": auto_mode_trigger_on_image_in_page,
+            "input_url": input_url,
+            "http_proxy": http_proxy,
         }
         if file is not OMIT:
             _request["file"] = file
         _response = await self._client_wrapper.httpx_client.request(
             "POST",
             urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/parsing/upload"),
+            params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
             json=jsonable_encoder(_request),
             headers=self._client_wrapper.get_headers(),
             timeout=60,
@@ -837,6 +959,7 @@ class AsyncParsingClient:
 
     async def usage(self) -> ParsingUsage:
         """
+        DEPRECATED: use either /organizations/{organization_id}/usage or /projects/{project_id}/usage instead
        Get parsing usage for user
 
         ---
@@ -993,6 +1116,40 @@ class AsyncParsingClient:
             raise ApiError(status_code=_response.status_code, body=_response.text)
         raise ApiError(status_code=_response.status_code, body=_response_json)
 
+    async def get_job_raw_xlsx_result(self, job_id: str) -> typing.Any:
+        """
+        Get a job by id
+
+        Parameters:
+            - job_id: str.
+        ---
+        from llama_cloud.client import AsyncLlamaCloud
+
+        client = AsyncLlamaCloud(
+            token="YOUR_TOKEN",
+        )
+        await client.parsing.get_job_raw_xlsx_result(
+            job_id="string",
+        )
+        """
+        _response = await self._client_wrapper.httpx_client.request(
+            "GET",
+            urllib.parse.urljoin(
+                f"{self._client_wrapper.get_base_url()}/", f"api/v1/parsing/job/{job_id}/result/raw/xlsx"
+            ),
+            headers=self._client_wrapper.get_headers(),
+            timeout=60,
+        )
+        if 200 <= _response.status_code < 300:
+            return pydantic.parse_obj_as(typing.Any, _response.json())  # type: ignore
+        if _response.status_code == 422:
+            raise UnprocessableEntityError(pydantic.parse_obj_as(HttpValidationError, _response.json()))  # type: ignore
+        try:
+            _response_json = _response.json()
+        except JSONDecodeError:
+            raise ApiError(status_code=_response.status_code, body=_response.text)
+        raise ApiError(status_code=_response.status_code, body=_response_json)
+
     async def get_job_result(self, job_id: str) -> ParsingJobMarkdownResult:
         """
         Get a job by id
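
The async client mirrors the sync surface one-for-one, so the new endpoints compose naturally under asyncio. A closing sketch, assuming a valid token and existing IDs (both placeholders):

import asyncio
import os

from llama_cloud.client import AsyncLlamaCloud


async def main() -> None:
    client = AsyncLlamaCloud(token=os.environ["LLAMA_CLOUD_API_KEY"])
    # Both calls hit endpoints added in 0.1.5; the IDs are placeholders.
    usage = await client.organizations.get_organization_usage("YOUR_ORGANIZATION_ID")
    raw_xlsx = await client.parsing.get_job_raw_xlsx_result(job_id="YOUR_JOB_ID")
    print(usage, raw_xlsx)


asyncio.run(main())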