llama-cloud 0.1.38__py3-none-any.whl → 0.1.40__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of llama-cloud might be problematic. Click here for more details.

Files changed (39) hide show
  1. llama_cloud/__init__.py +12 -0
  2. llama_cloud/resources/admin/client.py +5 -5
  3. llama_cloud/resources/alpha/client.py +2 -8
  4. llama_cloud/resources/beta/client.py +30 -126
  5. llama_cloud/resources/chat_apps/client.py +8 -32
  6. llama_cloud/resources/classifier/client.py +8 -32
  7. llama_cloud/resources/data_sinks/client.py +8 -32
  8. llama_cloud/resources/data_sources/client.py +8 -32
  9. llama_cloud/resources/embedding_model_configs/client.py +12 -48
  10. llama_cloud/resources/files/client.py +42 -176
  11. llama_cloud/resources/jobs/client.py +2 -8
  12. llama_cloud/resources/llama_extract/client.py +40 -138
  13. llama_cloud/resources/organizations/client.py +4 -18
  14. llama_cloud/resources/parsing/client.py +12 -16
  15. llama_cloud/resources/pipelines/client.py +45 -32
  16. llama_cloud/resources/projects/client.py +18 -78
  17. llama_cloud/resources/reports/client.py +30 -126
  18. llama_cloud/resources/retrievers/client.py +12 -48
  19. llama_cloud/types/__init__.py +12 -0
  20. llama_cloud/types/extract_job_create.py +2 -0
  21. llama_cloud/types/extract_job_create_priority.py +29 -0
  22. llama_cloud/types/file.py +1 -1
  23. llama_cloud/types/job_names.py +0 -4
  24. llama_cloud/types/llama_extract_feature_availability.py +34 -0
  25. llama_cloud/types/llama_parse_parameters.py +1 -0
  26. llama_cloud/types/parse_job_config.py +1 -0
  27. llama_cloud/types/pipeline.py +4 -0
  28. llama_cloud/types/pipeline_create.py +2 -0
  29. llama_cloud/types/pipeline_file.py +4 -4
  30. llama_cloud/types/schema_generation_availability.py +33 -0
  31. llama_cloud/types/schema_generation_availability_status.py +17 -0
  32. llama_cloud/types/sparse_model_config.py +42 -0
  33. llama_cloud/types/sparse_model_type.py +33 -0
  34. llama_cloud/types/webhook_configuration.py +1 -0
  35. llama_cloud-0.1.40.dist-info/METADATA +106 -0
  36. {llama_cloud-0.1.38.dist-info → llama_cloud-0.1.40.dist-info}/RECORD +38 -32
  37. {llama_cloud-0.1.38.dist-info → llama_cloud-0.1.40.dist-info}/WHEEL +1 -1
  38. llama_cloud-0.1.38.dist-info/METADATA +0 -32
  39. {llama_cloud-0.1.38.dist-info → llama_cloud-0.1.40.dist-info}/LICENSE +0 -0
@@ -39,7 +39,6 @@ class RetrieversClient:
39
39
  name: typing.Optional[str] = None,
40
40
  project_id: typing.Optional[str] = None,
41
41
  organization_id: typing.Optional[str] = None,
42
- project_id: typing.Optional[str] = None,
43
42
  ) -> typing.List[Retriever]:
44
43
  """
45
44
  List Retrievers for a project.
@@ -50,8 +49,6 @@ class RetrieversClient:
50
49
  - project_id: typing.Optional[str].
51
50
 
52
51
  - organization_id: typing.Optional[str].
53
-
54
- - project_id: typing.Optional[str].
55
52
  ---
56
53
  from llama_cloud.client import LlamaCloud
57
54
 
@@ -64,7 +61,7 @@ class RetrieversClient:
64
61
  "GET",
65
62
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers"),
66
63
  params=remove_none_from_dict({"name": name, "project_id": project_id, "organization_id": organization_id}),
67
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
64
+ headers=self._client_wrapper.get_headers(),
68
65
  timeout=60,
69
66
  )
70
67
  if 200 <= _response.status_code < 300:
@@ -83,7 +80,6 @@ class RetrieversClient:
83
80
  project_id: typing.Optional[str] = None,
84
81
  organization_id: typing.Optional[str] = None,
85
82
  request: RetrieverCreate,
86
- project_id: typing.Optional[str] = None,
87
83
  ) -> Retriever:
88
84
  """
89
85
  Create a new Retriever.
@@ -94,8 +90,6 @@ class RetrieversClient:
94
90
  - organization_id: typing.Optional[str].
95
91
 
96
92
  - request: RetrieverCreate.
97
-
98
- - project_id: typing.Optional[str].
99
93
  ---
100
94
  from llama_cloud import RetrieverCreate
101
95
  from llama_cloud.client import LlamaCloud
@@ -114,7 +108,7 @@ class RetrieversClient:
114
108
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers"),
115
109
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
116
110
  json=jsonable_encoder(request),
117
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
111
+ headers=self._client_wrapper.get_headers(),
118
112
  timeout=60,
119
113
  )
120
114
  if 200 <= _response.status_code < 300:
@@ -133,7 +127,6 @@ class RetrieversClient:
133
127
  project_id: typing.Optional[str] = None,
134
128
  organization_id: typing.Optional[str] = None,
135
129
  request: RetrieverCreate,
136
- project_id: typing.Optional[str] = None,
137
130
  ) -> Retriever:
138
131
  """
139
132
  Upsert a new Retriever.
@@ -144,8 +137,6 @@ class RetrieversClient:
144
137
  - organization_id: typing.Optional[str].
145
138
 
146
139
  - request: RetrieverCreate.
147
-
148
- - project_id: typing.Optional[str].
149
140
  ---
150
141
  from llama_cloud import RetrieverCreate
151
142
  from llama_cloud.client import LlamaCloud
@@ -164,7 +155,7 @@ class RetrieversClient:
164
155
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers"),
165
156
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
166
157
  json=jsonable_encoder(request),
167
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
158
+ headers=self._client_wrapper.get_headers(),
168
159
  timeout=60,
169
160
  )
170
161
  if 200 <= _response.status_code < 300:
@@ -183,7 +174,6 @@ class RetrieversClient:
183
174
  *,
184
175
  project_id: typing.Optional[str] = None,
185
176
  organization_id: typing.Optional[str] = None,
186
- project_id: typing.Optional[str] = None,
187
177
  ) -> Retriever:
188
178
  """
189
179
  Get a Retriever by ID.
@@ -194,8 +184,6 @@ class RetrieversClient:
194
184
  - project_id: typing.Optional[str].
195
185
 
196
186
  - organization_id: typing.Optional[str].
197
-
198
- - project_id: typing.Optional[str].
199
187
  ---
200
188
  from llama_cloud.client import LlamaCloud
201
189
 
@@ -210,7 +198,7 @@ class RetrieversClient:
210
198
  "GET",
211
199
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/retrievers/{retriever_id}"),
212
200
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
213
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
201
+ headers=self._client_wrapper.get_headers(),
214
202
  timeout=60,
215
203
  )
216
204
  if 200 <= _response.status_code < 300:
@@ -313,7 +301,6 @@ class RetrieversClient:
313
301
  rerank_top_n: typing.Optional[int] = OMIT,
314
302
  rerank_config: typing.Optional[ReRankConfig] = OMIT,
315
303
  query: str,
316
- project_id: typing.Optional[str] = None,
317
304
  ) -> CompositeRetrievalResult:
318
305
  """
319
306
  Retrieve data using a Retriever.
@@ -332,8 +319,6 @@ class RetrieversClient:
332
319
  - rerank_config: typing.Optional[ReRankConfig]. The rerank configuration for composite retrieval.
333
320
 
334
321
  - query: str. The query to retrieve against.
335
-
336
- - project_id: typing.Optional[str].
337
322
  ---
338
323
  from llama_cloud import CompositeRetrievalMode, ReRankConfig, ReRankerType
339
324
  from llama_cloud.client import LlamaCloud
@@ -364,7 +349,7 @@ class RetrieversClient:
364
349
  ),
365
350
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
366
351
  json=jsonable_encoder(_request),
367
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
352
+ headers=self._client_wrapper.get_headers(),
368
353
  timeout=60,
369
354
  )
370
355
  if 200 <= _response.status_code < 300:
@@ -387,7 +372,6 @@ class RetrieversClient:
387
372
  rerank_config: typing.Optional[ReRankConfig] = OMIT,
388
373
  query: str,
389
374
  pipelines: typing.Optional[typing.List[RetrieverPipeline]] = OMIT,
390
- project_id: typing.Optional[str] = None,
391
375
  ) -> CompositeRetrievalResult:
392
376
  """
393
377
  Retrieve data using specified pipelines without creating a persistent retriever.
@@ -406,8 +390,6 @@ class RetrieversClient:
406
390
  - query: str. The query to retrieve against.
407
391
 
408
392
  - pipelines: typing.Optional[typing.List[RetrieverPipeline]]. The pipelines to use for retrieval.
409
-
410
- - project_id: typing.Optional[str].
411
393
  ---
412
394
  from llama_cloud import CompositeRetrievalMode, ReRankConfig, ReRankerType
413
395
  from llama_cloud.client import LlamaCloud
@@ -437,7 +419,7 @@ class RetrieversClient:
437
419
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers/retrieve"),
438
420
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
439
421
  json=jsonable_encoder(_request),
440
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
422
+ headers=self._client_wrapper.get_headers(),
441
423
  timeout=60,
442
424
  )
443
425
  if 200 <= _response.status_code < 300:
@@ -461,7 +443,6 @@ class AsyncRetrieversClient:
461
443
  name: typing.Optional[str] = None,
462
444
  project_id: typing.Optional[str] = None,
463
445
  organization_id: typing.Optional[str] = None,
464
- project_id: typing.Optional[str] = None,
465
446
  ) -> typing.List[Retriever]:
466
447
  """
467
448
  List Retrievers for a project.
@@ -472,8 +453,6 @@ class AsyncRetrieversClient:
472
453
  - project_id: typing.Optional[str].
473
454
 
474
455
  - organization_id: typing.Optional[str].
475
-
476
- - project_id: typing.Optional[str].
477
456
  ---
478
457
  from llama_cloud.client import AsyncLlamaCloud
479
458
 
@@ -486,7 +465,7 @@ class AsyncRetrieversClient:
486
465
  "GET",
487
466
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers"),
488
467
  params=remove_none_from_dict({"name": name, "project_id": project_id, "organization_id": organization_id}),
489
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
468
+ headers=self._client_wrapper.get_headers(),
490
469
  timeout=60,
491
470
  )
492
471
  if 200 <= _response.status_code < 300:
@@ -505,7 +484,6 @@ class AsyncRetrieversClient:
505
484
  project_id: typing.Optional[str] = None,
506
485
  organization_id: typing.Optional[str] = None,
507
486
  request: RetrieverCreate,
508
- project_id: typing.Optional[str] = None,
509
487
  ) -> Retriever:
510
488
  """
511
489
  Create a new Retriever.
@@ -516,8 +494,6 @@ class AsyncRetrieversClient:
516
494
  - organization_id: typing.Optional[str].
517
495
 
518
496
  - request: RetrieverCreate.
519
-
520
- - project_id: typing.Optional[str].
521
497
  ---
522
498
  from llama_cloud import RetrieverCreate
523
499
  from llama_cloud.client import AsyncLlamaCloud
@@ -536,7 +512,7 @@ class AsyncRetrieversClient:
536
512
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers"),
537
513
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
538
514
  json=jsonable_encoder(request),
539
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
515
+ headers=self._client_wrapper.get_headers(),
540
516
  timeout=60,
541
517
  )
542
518
  if 200 <= _response.status_code < 300:
@@ -555,7 +531,6 @@ class AsyncRetrieversClient:
555
531
  project_id: typing.Optional[str] = None,
556
532
  organization_id: typing.Optional[str] = None,
557
533
  request: RetrieverCreate,
558
- project_id: typing.Optional[str] = None,
559
534
  ) -> Retriever:
560
535
  """
561
536
  Upsert a new Retriever.
@@ -566,8 +541,6 @@ class AsyncRetrieversClient:
566
541
  - organization_id: typing.Optional[str].
567
542
 
568
543
  - request: RetrieverCreate.
569
-
570
- - project_id: typing.Optional[str].
571
544
  ---
572
545
  from llama_cloud import RetrieverCreate
573
546
  from llama_cloud.client import AsyncLlamaCloud
@@ -586,7 +559,7 @@ class AsyncRetrieversClient:
586
559
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers"),
587
560
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
588
561
  json=jsonable_encoder(request),
589
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
562
+ headers=self._client_wrapper.get_headers(),
590
563
  timeout=60,
591
564
  )
592
565
  if 200 <= _response.status_code < 300:
@@ -605,7 +578,6 @@ class AsyncRetrieversClient:
605
578
  *,
606
579
  project_id: typing.Optional[str] = None,
607
580
  organization_id: typing.Optional[str] = None,
608
- project_id: typing.Optional[str] = None,
609
581
  ) -> Retriever:
610
582
  """
611
583
  Get a Retriever by ID.
@@ -616,8 +588,6 @@ class AsyncRetrieversClient:
616
588
  - project_id: typing.Optional[str].
617
589
 
618
590
  - organization_id: typing.Optional[str].
619
-
620
- - project_id: typing.Optional[str].
621
591
  ---
622
592
  from llama_cloud.client import AsyncLlamaCloud
623
593
 
@@ -632,7 +602,7 @@ class AsyncRetrieversClient:
632
602
  "GET",
633
603
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", f"api/v1/retrievers/{retriever_id}"),
634
604
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
635
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
605
+ headers=self._client_wrapper.get_headers(),
636
606
  timeout=60,
637
607
  )
638
608
  if 200 <= _response.status_code < 300:
@@ -735,7 +705,6 @@ class AsyncRetrieversClient:
735
705
  rerank_top_n: typing.Optional[int] = OMIT,
736
706
  rerank_config: typing.Optional[ReRankConfig] = OMIT,
737
707
  query: str,
738
- project_id: typing.Optional[str] = None,
739
708
  ) -> CompositeRetrievalResult:
740
709
  """
741
710
  Retrieve data using a Retriever.
@@ -754,8 +723,6 @@ class AsyncRetrieversClient:
754
723
  - rerank_config: typing.Optional[ReRankConfig]. The rerank configuration for composite retrieval.
755
724
 
756
725
  - query: str. The query to retrieve against.
757
-
758
- - project_id: typing.Optional[str].
759
726
  ---
760
727
  from llama_cloud import CompositeRetrievalMode, ReRankConfig, ReRankerType
761
728
  from llama_cloud.client import AsyncLlamaCloud
@@ -786,7 +753,7 @@ class AsyncRetrieversClient:
786
753
  ),
787
754
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
788
755
  json=jsonable_encoder(_request),
789
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
756
+ headers=self._client_wrapper.get_headers(),
790
757
  timeout=60,
791
758
  )
792
759
  if 200 <= _response.status_code < 300:
@@ -809,7 +776,6 @@ class AsyncRetrieversClient:
809
776
  rerank_config: typing.Optional[ReRankConfig] = OMIT,
810
777
  query: str,
811
778
  pipelines: typing.Optional[typing.List[RetrieverPipeline]] = OMIT,
812
- project_id: typing.Optional[str] = None,
813
779
  ) -> CompositeRetrievalResult:
814
780
  """
815
781
  Retrieve data using specified pipelines without creating a persistent retriever.
@@ -828,8 +794,6 @@ class AsyncRetrieversClient:
828
794
  - query: str. The query to retrieve against.
829
795
 
830
796
  - pipelines: typing.Optional[typing.List[RetrieverPipeline]]. The pipelines to use for retrieval.
831
-
832
- - project_id: typing.Optional[str].
833
797
  ---
834
798
  from llama_cloud import CompositeRetrievalMode, ReRankConfig, ReRankerType
835
799
  from llama_cloud.client import AsyncLlamaCloud
@@ -859,7 +823,7 @@ class AsyncRetrieversClient:
859
823
  urllib.parse.urljoin(f"{self._client_wrapper.get_base_url()}/", "api/v1/retrievers/retrieve"),
860
824
  params=remove_none_from_dict({"project_id": project_id, "organization_id": organization_id}),
861
825
  json=jsonable_encoder(_request),
862
- headers=remove_none_from_dict({**self._client_wrapper.get_headers(), "Project-Id": project_id}),
826
+ headers=self._client_wrapper.get_headers(),
863
827
  timeout=60,
864
828
  )
865
829
  if 200 <= _response.status_code < 300:
@@ -125,6 +125,7 @@ from .extract_job import ExtractJob
125
125
  from .extract_job_create import ExtractJobCreate
126
126
  from .extract_job_create_data_schema_override import ExtractJobCreateDataSchemaOverride
127
127
  from .extract_job_create_data_schema_override_zero_value import ExtractJobCreateDataSchemaOverrideZeroValue
128
+ from .extract_job_create_priority import ExtractJobCreatePriority
128
129
  from .extract_mode import ExtractMode
129
130
  from .extract_models import ExtractModels
130
131
  from .extract_resultset import ExtractResultset
@@ -199,6 +200,7 @@ from .job_record_with_usage_metrics import JobRecordWithUsageMetrics
199
200
  from .l_lama_parse_transform_config import LLamaParseTransformConfig
200
201
  from .legacy_parse_job_config import LegacyParseJobConfig
201
202
  from .license_info_response import LicenseInfoResponse
203
+ from .llama_extract_feature_availability import LlamaExtractFeatureAvailability
202
204
  from .llama_extract_mode_availability import LlamaExtractModeAvailability
203
205
  from .llama_extract_mode_availability_status import LlamaExtractModeAvailabilityStatus
204
206
  from .llama_extract_settings import LlamaExtractSettings
@@ -365,9 +367,13 @@ from .retriever import Retriever
365
367
  from .retriever_create import RetrieverCreate
366
368
  from .retriever_pipeline import RetrieverPipeline
367
369
  from .role import Role
370
+ from .schema_generation_availability import SchemaGenerationAvailability
371
+ from .schema_generation_availability_status import SchemaGenerationAvailabilityStatus
368
372
  from .schema_relax_mode import SchemaRelaxMode
369
373
  from .semantic_chunking_config import SemanticChunkingConfig
370
374
  from .sentence_chunking_config import SentenceChunkingConfig
375
+ from .sparse_model_config import SparseModelConfig
376
+ from .sparse_model_type import SparseModelType
371
377
  from .src_app_schema_chat_chat_message import SrcAppSchemaChatChatMessage
372
378
  from .status_enum import StatusEnum
373
379
  from .struct_mode import StructMode
@@ -516,6 +522,7 @@ __all__ = [
516
522
  "ExtractJobCreate",
517
523
  "ExtractJobCreateDataSchemaOverride",
518
524
  "ExtractJobCreateDataSchemaOverrideZeroValue",
525
+ "ExtractJobCreatePriority",
519
526
  "ExtractMode",
520
527
  "ExtractModels",
521
528
  "ExtractResultset",
@@ -588,6 +595,7 @@ __all__ = [
588
595
  "LLamaParseTransformConfig",
589
596
  "LegacyParseJobConfig",
590
597
  "LicenseInfoResponse",
598
+ "LlamaExtractFeatureAvailability",
591
599
  "LlamaExtractModeAvailability",
592
600
  "LlamaExtractModeAvailabilityStatus",
593
601
  "LlamaExtractSettings",
@@ -742,9 +750,13 @@ __all__ = [
742
750
  "RetrieverCreate",
743
751
  "RetrieverPipeline",
744
752
  "Role",
753
+ "SchemaGenerationAvailability",
754
+ "SchemaGenerationAvailabilityStatus",
745
755
  "SchemaRelaxMode",
746
756
  "SemanticChunkingConfig",
747
757
  "SentenceChunkingConfig",
758
+ "SparseModelConfig",
759
+ "SparseModelType",
748
760
  "SrcAppSchemaChatChatMessage",
749
761
  "StatusEnum",
750
762
  "StructMode",
@@ -6,6 +6,7 @@ import typing
6
6
  from ..core.datetime_utils import serialize_datetime
7
7
  from .extract_config import ExtractConfig
8
8
  from .extract_job_create_data_schema_override import ExtractJobCreateDataSchemaOverride
9
+ from .extract_job_create_priority import ExtractJobCreatePriority
9
10
  from .webhook_configuration import WebhookConfiguration
10
11
 
11
12
  try:
@@ -22,6 +23,7 @@ class ExtractJobCreate(pydantic.BaseModel):
22
23
  Schema for creating an extraction job.
23
24
  """
24
25
 
26
+ priority: typing.Optional[ExtractJobCreatePriority]
25
27
  webhook_configurations: typing.Optional[typing.List[WebhookConfiguration]]
26
28
  extraction_agent_id: str = pydantic.Field(description="The id of the extraction agent")
27
29
  file_id: str = pydantic.Field(description="The id of the file")
@@ -0,0 +1,29 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import enum
4
+ import typing
5
+
6
+ T_Result = typing.TypeVar("T_Result")
7
+
8
+
9
+ class ExtractJobCreatePriority(str, enum.Enum):
10
+ LOW = "low"
11
+ MEDIUM = "medium"
12
+ HIGH = "high"
13
+ CRITICAL = "critical"
14
+
15
+ def visit(
16
+ self,
17
+ low: typing.Callable[[], T_Result],
18
+ medium: typing.Callable[[], T_Result],
19
+ high: typing.Callable[[], T_Result],
20
+ critical: typing.Callable[[], T_Result],
21
+ ) -> T_Result:
22
+ if self is ExtractJobCreatePriority.LOW:
23
+ return low()
24
+ if self is ExtractJobCreatePriority.MEDIUM:
25
+ return medium()
26
+ if self is ExtractJobCreatePriority.HIGH:
27
+ return high()
28
+ if self is ExtractJobCreatePriority.CRITICAL:
29
+ return critical()
llama_cloud/types/file.py CHANGED
@@ -25,7 +25,7 @@ class File(pydantic.BaseModel):
25
25
  created_at: typing.Optional[dt.datetime]
26
26
  updated_at: typing.Optional[dt.datetime]
27
27
  name: str
28
- external_file_id: str = pydantic.Field(description="The ID of the file in the external system")
28
+ external_file_id: typing.Optional[str]
29
29
  file_size: typing.Optional[int]
30
30
  file_type: typing.Optional[str]
31
31
  project_id: str = pydantic.Field(description="The ID of the project that the file belongs to")
@@ -15,7 +15,6 @@ class JobNames(str, enum.Enum):
15
15
  LOAD_FILES_JOB = "load_files_job"
16
16
  PLAYGROUND_JOB = "playground_job"
17
17
  PIPELINE_MANAGED_INGESTION_JOB = "pipeline_managed_ingestion_job"
18
- DATA_SOURCE_MANAGED_INGESTION_JOB = "data_source_managed_ingestion_job"
19
18
  DATA_SOURCE_UPDATE_DISPATCHER_JOB = "data_source_update_dispatcher_job"
20
19
  PIPELINE_FILE_UPDATE_DISPATCHER_JOB = "pipeline_file_update_dispatcher_job"
21
20
  PIPELINE_FILE_UPDATER_JOB = "pipeline_file_updater_job"
@@ -35,7 +34,6 @@ class JobNames(str, enum.Enum):
35
34
  load_files_job: typing.Callable[[], T_Result],
36
35
  playground_job: typing.Callable[[], T_Result],
37
36
  pipeline_managed_ingestion_job: typing.Callable[[], T_Result],
38
- data_source_managed_ingestion_job: typing.Callable[[], T_Result],
39
37
  data_source_update_dispatcher_job: typing.Callable[[], T_Result],
40
38
  pipeline_file_update_dispatcher_job: typing.Callable[[], T_Result],
41
39
  pipeline_file_updater_job: typing.Callable[[], T_Result],
@@ -57,8 +55,6 @@ class JobNames(str, enum.Enum):
57
55
  return playground_job()
58
56
  if self is JobNames.PIPELINE_MANAGED_INGESTION_JOB:
59
57
  return pipeline_managed_ingestion_job()
60
- if self is JobNames.DATA_SOURCE_MANAGED_INGESTION_JOB:
61
- return data_source_managed_ingestion_job()
62
58
  if self is JobNames.DATA_SOURCE_UPDATE_DISPATCHER_JOB:
63
59
  return data_source_update_dispatcher_job()
64
60
  if self is JobNames.PIPELINE_FILE_UPDATE_DISPATCHER_JOB:
@@ -0,0 +1,34 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from .llama_extract_mode_availability import LlamaExtractModeAvailability
8
+ from .schema_generation_availability import SchemaGenerationAvailability
9
+
10
+ try:
11
+ import pydantic
12
+ if pydantic.__version__.startswith("1."):
13
+ raise ImportError
14
+ import pydantic.v1 as pydantic # type: ignore
15
+ except ImportError:
16
+ import pydantic # type: ignore
17
+
18
+
19
+ class LlamaExtractFeatureAvailability(pydantic.BaseModel):
20
+ schema_generation: SchemaGenerationAvailability
21
+ available_modes: typing.List[LlamaExtractModeAvailability]
22
+
23
+ def json(self, **kwargs: typing.Any) -> str:
24
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
25
+ return super().json(**kwargs_with_defaults)
26
+
27
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
28
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
29
+ return super().dict(**kwargs_with_defaults)
30
+
31
+ class Config:
32
+ frozen = True
33
+ smart_union = True
34
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -49,6 +49,7 @@ class LlamaParseParameters(pydantic.BaseModel):
49
49
  extract_layout: typing.Optional[bool]
50
50
  high_res_ocr: typing.Optional[bool]
51
51
  html_make_all_elements_visible: typing.Optional[bool]
52
+ layout_aware: typing.Optional[bool]
52
53
  html_remove_navigation_elements: typing.Optional[bool]
53
54
  html_remove_fixed_elements: typing.Optional[bool]
54
55
  guess_xlsx_sheet_name: typing.Optional[bool]
@@ -51,6 +51,7 @@ class ParseJobConfig(pydantic.BaseModel):
51
51
  extract_layout: typing.Optional[bool]
52
52
  high_res_ocr: typing.Optional[bool]
53
53
  html_make_all_elements_visible: typing.Optional[bool]
54
+ layout_aware: typing.Optional[bool]
54
55
  html_remove_navigation_elements: typing.Optional[bool]
55
56
  html_remove_fixed_elements: typing.Optional[bool]
56
57
  guess_xlsx_sheet_name: typing.Optional[bool]
@@ -5,6 +5,7 @@ import typing
5
5
 
6
6
  from ..core.datetime_utils import serialize_datetime
7
7
  from .data_sink import DataSink
8
+ from .embedding_model_config import EmbeddingModelConfig
8
9
  from .eval_execution_params import EvalExecutionParams
9
10
  from .llama_parse_parameters import LlamaParseParameters
10
11
  from .pipeline_configuration_hashes import PipelineConfigurationHashes
@@ -14,6 +15,7 @@ from .pipeline_status import PipelineStatus
14
15
  from .pipeline_transform_config import PipelineTransformConfig
15
16
  from .pipeline_type import PipelineType
16
17
  from .preset_retrieval_params import PresetRetrievalParams
18
+ from .sparse_model_config import SparseModelConfig
17
19
 
18
20
  try:
19
21
  import pydantic
@@ -35,11 +37,13 @@ class Pipeline(pydantic.BaseModel):
35
37
  name: str
36
38
  project_id: str
37
39
  embedding_model_config_id: typing.Optional[str]
40
+ embedding_model_config: typing.Optional[EmbeddingModelConfig]
38
41
  pipeline_type: typing.Optional[PipelineType] = pydantic.Field(
39
42
  description="Type of pipeline. Either PLAYGROUND or MANAGED."
40
43
  )
41
44
  managed_pipeline_id: typing.Optional[str]
42
45
  embedding_config: PipelineEmbeddingConfig
46
+ sparse_model_config: typing.Optional[SparseModelConfig]
43
47
  config_hash: typing.Optional[PipelineConfigurationHashes]
44
48
  transform_config: typing.Optional[PipelineTransformConfig] = pydantic.Field(
45
49
  description="Configuration for the transformation."
@@ -12,6 +12,7 @@ from .pipeline_create_transform_config import PipelineCreateTransformConfig
12
12
  from .pipeline_metadata_config import PipelineMetadataConfig
13
13
  from .pipeline_type import PipelineType
14
14
  from .preset_retrieval_params import PresetRetrievalParams
15
+ from .sparse_model_config import SparseModelConfig
15
16
 
16
17
  try:
17
18
  import pydantic
@@ -40,6 +41,7 @@ class PipelineCreate(pydantic.BaseModel):
40
41
  eval_parameters: typing.Optional[EvalExecutionParams] = pydantic.Field(
41
42
  description="Eval parameters for the pipeline."
42
43
  )
44
+ sparse_model_config: typing.Optional[SparseModelConfig]
43
45
  llama_parse_parameters: typing.Optional[LlamaParseParameters]
44
46
  status: typing.Optional[str]
45
47
  metadata_config: typing.Optional[PipelineMetadataConfig]
@@ -31,14 +31,14 @@ class PipelineFile(pydantic.BaseModel):
31
31
  external_file_id: typing.Optional[str]
32
32
  file_size: typing.Optional[int]
33
33
  file_type: typing.Optional[str]
34
- project_id: str = pydantic.Field(description="The ID of the project that the file belongs to")
34
+ project_id: typing.Optional[str]
35
35
  last_modified_at: typing.Optional[dt.datetime]
36
- resource_info: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileResourceInfoValue]]]
37
- permission_info: typing.Optional[typing.Dict[str, typing.Optional[PipelineFilePermissionInfoValue]]]
38
- data_source_id: typing.Optional[str]
39
36
  file_id: typing.Optional[str]
40
37
  pipeline_id: str = pydantic.Field(description="The ID of the pipeline that the file is associated with")
38
+ resource_info: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileResourceInfoValue]]]
39
+ permission_info: typing.Optional[typing.Dict[str, typing.Optional[PipelineFilePermissionInfoValue]]]
41
40
  custom_metadata: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileCustomMetadataValue]]]
41
+ data_source_id: typing.Optional[str]
42
42
  config_hash: typing.Optional[typing.Dict[str, typing.Optional[PipelineFileConfigHashValue]]]
43
43
  indexed_page_count: typing.Optional[int]
44
44
  status: typing.Optional[PipelineFileStatus]
@@ -0,0 +1,33 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from .schema_generation_availability_status import SchemaGenerationAvailabilityStatus
8
+
9
+ try:
10
+ import pydantic
11
+ if pydantic.__version__.startswith("1."):
12
+ raise ImportError
13
+ import pydantic.v1 as pydantic # type: ignore
14
+ except ImportError:
15
+ import pydantic # type: ignore
16
+
17
+
18
+ class SchemaGenerationAvailability(pydantic.BaseModel):
19
+ model: str
20
+ status: SchemaGenerationAvailabilityStatus
21
+
22
+ def json(self, **kwargs: typing.Any) -> str:
23
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
24
+ return super().json(**kwargs_with_defaults)
25
+
26
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
27
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
28
+ return super().dict(**kwargs_with_defaults)
29
+
30
+ class Config:
31
+ frozen = True
32
+ smart_union = True
33
+ json_encoders = {dt.datetime: serialize_datetime}
@@ -0,0 +1,17 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import enum
4
+ import typing
5
+
6
+ T_Result = typing.TypeVar("T_Result")
7
+
8
+
9
+ class SchemaGenerationAvailabilityStatus(str, enum.Enum):
10
+ AVAILABLE = "available"
11
+ UNAVAILABLE = "unavailable"
12
+
13
+ def visit(self, available: typing.Callable[[], T_Result], unavailable: typing.Callable[[], T_Result]) -> T_Result:
14
+ if self is SchemaGenerationAvailabilityStatus.AVAILABLE:
15
+ return available()
16
+ if self is SchemaGenerationAvailabilityStatus.UNAVAILABLE:
17
+ return unavailable()
@@ -0,0 +1,42 @@
1
+ # This file was auto-generated by Fern from our API Definition.
2
+
3
+ import datetime as dt
4
+ import typing
5
+
6
+ from ..core.datetime_utils import serialize_datetime
7
+ from .sparse_model_type import SparseModelType
8
+
9
+ try:
10
+ import pydantic
11
+ if pydantic.__version__.startswith("1."):
12
+ raise ImportError
13
+ import pydantic.v1 as pydantic # type: ignore
14
+ except ImportError:
15
+ import pydantic # type: ignore
16
+
17
+
18
+ class SparseModelConfig(pydantic.BaseModel):
19
+ """
20
+ Configuration for sparse embedding models used in hybrid search.
21
+
22
+ This allows users to choose between Splade and BM25 models for
23
+ sparse retrieval in managed data sinks.
24
+ """
25
+
26
+ model_type: typing.Optional[SparseModelType] = pydantic.Field(
27
+ description="The sparse model type to use. 'auto' selects based on deployment mode (BYOC uses term frequency, Cloud uses Splade), 'splade' uses HuggingFace Splade model, 'bm25' uses Qdrant's FastEmbed BM25 model."
28
+ )
29
+ class_name: typing.Optional[str]
30
+
31
+ def json(self, **kwargs: typing.Any) -> str:
32
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
33
+ return super().json(**kwargs_with_defaults)
34
+
35
+ def dict(self, **kwargs: typing.Any) -> typing.Dict[str, typing.Any]:
36
+ kwargs_with_defaults: typing.Any = {"by_alias": True, "exclude_unset": True, **kwargs}
37
+ return super().dict(**kwargs_with_defaults)
38
+
39
+ class Config:
40
+ frozen = True
41
+ smart_union = True
42
+ json_encoders = {dt.datetime: serialize_datetime}