landingai-ade 0.11.0__py3-none-any.whl → 0.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
landingai_ade/_client.py CHANGED
@@ -41,6 +41,7 @@ from ._response import (
41
41
  async_to_raw_response_wrapper,
42
42
  async_to_streamed_response_wrapper,
43
43
  )
44
+ from .resources import parse_jobs
44
45
  from ._streaming import Stream as Stream, AsyncStream as AsyncStream
45
46
  from ._exceptions import APIStatusError, LandingAiadeError
46
47
  from ._base_client import (
@@ -49,6 +50,7 @@ from ._base_client import (
49
50
  AsyncAPIClient,
50
51
  make_request_options,
51
52
  )
53
+ from .lib.url_utils import convert_url_to_file_if_local
52
54
  from .types.parse_response import ParseResponse
53
55
  from .types.extract_response import ExtractResponse
54
56
 
@@ -73,6 +75,7 @@ ENVIRONMENTS: Dict[str, str] = {
73
75
 
74
76
 
75
77
  class LandingAIADE(SyncAPIClient):
78
+ parse_jobs: parse_jobs.ParseJobsResource
76
79
  with_raw_response: LandingAIADEWithRawResponse
77
80
  with_streaming_response: LandingAIADEWithStreamedResponse
78
81
 
@@ -154,6 +157,7 @@ class LandingAIADE(SyncAPIClient):
154
157
  _strict_response_validation=_strict_response_validation,
155
158
  )
156
159
 
160
+ self.parse_jobs = parse_jobs.ParseJobsResource(self)
157
161
  self.with_raw_response = LandingAIADEWithRawResponse(self)
158
162
  self.with_streaming_response = LandingAIADEWithStreamedResponse(self)
159
163
 
@@ -259,11 +263,12 @@ class LandingAIADE(SyncAPIClient):
259
263
  are extracted from the Markdown. The schema must be a valid JSON object and will
260
264
  be validated before processing the document.
261
265
 
262
- markdown: The Markdown file to extract data from.
266
+ markdown: The Markdown file or Markdown content to extract data from.
263
267
 
264
268
  markdown_url: The URL to the Markdown file to extract data from.
265
269
 
266
- model: The version of the model to use for extraction.
270
+ model: The version of the model to use for extraction. Use `extract-latest` to use the
271
+ latest version.
267
272
 
268
273
  extra_headers: Send extra headers
269
274
 
@@ -273,6 +278,9 @@ class LandingAIADE(SyncAPIClient):
273
278
 
274
279
  timeout: Override the client-level default timeout for this request, in seconds
275
280
  """
281
+ # Convert local file paths to file parameters
282
+ markdown, markdown_url = convert_url_to_file_if_local(markdown, markdown_url)
283
+
276
284
  body = deepcopy_minimal(
277
285
  {
278
286
  "schema": schema,
@@ -318,24 +326,23 @@ class LandingAIADE(SyncAPIClient):
318
326
  timeout: float | httpx.Timeout | None | NotGiven = not_given,
319
327
  ) -> ParseResponse:
320
328
  """
321
- Parse a document.
329
+ Parse a document or spreadsheet.
322
330
 
323
- This endpoint parses documents and structured Markdown, chunks, and metadata.
331
+ This endpoint parses documents (PDF, images) and spreadsheets (XLSX, CSV) into
332
+ structured Markdown, chunks, and metadata.
324
333
 
325
334
  For EU users, use this endpoint:
326
335
 
327
336
  `https://api.va.eu-west-1.landing.ai/v1/ade/parse`.
328
337
 
329
338
  Args:
330
- document: A file to be parsed. The file can be a PDF (50 pages max) or an image (50MB).
331
- See the list of supported file types here
332
- (https://docs.landing.ai/ade/ade-file-types). Either this parameter or the
333
- document_url parameter must be provided.
339
+ document: A file to be parsed. The file can be a PDF or an image. See the list of
340
+ supported file types here: https://docs.landing.ai/ade/ade-file-types. Either
341
+ this parameter or the `document_url` parameter must be provided.
334
342
 
335
- document_url: The URL to the file to be parsed. The file can be a PDF (50 pages max) or an
336
- image (50MB). See the list of supported file types here
337
- (https://docs.landing.ai/ade/ade-file-types). Either this parameter or the
338
- document parameter must be provided.
343
+ document_url: The URL to the file to be parsed. The file can be a PDF or an image. See the
344
+ list of supported file types here: https://docs.landing.ai/ade/ade-file-types.
345
+ Either this parameter or the `document` parameter must be provided.
339
346
 
340
347
  model: The version of the model to use for parsing.
341
348
 
@@ -351,6 +358,9 @@ class LandingAIADE(SyncAPIClient):
351
358
 
352
359
  timeout: Override the client-level default timeout for this request, in seconds
353
360
  """
361
+ # Convert local file paths to file parameters
362
+ document, document_url = convert_url_to_file_if_local(document, document_url)
363
+
354
364
  body = deepcopy_minimal(
355
365
  {
356
366
  "document": document,
@@ -416,6 +426,7 @@ class LandingAIADE(SyncAPIClient):
416
426
 
417
427
 
418
428
  class AsyncLandingAIADE(AsyncAPIClient):
429
+ parse_jobs: parse_jobs.AsyncParseJobsResource
419
430
  with_raw_response: AsyncLandingAIADEWithRawResponse
420
431
  with_streaming_response: AsyncLandingAIADEWithStreamedResponse
421
432
 
@@ -497,6 +508,7 @@ class AsyncLandingAIADE(AsyncAPIClient):
497
508
  _strict_response_validation=_strict_response_validation,
498
509
  )
499
510
 
511
+ self.parse_jobs = parse_jobs.AsyncParseJobsResource(self)
500
512
  self.with_raw_response = AsyncLandingAIADEWithRawResponse(self)
501
513
  self.with_streaming_response = AsyncLandingAIADEWithStreamedResponse(self)
502
514
 
@@ -602,11 +614,12 @@ class AsyncLandingAIADE(AsyncAPIClient):
602
614
  are extracted from the Markdown. The schema must be a valid JSON object and will
603
615
  be validated before processing the document.
604
616
 
605
- markdown: The Markdown file to extract data from.
617
+ markdown: The Markdown file or Markdown content to extract data from.
606
618
 
607
619
  markdown_url: The URL to the Markdown file to extract data from.
608
620
 
609
- model: The version of the model to use for extraction.
621
+ model: The version of the model to use for extraction. Use `extract-latest` to use the
622
+ latest version.
610
623
 
611
624
  extra_headers: Send extra headers
612
625
 
@@ -616,6 +629,9 @@ class AsyncLandingAIADE(AsyncAPIClient):
616
629
 
617
630
  timeout: Override the client-level default timeout for this request, in seconds
618
631
  """
632
+ # Convert local file paths to file parameters
633
+ markdown, markdown_url = convert_url_to_file_if_local(markdown, markdown_url)
634
+
619
635
  body = deepcopy_minimal(
620
636
  {
621
637
  "schema": schema,
@@ -661,24 +677,23 @@ class AsyncLandingAIADE(AsyncAPIClient):
661
677
  timeout: float | httpx.Timeout | None | NotGiven = not_given,
662
678
  ) -> ParseResponse:
663
679
  """
664
- Parse a document.
680
+ Parse a document or spreadsheet.
665
681
 
666
- This endpoint parses documents and structured Markdown, chunks, and metadata.
682
+ This endpoint parses documents (PDF, images) and spreadsheets (XLSX, CSV) into
683
+ structured Markdown, chunks, and metadata.
667
684
 
668
685
  For EU users, use this endpoint:
669
686
 
670
687
  `https://api.va.eu-west-1.landing.ai/v1/ade/parse`.
671
688
 
672
689
  Args:
673
- document: A file to be parsed. The file can be a PDF (50 pages max) or an image (50MB).
674
- See the list of supported file types here
675
- (https://docs.landing.ai/ade/ade-file-types). Either this parameter or the
676
- document_url parameter must be provided.
690
+ document: A file to be parsed. The file can be a PDF or an image. See the list of
691
+ supported file types here: https://docs.landing.ai/ade/ade-file-types. Either
692
+ this parameter or the `document_url` parameter must be provided.
677
693
 
678
- document_url: The URL to the file to be parsed. The file can be a PDF (50 pages max) or an
679
- image (50MB). See the list of supported file types here
680
- (https://docs.landing.ai/ade/ade-file-types). Either this parameter or the
681
- document parameter must be provided.
694
+ document_url: The URL to the file to be parsed. The file can be a PDF or an image. See the
695
+ list of supported file types here: https://docs.landing.ai/ade/ade-file-types.
696
+ Either this parameter or the `document` parameter must be provided.
682
697
 
683
698
  model: The version of the model to use for parsing.
684
699
 
@@ -694,6 +709,9 @@ class AsyncLandingAIADE(AsyncAPIClient):
694
709
 
695
710
  timeout: Override the client-level default timeout for this request, in seconds
696
711
  """
712
+ # Convert local file paths to file parameters
713
+ document, document_url = convert_url_to_file_if_local(document, document_url)
714
+
697
715
  body = deepcopy_minimal(
698
716
  {
699
717
  "document": document,
@@ -760,6 +778,8 @@ class AsyncLandingAIADE(AsyncAPIClient):
760
778
 
761
779
  class LandingAIADEWithRawResponse:
762
780
  def __init__(self, client: LandingAIADE) -> None:
781
+ self.parse_jobs = parse_jobs.ParseJobsResourceWithRawResponse(client.parse_jobs)
782
+
763
783
  self.extract = to_raw_response_wrapper(
764
784
  client.extract,
765
785
  )
@@ -770,6 +790,8 @@ class LandingAIADEWithRawResponse:
770
790
 
771
791
  class AsyncLandingAIADEWithRawResponse:
772
792
  def __init__(self, client: AsyncLandingAIADE) -> None:
793
+ self.parse_jobs = parse_jobs.AsyncParseJobsResourceWithRawResponse(client.parse_jobs)
794
+
773
795
  self.extract = async_to_raw_response_wrapper(
774
796
  client.extract,
775
797
  )
@@ -780,6 +802,8 @@ class AsyncLandingAIADEWithRawResponse:
780
802
 
781
803
  class LandingAIADEWithStreamedResponse:
782
804
  def __init__(self, client: LandingAIADE) -> None:
805
+ self.parse_jobs = parse_jobs.ParseJobsResourceWithStreamingResponse(client.parse_jobs)
806
+
783
807
  self.extract = to_streamed_response_wrapper(
784
808
  client.extract,
785
809
  )
@@ -790,6 +814,8 @@ class LandingAIADEWithStreamedResponse:
790
814
 
791
815
  class AsyncLandingAIADEWithStreamedResponse:
792
816
  def __init__(self, client: AsyncLandingAIADE) -> None:
817
+ self.parse_jobs = parse_jobs.AsyncParseJobsResourceWithStreamingResponse(client.parse_jobs)
818
+
793
819
  self.extract = async_to_streamed_response_wrapper(
794
820
  client.extract,
795
821
  )
@@ -5,8 +5,8 @@ import httpx
5
5
  RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
6
6
  OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
7
7
 
8
- # default timeout is 1 minute
9
- DEFAULT_TIMEOUT = httpx.Timeout(timeout=60, connect=5.0)
8
+ # default timeout is 8 minutes
9
+ DEFAULT_TIMEOUT = httpx.Timeout(timeout=480, connect=5.0)
10
10
  DEFAULT_MAX_RETRIES = 2
11
11
  DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=100, max_keepalive_connections=20)
12
12
 
landingai_ade/_files.py CHANGED
@@ -26,7 +26,7 @@ def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
26
26
 
27
27
  def is_file_content(obj: object) -> TypeGuard[FileContent]:
28
28
  return (
29
- isinstance(obj, bytes) or isinstance(obj, tuple) or isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike)
29
+ isinstance(obj, bytes) or isinstance(obj, str) or isinstance(obj, tuple) or isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike)
30
30
  )
31
31
 
32
32
 
@@ -66,6 +66,9 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes:
66
66
  path = pathlib.Path(file)
67
67
  return (path.name, path.read_bytes())
68
68
 
69
+ if isinstance(file, str):
70
+ return file.encode('utf-8')
71
+
69
72
  return file
70
73
 
71
74
  if is_tuple_t(file):
@@ -77,6 +80,8 @@ def _transform_file(file: FileTypes) -> HttpxFileTypes:
77
80
  def read_file_content(file: FileContent) -> HttpxFileContent:
78
81
  if isinstance(file, os.PathLike):
79
82
  return pathlib.Path(file).read_bytes()
83
+ if isinstance(file, str):
84
+ return file.encode('utf-8')
80
85
  return file
81
86
 
82
87
 
@@ -108,6 +113,9 @@ async def _async_transform_file(file: FileTypes) -> HttpxFileTypes:
108
113
  path = anyio.Path(file)
109
114
  return (path.name, await path.read_bytes())
110
115
 
116
+ if isinstance(file, str):
117
+ return file.encode('utf-8')
118
+
111
119
  return file
112
120
 
113
121
  if is_tuple_t(file):
@@ -120,4 +128,7 @@ async def async_read_file_content(file: FileContent) -> HttpxFileContent:
120
128
  if isinstance(file, os.PathLike):
121
129
  return await anyio.Path(file).read_bytes()
122
130
 
131
+ if isinstance(file, str):
132
+ return file.encode('utf-8')
133
+
123
134
  return file
landingai_ade/_models.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import os
4
4
  import inspect
5
+ import weakref
5
6
  from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast
6
7
  from datetime import date, datetime
7
8
  from typing_extensions import (
@@ -573,6 +574,9 @@ class CachedDiscriminatorType(Protocol):
573
574
  __discriminator__: DiscriminatorDetails
574
575
 
575
576
 
577
+ DISCRIMINATOR_CACHE: weakref.WeakKeyDictionary[type, DiscriminatorDetails] = weakref.WeakKeyDictionary()
578
+
579
+
576
580
  class DiscriminatorDetails:
577
581
  field_name: str
578
582
  """The name of the discriminator field in the variant class, e.g.
@@ -615,8 +619,9 @@ class DiscriminatorDetails:
615
619
 
616
620
 
617
621
  def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None:
618
- if isinstance(union, CachedDiscriminatorType):
619
- return union.__discriminator__
622
+ cached = DISCRIMINATOR_CACHE.get(union)
623
+ if cached is not None:
624
+ return cached
620
625
 
621
626
  discriminator_field_name: str | None = None
622
627
 
@@ -669,7 +674,7 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
669
674
  discriminator_field=discriminator_field_name,
670
675
  discriminator_alias=discriminator_alias,
671
676
  )
672
- cast(CachedDiscriminatorType, union).__discriminator__ = details
677
+ DISCRIMINATOR_CACHE.setdefault(union, details)
673
678
  return details
674
679
 
675
680
 
@@ -57,9 +57,8 @@ class Stream(Generic[_T]):
57
57
  for sse in iterator:
58
58
  yield process_data(data=sse.json(), cast_to=cast_to, response=response)
59
59
 
60
- # Ensure the entire stream is consumed
61
- for _sse in iterator:
62
- ...
60
+ # As we might not fully consume the response stream, we need to close it explicitly
61
+ response.close()
63
62
 
64
63
  def __enter__(self) -> Self:
65
64
  return self
@@ -121,9 +120,8 @@ class AsyncStream(Generic[_T]):
121
120
  async for sse in iterator:
122
121
  yield process_data(data=sse.json(), cast_to=cast_to, response=response)
123
122
 
124
- # Ensure the entire stream is consumed
125
- async for _sse in iterator:
126
- ...
123
+ # As we might not fully consume the response stream, we need to close it explicitly
124
+ await response.aclose()
127
125
 
128
126
  async def __aenter__(self) -> Self:
129
127
  return self
landingai_ade/_types.py CHANGED
@@ -52,10 +52,10 @@ ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]]
52
52
  ProxiesTypes = Union[str, Proxy, ProxiesDict]
53
53
  if TYPE_CHECKING:
54
54
  Base64FileInput = Union[IO[bytes], PathLike[str]]
55
- FileContent = Union[IO[bytes], bytes, PathLike[str]]
55
+ FileContent = Union[IO[bytes], bytes, str, PathLike[str]]
56
56
  else:
57
57
  Base64FileInput = Union[IO[bytes], PathLike]
58
- FileContent = Union[IO[bytes], bytes, PathLike] # PathLike is not subscriptable in Python 3.8.
58
+ FileContent = Union[IO[bytes], bytes, str, PathLike] # PathLike is not subscriptable in Python 3.8.
59
59
  FileTypes = Union[
60
60
  # file (or bytes)
61
61
  FileContent,
@@ -1,10 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- import sys
4
3
  import asyncio
5
4
  import functools
6
- import contextvars
7
- from typing import Any, TypeVar, Callable, Awaitable
5
+ from typing import TypeVar, Callable, Awaitable
8
6
  from typing_extensions import ParamSpec
9
7
 
10
8
  import anyio
@@ -15,34 +13,11 @@ T_Retval = TypeVar("T_Retval")
15
13
  T_ParamSpec = ParamSpec("T_ParamSpec")
16
14
 
17
15
 
18
- if sys.version_info >= (3, 9):
19
- _asyncio_to_thread = asyncio.to_thread
20
- else:
21
- # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
22
- # for Python 3.8 support
23
- async def _asyncio_to_thread(
24
- func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
25
- ) -> Any:
26
- """Asynchronously run function *func* in a separate thread.
27
-
28
- Any *args and **kwargs supplied for this function are directly passed
29
- to *func*. Also, the current :class:`contextvars.Context` is propagated,
30
- allowing context variables from the main thread to be accessed in the
31
- separate thread.
32
-
33
- Returns a coroutine that can be awaited to get the eventual result of *func*.
34
- """
35
- loop = asyncio.events.get_running_loop()
36
- ctx = contextvars.copy_context()
37
- func_call = functools.partial(ctx.run, func, *args, **kwargs)
38
- return await loop.run_in_executor(None, func_call)
39
-
40
-
41
16
  async def to_thread(
42
17
  func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
43
18
  ) -> T_Retval:
44
19
  if sniffio.current_async_library() == "asyncio":
45
- return await _asyncio_to_thread(func, *args, **kwargs)
20
+ return await asyncio.to_thread(func, *args, **kwargs)
46
21
 
47
22
  return await anyio.to_thread.run_sync(
48
23
  functools.partial(func, *args, **kwargs),
@@ -53,10 +28,7 @@ async def to_thread(
53
28
  def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
54
29
  """
55
30
  Take a blocking function and create an async one that receives the same
56
- positional and keyword arguments. For python version 3.9 and above, it uses
57
- asyncio.to_thread to run the function in a separate thread. For python version
58
- 3.8, it uses locally defined copy of the asyncio.to_thread function which was
59
- introduced in python 3.9.
31
+ positional and keyword arguments.
60
32
 
61
33
  Usage:
62
34
 
@@ -133,7 +133,7 @@ def is_given(obj: _T | NotGiven | Omit) -> TypeGuard[_T]:
133
133
  # Type safe methods for narrowing types with TypeVars.
134
134
  # The default narrowing for isinstance(obj, dict) is dict[unknown, unknown],
135
135
  # however this cause Pyright to rightfully report errors. As we know we don't
136
- # care about the contained types we can safely use `object` in it's place.
136
+ # care about the contained types we can safely use `object` in its place.
137
137
  #
138
138
  # There are two separate functions defined, `is_*` and `is_*_t` for different use cases.
139
139
  # `is_*` is for when you're dealing with an unknown input
landingai_ade/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "landingai_ade"
4
- __version__ = "0.11.0" # x-release-please-version
4
+ __version__ = "0.21.1" # x-release-please-version
@@ -0,0 +1,51 @@
1
+ """Utility functions for handling URL and file path conversions."""
2
+
3
+ from typing import Tuple, Union, Optional, cast
4
+ from pathlib import Path
5
+ from urllib.parse import urlparse
6
+
7
+ from .._types import Omit, FileTypes, omit
8
+
9
+
10
+ def convert_url_to_file_if_local(
11
+ file: Union[Optional[FileTypes], Omit],
12
+ file_url: Union[Optional[str], Omit],
13
+ ) -> Tuple[Union[Optional[FileTypes], Omit], Union[Optional[str], Omit]]:
14
+ """
15
+ Convert a URL parameter to a file parameter if it's a local file path.
16
+
17
+ If the file_url is a local file path that exists, it will be converted to a Path object
18
+ and returned as the file parameter, with the file_url parameter set to omit.
19
+
20
+ If the file_url is a remote URL (http/https/ftp/ftps) or doesn't exist as a local file,
21
+ it will be returned unchanged.
22
+
23
+ Args:
24
+ file: The existing file parameter
25
+ file_url: The URL parameter that might be a local file path
26
+
27
+ Returns:
28
+ A tuple of (file, file_url) where one will be set and the other omit
29
+ """
30
+ # If file_url is omit or None, return unchanged
31
+ if file_url is omit or file_url is None:
32
+ return file, file_url
33
+
34
+ # At this point, file_url is guaranteed to be a string, use cast for type narrowing
35
+ url_str = cast(str, file_url)
36
+
37
+ # Check if it's a remote URL (http/https)
38
+ parsed = urlparse(url_str)
39
+ if parsed.scheme in ("http", "https", "ftp", "ftps"):
40
+ # It's a remote URL, keep it as is
41
+ return file, file_url
42
+
43
+ # Check if it's a local file path
44
+ path = Path(url_str)
45
+ if path.exists() and path.is_file():
46
+ # It's a local file, convert to file parameter
47
+ return path, omit
48
+
49
+ # Path doesn't exist or is not a file, treat as URL
50
+ # (could be a URL with a different scheme or a typo)
51
+ return file, file_url
@@ -1 +1,19 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from .parse_jobs import (
4
+ ParseJobsResource,
5
+ AsyncParseJobsResource,
6
+ ParseJobsResourceWithRawResponse,
7
+ AsyncParseJobsResourceWithRawResponse,
8
+ ParseJobsResourceWithStreamingResponse,
9
+ AsyncParseJobsResourceWithStreamingResponse,
10
+ )
11
+
12
+ __all__ = [
13
+ "ParseJobsResource",
14
+ "AsyncParseJobsResource",
15
+ "ParseJobsResourceWithRawResponse",
16
+ "AsyncParseJobsResourceWithRawResponse",
17
+ "ParseJobsResourceWithStreamingResponse",
18
+ "AsyncParseJobsResourceWithStreamingResponse",
19
+ ]