isaacus 0.11.0__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. isaacus/_version.py +1 -1
  2. isaacus/resources/classifications/universal.py +8 -8
  3. isaacus/resources/embeddings.py +24 -24
  4. isaacus/resources/extractions/__init__.py +12 -12
  5. isaacus/resources/extractions/extractions.py +18 -18
  6. isaacus/resources/extractions/qa.py +29 -29
  7. isaacus/resources/rerankings.py +12 -12
  8. isaacus/types/__init__.py +14 -0
  9. isaacus/types/classifications/universal_classification_response.py +22 -22
  10. isaacus/types/classifications/universal_create_params.py +6 -6
  11. isaacus/types/crossreference.py +42 -0
  12. isaacus/types/date.py +70 -0
  13. isaacus/types/document.py +172 -0
  14. isaacus/types/email.py +27 -0
  15. isaacus/types/embedding_create_params.py +11 -11
  16. isaacus/types/embedding_response.py +3 -3
  17. isaacus/types/enrichment_response.py +6 -1404
  18. isaacus/types/external_document.py +101 -0
  19. isaacus/types/extractions/__init__.py +1 -1
  20. isaacus/types/extractions/answer_extraction_response.py +12 -12
  21. isaacus/types/extractions/qa_create_params.py +8 -8
  22. isaacus/types/id_number.py +27 -0
  23. isaacus/types/location.py +53 -0
  24. isaacus/types/person.py +195 -0
  25. isaacus/types/phone_number.py +30 -0
  26. isaacus/types/quote.py +52 -0
  27. isaacus/types/reranking_create_params.py +7 -7
  28. isaacus/types/segment.py +205 -0
  29. isaacus/types/span.py +28 -0
  30. isaacus/types/term.py +59 -0
  31. isaacus/types/website.py +28 -0
  32. {isaacus-0.11.0.dist-info → isaacus-0.13.0.dist-info}/METADATA +1 -1
  33. {isaacus-0.11.0.dist-info → isaacus-0.13.0.dist-info}/RECORD +35 -21
  34. {isaacus-0.11.0.dist-info → isaacus-0.13.0.dist-info}/WHEEL +0 -0
  35. {isaacus-0.11.0.dist-info → isaacus-0.13.0.dist-info}/licenses/LICENSE +0 -0
@@ -8,13 +8,6 @@ __all__ = ["UniversalClassificationResponse", "Classification", "ClassificationC
8
8
 
9
9
 
10
10
  class ClassificationChunk(BaseModel):
11
- end: int
12
- """
13
- The index of the character immediately after the last character of the chunk in
14
- the original text, beginning from `0` (such that, in Python, the chunk is
15
- equivalent to `text[start:end]`).
16
- """
17
-
18
11
  index: int
19
12
  """
20
13
  The original position of the chunk in the outputted list of chunks before
@@ -22,6 +15,19 @@ class ClassificationChunk(BaseModel):
22
15
  `1`).
23
16
  """
24
17
 
18
+ start: int
19
+ """
20
+ The index of the character in the original text where the chunk starts,
21
+ beginning from `0`.
22
+ """
23
+
24
+ end: int
25
+ """
26
+ The index of the character immediately after the last character of the chunk in
27
+ the original text, beginning from `0` (such that, in Python, the chunk is
28
+ equivalent to `text[start:end]`).
29
+ """
30
+
25
31
  score: float
26
32
  """
27
33
  The model's score of the likelihood that the query expressed about the chunk is
@@ -31,26 +37,11 @@ class ClassificationChunk(BaseModel):
31
37
  score less than `0.5` indicates that the chunk does not support the query.
32
38
  """
33
39
 
34
- start: int
35
- """
36
- The index of the character in the original text where the chunk starts,
37
- beginning from `0`.
38
- """
39
-
40
40
  text: str
41
41
  """The text of the chunk."""
42
42
 
43
43
 
44
44
  class Classification(BaseModel):
45
- chunks: Optional[List[ClassificationChunk]] = None
46
- """
47
- The text as broken into chunks by
48
- [semchunk](https://github.com/isaacus-dev/semchunk), each chunk with its own
49
- confidence score, ordered from highest to lowest score.
50
-
51
- If no chunking occurred, this will be `null`.
52
- """
53
-
54
45
  index: int
55
46
  """
56
47
  The index of the text in the input array of texts, starting from `0` (and,
@@ -66,6 +57,15 @@ class Classification(BaseModel):
66
57
  score less than `0.5` indicates that the text does not support the query.
67
58
  """
68
59
 
60
+ chunks: Optional[List[ClassificationChunk]] = None
61
+ """
62
+ The text as broken into chunks by
63
+ [semchunk](https://github.com/isaacus-dev/semchunk), each chunk with its own
64
+ confidence score, ordered from highest to lowest score.
65
+
66
+ If no chunking occurred, this will be `null`.
67
+ """
68
+
69
69
 
70
70
  class Usage(BaseModel):
71
71
  """Statistics about the usage of resources in the process of classifying the text."""
@@ -34,9 +34,6 @@ class UniversalCreateParams(TypedDict, total=False):
34
34
  Each text must contain at least one non-whitespace character.
35
35
  """
36
36
 
37
- chunking_options: Optional[ChunkingOptions]
38
- """Options for how to split text into smaller chunks."""
39
-
40
37
  is_iql: bool
41
38
  """
42
39
  Whether the query should be interpreted as an
@@ -57,15 +54,18 @@ class UniversalCreateParams(TypedDict, total=False):
57
54
  `chunk_min` uses the lowest confidence score of all of the texts' chunks.
58
55
  """
59
56
 
57
+ chunking_options: Optional[ChunkingOptions]
58
+ """Options for how to split text into smaller chunks."""
59
+
60
60
 
61
61
  class ChunkingOptions(TypedDict, total=False):
62
62
  """Options for how to split text into smaller chunks."""
63
63
 
64
+ size: Optional[int]
65
+ """A whole number greater than or equal to 1."""
66
+
64
67
  overlap_ratio: Optional[float]
65
68
  """A number greater than or equal to 0 and less than 1."""
66
69
 
67
70
  overlap_tokens: Optional[int]
68
71
  """A whole number greater than or equal to 0."""
69
-
70
- size: Optional[int]
71
- """A whole number greater than or equal to 1."""
@@ -0,0 +1,42 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from .span import Span
4
+ from .._models import BaseModel
5
+
6
+ __all__ = ["Crossreference"]
7
+
8
+
9
+ class Crossreference(BaseModel):
10
+ """A cross-reference within the document pointing to one or more segments."""
11
+
12
+ start: str
13
+ """
14
+ The unique identifier of the earliest segment in the span of segments being
15
+ cross-referenced with ties broken in favor of the least-nested (i.e., largest)
16
+ segment. If the cross-reference points to a single segment, `start` and `end`
17
+ will be identical.
18
+ """
19
+
20
+ end: str
21
+ """
22
+ The unique identifier of the latest segment in the span of segments being
23
+ cross-referenced with ties broken in favor of the least-nested (i.e., largest)
24
+ segment. If the cross-reference points to a single segment, `start` and `end`
25
+ will be identical.
26
+ """
27
+
28
+ span: Span
29
+ """A zero-based, half-open span into the Unicode code point space of input text.
30
+
31
+ All spans are globally laminar and well-nested similar to XML—it is impossible
32
+ for any two spans to partially overlap; they can only be disjoint, adjacent, or
33
+ wholly nested. Spans of the exact same type (e.g., segments) will never be
34
+ duplicated.
35
+
36
+ A span cannot be empty and will never start or end at whitespace.
37
+
38
+ Note that, when using programming languages other than Python (which uses
39
+ zero-based, half-open, Unicode code point-spaced string indexing), indices may
40
+ need to be translated accordingly (for example, JavaScript slices into UTF-16
41
+ code units instead of Unicode code points).
42
+ """
isaacus/types/date.py ADDED
@@ -0,0 +1,70 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .span import Span
7
+ from .._models import BaseModel
8
+
9
+ __all__ = ["Date"]
10
+
11
+
12
+ class Date(BaseModel):
13
+ """
14
+ A date identified in a document belonging to one of the following types: `creation`, `signature`, `effective`, `expiry`, `delivery`, `renewal`, `payment`, `birth`, or `death`.
15
+
16
+ Only Gregorian dates between the years 1000 and 9999 (inclusive) fitting into one of the supported date types are extractable.
17
+ """
18
+
19
+ value: str
20
+ """The date in ISO 8601 format (YYYY-MM-DD)."""
21
+
22
+ type: Literal["creation", "signature", "effective", "expiry", "delivery", "renewal", "payment", "birth", "death"]
23
+ """
24
+ The type of the date, being one of `creation`, `signature`, `effective`,
25
+ `expiry`, `delivery`, `renewal`, `payment`, `birth`, or `death`. If a date is
26
+ mentioned in a document that does not fit into a supported type, it will not be
27
+ extracted.
28
+
29
+ `creation` denotes the date the document was created. There may only be one
30
+ `creation` date per document.
31
+
32
+ `signature` denotes the date the document was signed.
33
+
34
+ `effective` denotes the date when the document or a part thereof comes into
35
+ effect (e.g., commencement or enactment dates).
36
+
37
+ `expiry` denotes the date when the document or a part thereof is no longer in
38
+ effect.
39
+
40
+ `delivery` denotes the date when goods or services are to be delivered under the
41
+ document.
42
+
43
+ `renewal` denotes the date when one or more of the document's terms are to be
44
+ renewed.
45
+
46
+ `payment` denotes the date when payment is to be made under the document.
47
+
48
+ `birth` denotes the birth date of a natural person or establishment (e.g.,
49
+ incorporation) date of a non-natural legal person identified in the document.
50
+ There can only be one `birth` date linked to a single person and all `birth`
51
+ dates must be linked to a person. A person's `birth` date will never be after
52
+ their `death` date.
53
+
54
+ `death` denotes the death date of a natural person or dissolution date of a
55
+ non-natural legal person identified in the document. There can only be one
56
+ `death` date linked to a single person and all `death` dates must be linked to a
57
+ person. A person's `death` date will never be before their `birth` date.
58
+ """
59
+
60
+ person: Optional[str] = None
61
+ """
62
+ A unique identifier for a legal person in the format `per:{index}` where
63
+ `{index}` is a non-negative incrementing integer starting from zero.
64
+ """
65
+
66
+ mentions: List[Span]
67
+ """
68
+ An array of one or more spans within the document's text where the date is
69
+ mentioned.
70
+ """
@@ -0,0 +1,172 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .date import Date
7
+ from .span import Span
8
+ from .term import Term
9
+ from .email import Email
10
+ from .quote import Quote
11
+ from .person import Person
12
+ from .segment import Segment
13
+ from .website import Website
14
+ from .._models import BaseModel
15
+ from .location import Location
16
+ from .id_number import IDNumber
17
+ from .phone_number import PhoneNumber
18
+ from .crossreference import Crossreference
19
+ from .external_document import ExternalDocument
20
+
21
+ __all__ = ["Document"]
22
+
23
+
24
+ class Document(BaseModel):
25
+ """The enriched document."""
26
+
27
+ title: Optional[Span] = None
28
+ """A zero-based, half-open span into the Unicode code point space of input text.
29
+
30
+ All spans are globally laminar and well-nested similar to XML—it is impossible
31
+ for any two spans to partially overlap; they can only be disjoint, adjacent, or
32
+ wholly nested. Spans of the exact same type (e.g., segments) will never be
33
+ duplicated.
34
+
35
+ A span cannot be empty and will never start or end at whitespace.
36
+
37
+ Note that, when using programming languages other than Python (which uses
38
+ zero-based, half-open, Unicode code point-spaced string indexing), indices may
39
+ need to be translated accordingly (for example, JavaScript slices into UTF-16
40
+ code units instead of Unicode code points).
41
+ """
42
+
43
+ subtitle: Optional[Span] = None
44
+ """A zero-based, half-open span into the Unicode code point space of input text.
45
+
46
+ All spans are globally laminar and well-nested similar to XML—it is impossible
47
+ for any two spans to partially overlap; they can only be disjoint, adjacent, or
48
+ wholly nested. Spans of the exact same type (e.g., segments) will never be
49
+ duplicated.
50
+
51
+ A span cannot be empty and will never start or end at whitespace.
52
+
53
+ Note that, when using programming languages other than Python (which uses
54
+ zero-based, half-open, Unicode code point-spaced string indexing), indices may
55
+ need to be translated accordingly (for example, JavaScript slices into UTF-16
56
+ code units instead of Unicode code points).
57
+ """
58
+
59
+ type: Literal["statute", "regulation", "decision", "contract", "other"]
60
+ """
61
+ The type of the document, being one of `statute`, `regulation`, `decision`,
62
+ `contract`, or `other`.
63
+
64
+ `statute` denotes primary legislation such as acts, bills, codes, and
65
+ constitutions.
66
+
67
+ `regulation` denotes secondary legislation such as rules, statutory instruments,
68
+ and ordinances.
69
+
70
+ `decision` denotes judicial or quasi-judicial decisions such as court judgments,
71
+ judicial opinions, and tribunal rulings.
72
+
73
+ `other` is used for all other types of legal documents that do not fit into any
74
+ of the predefined types.
75
+ """
76
+
77
+ jurisdiction: Optional[str] = None
78
+ """
79
+ A jurisdiction code representing a country (via an initial country code) and,
80
+ optionally, a subdivision within that country (via a subsequent subdivision code
81
+ prefixed by a hyphen).
82
+
83
+ All 249 ISO 3166-1 alpha-2 country codes are representable in addition to
84
+ special `INT` and `EU` codes for international and European Union law,
85
+ respectively.
86
+
87
+ All 5,046 ISO 3166-2 codes are also representable in addition to a special `FED`
88
+ code for federal law.
89
+ """
90
+
91
+ segments: List[Segment]
92
+ """
93
+ An array of segments within the document representing structurally distinct
94
+ portions of its content.
95
+ """
96
+
97
+ crossreferences: List[Crossreference]
98
+ """
99
+ An array of cross-references within the document pointing to a single segment or
100
+ a span of segments.
101
+ """
102
+
103
+ locations: List[Location]
104
+ """An array of locations identified in the document."""
105
+
106
+ persons: List[Person]
107
+ """An array of legal persons identified in the document."""
108
+
109
+ emails: List[Email]
110
+ """
111
+ An array of email addresses identified in the document belonging to legal
112
+ persons.
113
+
114
+ Email addresses mentioned in the document that are not attributable to legal
115
+ persons will not be extracted.
116
+ """
117
+
118
+ websites: List[Website]
119
+ """An array of websites identified in the document belonging to legal persons.
120
+
121
+ Websites mentioned in the document that are not attributable to legal persons
122
+ will not be extracted.
123
+ """
124
+
125
+ phone_numbers: List[PhoneNumber]
126
+ """
127
+ An array of valid phone numbers identified in the document belonging to legal
128
+ persons.
129
+
130
+ Phone numbers mentioned in the document that are not valid, possible, or
131
+ attributable to legal persons will not be extracted.
132
+ """
133
+
134
+ id_numbers: List[IDNumber]
135
+ """
136
+ An array of identification numbers identified in the document belonging to legal
137
+ persons.
138
+
139
+ Identification numbers mentioned in the document that are not attributable to
140
+ legal persons will not be extracted.
141
+ """
142
+
143
+ terms: List[Term]
144
+ """An array of terms assigned definite meanings within the document."""
145
+
146
+ external_documents: List[ExternalDocument]
147
+ """An array of documents identified within the document."""
148
+
149
+ quotes: List[Quote]
150
+ """An array of quotations within the document."""
151
+
152
+ dates: List[Date]
153
+ """
154
+ An array of dates identified in the document belonging to one of the following
155
+ types: `creation`, `signature`, `effective`, `expiry`, `delivery`, `renewal`,
156
+ `payment`, `birth`, or `death`.
157
+
158
+ Only Gregorian dates between the years 1000 and 9999 (inclusive) fitting into
159
+ one of the supported date types are extractable.
160
+ """
161
+
162
+ headings: List[Span]
163
+ """An array of spans within the document's text constituting headings."""
164
+
165
+ junk: List[Span]
166
+ """
167
+ An array of spans within the document's text constituting non-operative,
168
+ non-substantive 'junk' content such as headers, footers, page numbers, and OCR
169
+ artifacts.
170
+ """
171
+
172
+ version: Literal["ilgs@1"]
isaacus/types/email.py ADDED
@@ -0,0 +1,27 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List
4
+
5
+ from .span import Span
6
+ from .._models import BaseModel
7
+
8
+ __all__ = ["Email"]
9
+
10
+
11
+ class Email(BaseModel):
12
+ """An email address identified in a document belonging to a legal person.
13
+
14
+ If an email address was mentioned in the document but is not attributable to a legal person, it will not be extracted.
15
+ """
16
+
17
+ address: str
18
+ """The normalized email address."""
19
+
20
+ person: str
21
+ """The unique identifier of the person that this email address belongs to."""
22
+
23
+ mentions: List[Span]
24
+ """
25
+ An array of one or more spans within the document's text where the email address
26
+ is mentioned.
27
+ """
@@ -25,8 +25,15 @@ class EmbeddingCreateParams(TypedDict, total=False):
25
25
  No more than 128 texts can be embedded in a single request.
26
26
  """
27
27
 
28
- dimensions: Optional[int]
29
- """A whole number greater than or equal to 1."""
28
+ task: Optional[Literal["retrieval/query", "retrieval/document"]]
29
+ """The task the embeddings will be used for.
30
+
31
+ `retrieval/query` is meant for queries and statements, and `retrieval/document`
32
+ is meant for anything to be retrieved using query embeddings.
33
+
34
+ If `null`, which is the default setting, embeddings will not be optimized for
35
+ any particular task.
36
+ """
30
37
 
31
38
  overflow_strategy: Optional[Literal["drop_end"]]
32
39
  """The strategy to employ when content exceeds the model's maximum input length.
@@ -38,12 +45,5 @@ class EmbeddingCreateParams(TypedDict, total=False):
38
45
  input length.
39
46
  """
40
47
 
41
- task: Optional[Literal["retrieval/query", "retrieval/document"]]
42
- """The task the embeddings will be used for.
43
-
44
- `retrieval/query` is meant for queries and statements, and `retrieval/document`
45
- is meant for anything to be retrieved using query embeddings.
46
-
47
- If `null`, which is the default setting, embeddings will not be optimized for
48
- any particular task.
49
- """
48
+ dimensions: Optional[int]
49
+ """A whole number greater than or equal to 1."""
@@ -8,15 +8,15 @@ __all__ = ["EmbeddingResponse", "Embedding", "Usage"]
8
8
 
9
9
 
10
10
  class Embedding(BaseModel):
11
- embedding: List[float]
12
- """The embedding of the content represented as an array of floating point numbers."""
13
-
14
11
  index: int
15
12
  """
16
13
  The position of the content in the input array of contents, starting from `0`
17
14
  (and, therefore, ending at the number of contents minus `1`).
18
15
  """
19
16
 
17
+ embedding: List[float]
18
+ """The embedding of the content represented as an array of floating point numbers."""
19
+
20
20
 
21
21
  class Usage(BaseModel):
22
22
  """Statistics about the usage of resources in the process of embedding the inputs."""