isaacus 0.9.1__py3-none-any.whl → 0.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isaacus/_base_client.py +145 -13
- isaacus/_client.py +216 -46
- isaacus/_compat.py +3 -3
- isaacus/_models.py +53 -16
- isaacus/_streaming.py +12 -12
- isaacus/_types.py +12 -2
- isaacus/_utils/_json.py +35 -0
- isaacus/_utils/_sync.py +3 -31
- isaacus/_utils/_utils.py +1 -1
- isaacus/_version.py +1 -1
- isaacus/resources/__init__.py +14 -0
- isaacus/resources/enrichments.py +224 -0
- isaacus/resources/rerankings.py +2 -2
- isaacus/types/__init__.py +2 -0
- isaacus/types/classifications/universal_classification_response.py +2 -0
- isaacus/types/classifications/universal_create_params.py +3 -1
- isaacus/types/embedding_response.py +2 -0
- isaacus/types/enrichment_create_params.py +40 -0
- isaacus/types/enrichment_response.py +1458 -0
- isaacus/types/extractions/answer_extraction_response.py +8 -0
- isaacus/types/extractions/qa_create_params.py +3 -1
- isaacus/types/reranking_create_params.py +3 -1
- isaacus/types/reranking_response.py +2 -0
- {isaacus-0.9.1.dist-info → isaacus-0.10.1.dist-info}/METADATA +24 -6
- {isaacus-0.9.1.dist-info → isaacus-0.10.1.dist-info}/RECORD +27 -23
- {isaacus-0.9.1.dist-info → isaacus-0.10.1.dist-info}/licenses/LICENSE +1 -1
- {isaacus-0.9.1.dist-info → isaacus-0.10.1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,1458 @@
|
|
|
1
|
+
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
from typing_extensions import Literal
|
|
5
|
+
|
|
6
|
+
from .._models import BaseModel
|
|
7
|
+
|
|
8
|
+
# Names exported by `from <this module> import *`.
__all__ = [
    "EnrichmentResponse",
    "Result",
    "ResultDocument",
    "ResultDocumentCrossreference",
    "ResultDocumentCrossreferenceSpan",
    "ResultDocumentDate",
    "ResultDocumentDateMention",
    "ResultDocumentEmail",
    "ResultDocumentEmailMention",
    "ResultDocumentExternalDocument",
    "ResultDocumentExternalDocumentMention",
    "ResultDocumentExternalDocumentName",
    "ResultDocumentExternalDocumentPinpoint",
    "ResultDocumentHeading",
    "ResultDocumentIDNumber",
    "ResultDocumentIDNumberMention",
    "ResultDocumentJunk",
    "ResultDocumentLocation",
    "ResultDocumentLocationMention",
    "ResultDocumentLocationName",
    "ResultDocumentPerson",
    "ResultDocumentPersonMention",
    "ResultDocumentPersonName",
    "ResultDocumentPhoneNumber",
    "ResultDocumentPhoneNumberMention",
    "ResultDocumentQuote",
    "ResultDocumentQuoteSpan",
    "ResultDocumentSegment",
    "ResultDocumentSegmentCode",
    "ResultDocumentSegmentSpan",
    "ResultDocumentSegmentTitle",
    "ResultDocumentSegmentTypeName",
    "ResultDocumentSubtitle",
    "ResultDocumentTerm",
    "ResultDocumentTermMeaning",
    "ResultDocumentTermMention",
    "ResultDocumentTermName",
    "ResultDocumentTitle",
    "ResultDocumentWebsite",
    "ResultDocumentWebsiteMention",
    "Usage",
]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ResultDocumentCrossreferenceSpan(BaseModel):
    """A half-open span of Unicode code points within the document's text."""

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class ResultDocumentCrossreference(BaseModel):
    """A reference inside the document that points to one or more of its segments."""

    end: str
    """
    Unique identifier of the latest segment in the cross-referenced span of
    segments. Ties are broken in favor of the least-nested (i.e., largest)
    segment. When a single segment is cross-referenced, `end` equals `start`.
    """

    span: ResultDocumentCrossreferenceSpan
    """The associated half-open span within the document's text."""

    start: str
    """
    Unique identifier of the earliest segment in the cross-referenced span of
    segments. Ties are broken in favor of the least-nested (i.e., largest)
    segment. When a single segment is cross-referenced, `start` equals `end`.
    """
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ResultDocumentDateMention(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ResultDocumentDate(BaseModel):
    """A date found in a document and classified under one supported date type.

    The supported types are `creation`, `signature`, `effective`, `expiry`,
    `delivery`, `renewal`, `payment`, `birth`, and `death`.

    Extraction is limited to Gregorian dates from the year 1000 through the
    year 9999 (inclusive) that fit one of those types.
    """

    mentions: List[ResultDocumentDateMention]
    """One or more spans in the document's text at which the date is mentioned."""

    person: Optional[str] = None
    """
    Identifier of a legal person, formatted as `per:{index}` where `{index}` is
    a non-negative incrementing integer starting from zero.
    """

    type: Literal[
        "creation",
        "signature",
        "effective",
        "expiry",
        "delivery",
        "renewal",
        "payment",
        "birth",
        "death",
    ]
    """
    Which kind of date this is. A date mentioned in the document that fits none
    of the supported types is not extracted.

    - `creation`: the date the document was created. A document has at most one
      `creation` date.
    - `signature`: the date the document was signed.
    - `effective`: the date the document, or a part of it, comes into effect
      (e.g., commencement or enactment dates).
    - `expiry`: the date the document, or a part of it, is no longer in effect.
    - `delivery`: the date goods or services are to be delivered under the
      document.
    - `renewal`: the date one or more of the document's terms are to be
      renewed.
    - `payment`: the date payment is to be made under the document.
    - `birth`: the birth date of a natural person, or the establishment (e.g.,
      incorporation) date of a non-natural legal person, identified in the
      document. Every `birth` date is linked to a person, a person has at most
      one, and it is never after that person's `death` date.
    - `death`: the death date of a natural person, or the dissolution date of a
      non-natural legal person, identified in the document. Every `death` date
      is linked to a person, a person has at most one, and it is never before
      that person's `birth` date.
    """

    value: str
    """The date as an ISO 8601 string (YYYY-MM-DD)."""
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class ResultDocumentEmailMention(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class ResultDocumentEmail(BaseModel):
    """An email address found in a document that belongs to a legal person.

    Email addresses mentioned in the document that cannot be attributed to a
    legal person are not extracted.
    """

    address: str
    """The email address in normalized form."""

    mentions: List[ResultDocumentEmailMention]
    """One or more spans in the document's text at which the email address is mentioned."""

    person: str
    """Unique identifier of the person this email address belongs to."""
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
class ResultDocumentExternalDocumentMention(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class ResultDocumentExternalDocumentName(BaseModel):
    """A half-open span of Unicode code points within the document's text."""

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class ResultDocumentExternalDocumentPinpoint(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class ResultDocumentExternalDocument(BaseModel):
    """A document identified within another document."""

    id: str
    """
    The unique identifier of the external document in the format `exd:{index}` where
    `{index}` is a non-negative incrementing integer starting from zero.
    """

    jurisdiction: Optional[str] = None
    """
    A jurisdiction code representing a country (via an initial country code) and,
    optionally, a subdivision within that country (via a subsequent subdivision code
    prefixed by a hyphen).

    All 249 ISO 3166-1 alpha-2 country codes are representable in addition to
    special `INT` and `EU` codes for international and European Union law,
    respectively.

    All 5,046 ISO 3166-2 codes are also representable in addition to a special `FED`
    code for federal law.
    """

    mentions: List[ResultDocumentExternalDocumentMention]
    """
    An array of one or more spans within the document's text where the external
    document is mentioned by name, for example, 'the US Constitution' in 'the First
    Amendment to the US Constitution protects freedom of speech'.
    """

    name: ResultDocumentExternalDocumentName
    """
    A span within the document's text, presumably locating the external
    document's name (the generated description refers to it as the span of a
    segment; confirm against the API reference).
    """

    pinpoints: List[ResultDocumentExternalDocumentPinpoint]
    """
    An array of spans within the document's text where specific parts of the
    external document are referenced, for example, 'Section 2' in 'as defined in
    Section 2 of the US Constitution'.
    """

    reception: Literal["positive", "mixed", "negative", "neutral"]
    """
    The sentiment of the document towards the external document, being one of
    `positive`, `mixed`, `negative`, or `neutral`.

    `positive` indicates that the document expresses a favorable view of the
    external document whether by endorsing or approving it.

    `mixed` indicates that the document expresses both favorable and unfavorable
    views of the external document, for example, by affirming parts of it and
    disapproving others.

    `negative` indicates that the document expresses an unfavorable view of the
    external document whether by criticizing, repealing, overruling, or explicitly
    contradicting it.

    `neutral` indicates that the document references the external document without
    expressing any particular sentiment towards it.
    """

    type: Literal["statute", "regulation", "decision", "contract", "other"]
    """
    The type of the external document, being one of `statute`, `regulation`,
    `decision`, `contract`, or `other`.

    `statute` denotes primary legislation such as acts, bills, codes, and
    constitutions.

    `regulation` denotes secondary legislation such as rules, statutory instruments,
    and ordinances.

    `decision` denotes judicial or quasi-judicial decisions such as court judgments,
    judicial opinions, and tribunal rulings.

    `contract` is an accepted value that the generated description does not
    define; presumably it denotes contracts and other agreements (confirm
    against the API reference).

    `other` is used for all other types of legal documents that do not fit into any
    of the predefined types.
    """
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class ResultDocumentHeading(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
class ResultDocumentIDNumberMention(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
class ResultDocumentIDNumber(BaseModel):
    """An identification number found in a document that belongs to a legal person.

    Identification numbers mentioned in the document that cannot be attributed
    to a legal person are not extracted.
    """

    mentions: List[ResultDocumentIDNumberMention]
    """One or more spans in the document's text at which the identification number is mentioned."""

    number: str
    """The identification number itself."""

    person: str
    """Unique identifier of the person this identification number belongs to."""
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
class ResultDocumentJunk(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
class ResultDocumentLocationMention(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
class ResultDocumentLocationName(BaseModel):
    """A half-open span of Unicode code points within the document's text."""

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
class ResultDocumentLocation(BaseModel):
    """A location found within a document."""

    id: str
    """
    Unique identifier of the location, formatted as `loc:{index}` where
    `{index}` is a non-negative incrementing integer starting from zero.
    """

    mentions: List[ResultDocumentLocationMention]
    """One or more spans in the document's text at which the location is mentioned."""

    name: ResultDocumentLocationName
    """The associated half-open span within the document's text."""

    parent: Optional[str] = None
    """
    Unique identifier of a location, formatted as `loc:{index}` where `{index}`
    is a non-negative incrementing integer starting from zero.
    """

    type: Literal[
        "country",
        "state",
        "city",
        "address",
        "other",
    ]
    """The kind of location: `country`, `state`, `city`, `address`, or `other`."""
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
class ResultDocumentPersonMention(BaseModel):
    """A half-open, zero-based range of Unicode code points in the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested, and never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    No span is empty, and no span starts or ends at whitespace.

    The indices follow Python string indexing (zero-based, half-open, counted in
    Unicode code points). In other languages they may need translating; for
    example, JavaScript slices by UTF-16 code units rather than code points.
    """

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
class ResultDocumentPersonName(BaseModel):
    """A half-open span of Unicode code points within the document's text."""

    end: int
    """Exclusive, zero-based end index of the span, in Unicode code points of the input text."""

    start: int
    """Zero-based start index of the span, in Unicode code points of the input text."""
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
class ResultDocumentPerson(BaseModel):
|
|
557
|
+
"""A legal person identified in a document."""
|
|
558
|
+
|
|
559
|
+
id: str
|
|
560
|
+
"""
|
|
561
|
+
The unique identifier of the person in the format `per:{index}` where `{index}`
|
|
562
|
+
is a non-negative incrementing integer starting from zero.
|
|
563
|
+
"""
|
|
564
|
+
|
|
565
|
+
mentions: List[ResultDocumentPersonMention]
|
|
566
|
+
"""
|
|
567
|
+
An array of one or more spans within the document's text where the person is
|
|
568
|
+
mentioned.
|
|
569
|
+
"""
|
|
570
|
+
|
|
571
|
+
name: ResultDocumentPersonName
|
|
572
|
+
"""The span of the segment within the document's text."""
|
|
573
|
+
|
|
574
|
+
parent: Optional[str] = None
|
|
575
|
+
"""
|
|
576
|
+
A unique identifier for a legal person in the format `per:{index}` where
|
|
577
|
+
`{index}` is a non-negative incrementing integer starting from zero.
|
|
578
|
+
"""
|
|
579
|
+
|
|
580
|
+
residence: Optional[str] = None
|
|
581
|
+
"""
|
|
582
|
+
A unique identifier for a location in the format `loc:{index}` where `{index}`
|
|
583
|
+
is a non-negative incrementing integer starting from zero.
|
|
584
|
+
"""
|
|
585
|
+
|
|
586
|
+
role: Literal[
|
|
587
|
+
"plaintiff",
|
|
588
|
+
"petitioner",
|
|
589
|
+
"applicant",
|
|
590
|
+
"appellant",
|
|
591
|
+
"appellee",
|
|
592
|
+
"claimant",
|
|
593
|
+
"complainant",
|
|
594
|
+
"defendant",
|
|
595
|
+
"respondent",
|
|
596
|
+
"prior_authority",
|
|
597
|
+
"prosecutor",
|
|
598
|
+
"defense_counsel",
|
|
599
|
+
"amicus",
|
|
600
|
+
"intervener",
|
|
601
|
+
"borrower",
|
|
602
|
+
"lender",
|
|
603
|
+
"guarantor",
|
|
604
|
+
"lessee",
|
|
605
|
+
"lessor",
|
|
606
|
+
"employer",
|
|
607
|
+
"employee",
|
|
608
|
+
"licensor",
|
|
609
|
+
"licensee",
|
|
610
|
+
"franchisor",
|
|
611
|
+
"franchisee",
|
|
612
|
+
"buyer",
|
|
613
|
+
"seller",
|
|
614
|
+
"contractor",
|
|
615
|
+
"shareholder",
|
|
616
|
+
"joint_venturer",
|
|
617
|
+
"investor",
|
|
618
|
+
"insurer",
|
|
619
|
+
"insured",
|
|
620
|
+
"enacting_authority",
|
|
621
|
+
"empowered_authority",
|
|
622
|
+
"settlor",
|
|
623
|
+
"trustee",
|
|
624
|
+
"beneficiary",
|
|
625
|
+
"debater",
|
|
626
|
+
"director",
|
|
627
|
+
"governing_jurisdiction",
|
|
628
|
+
"clerk",
|
|
629
|
+
"witness",
|
|
630
|
+
"other",
|
|
631
|
+
"non_party",
|
|
632
|
+
]
|
|
633
|
+
"""The role of the person in relation to the subject of the document.
|
|
634
|
+
|
|
635
|
+
The following roles are currently supported: | | | | ------------------------ |
|
|
636
|
+
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|
|
637
|
+
| | `plaintiff` | A party initiating the case that is the subject of the
|
|
638
|
+
document. | | `petitioner` | A party initiating the petition that is the subject
|
|
639
|
+
of the document. | | `applicant` | A party initiating the application that is
|
|
640
|
+
the subject of the document. | | `appellant` | A party appealing the decision
|
|
641
|
+
that is the subject of the document. | | `appellee` | A party responding to the
|
|
642
|
+
appeal that is the subject of the document if they are explicitly referred to as
|
|
643
|
+
an 'appellee'. | | `claimant` | A party making a claim in the case that is the
|
|
644
|
+
subject of the document. | | `complainant` | A party making a complaint in the
|
|
645
|
+
case that is the subject of the document. | | `defendant` | A party defending
|
|
646
|
+
against the case that is the subject of the document. | | `respondent` | A party
|
|
647
|
+
responding to the petition, appeal, or application that is the subject of the
|
|
648
|
+
document. | | `prior_authority` | An authority (e.g., judge, tribunal, court)
|
|
649
|
+
that made a prior decision in the case that is the subject of the document. Both
|
|
650
|
+
individual judges and courts should be annotated with this role where
|
|
651
|
+
applicable. This is not to be used for authorities cited as precedent, only for
|
|
652
|
+
those that made prior decisions in the same case. | | `prosecutor` | A lawyer
|
|
653
|
+
prosecuting the case that is the subject of the document. | | `defense_counsel`
|
|
654
|
+
| A lawyer defending the case that is the subject of the document. | | `amicus`
|
|
655
|
+
| A party filing an amicus curiae brief in the case that is the subject of the
|
|
656
|
+
document. | | `intervener` | A party attempting to or that has intervened in the
|
|
657
|
+
case that is the subject of the document. | | `borrower` | A party borrowing
|
|
658
|
+
money or other assets under the agreement that is the subject of the document,
|
|
659
|
+
including 'mortgagors' and 'debtors'. | | `lender` | A party lending money or
|
|
660
|
+
other assets under the agreement that is the subject of the document, including
|
|
661
|
+
'mortgagees' and 'creditors'. | | `guarantor` | A party guaranteeing obligations
|
|
662
|
+
under the agreement that is the subject of the document, including 'sureties'. |
|
|
663
|
+
| `lessee` | A party leasing goods or services under the agreement that is the
|
|
664
|
+
subject of the document, including 'tenants'. | | `lessor` | A party leasing
|
|
665
|
+
goods or services under the agreement that is the subject of the document,
|
|
666
|
+
including 'landlords'. | | `employer` | A party employing personnel under the
|
|
667
|
+
agreement that is the subject of the document. | | `employee` | A party employed
|
|
668
|
+
under the agreement that is the subject of the document. | | `licensor` | A
|
|
669
|
+
party licensing intellectual property or other rights under the agreement that
|
|
670
|
+
are the subject of the document. | | `licensee` | A party licensed to use
|
|
671
|
+
intellectual property or other rights under the agreement that are the subject
|
|
672
|
+
of the document. | | `franchisor` | A party granting a franchise under the
|
|
673
|
+
agreement that is the subject of the document. | | `franchisee` | A party
|
|
674
|
+
granted a franchise under the agreement that is the subject of the document. | |
|
|
675
|
+
`buyer` | A party purchasing goods or services under the agreement that is the
|
|
676
|
+
subject of the document, including 'purchasers', 'customers', and 'clients'. | |
|
|
677
|
+
`seller` | A party selling or providing goods or services under the agreement
|
|
678
|
+
that is the subject of the document, including 'Vendors', 'Suppliers', and
|
|
679
|
+
'Service Providers' (where such parties are actually providing goods or services
|
|
680
|
+
under the agreement). | | `contractor` | A party contracted to perform work or
|
|
681
|
+
services under the agreement that is the subject of the document, including
|
|
682
|
+
'consultants'. | | `shareholder` | A party holding shares or equity under the
|
|
683
|
+
agreement that is the subject of the document. | | `joint_venturer` | A party
|
|
684
|
+
participating in a joint venture under the agreement that is the subject of the
|
|
685
|
+
document. | | `investor` | A party investing money or assets under the agreement
|
|
686
|
+
that is the subject of the document. | | `insurer` | A party providing insurance
|
|
687
|
+
under the agreement that is the subject of the document. | | `insured` | A party
|
|
688
|
+
insured under the agreement that is the subject of the document. | | `settlor` |
|
|
689
|
+
A party establishing the trust that is the subject of the document. | |
|
|
690
|
+
`trustee` | A party managing the trust that is the subject of the document. | |
|
|
691
|
+
`beneficiary` | A party benefiting from the trust that is the subject of the
|
|
692
|
+
document. | | `enacting_authority` | An authority (e.g., legislature, regulator,
|
|
693
|
+
Minister/Secretary, President/Prime Minister, tribunal, court, judge) giving
|
|
694
|
+
legal effect to or authorizing the document. All relevant individuals and bodies
|
|
695
|
+
should be annotated with this role where applicable. | | `empowered_authority` |
|
|
696
|
+
An authority (e.g., government agency, regulator, Minister/Secretary,
|
|
697
|
+
President/Prime Minister, tribunal, court) empowered by the document to carry
|
|
698
|
+
out functions or duties. | | `debater` | A person participating in the debate
|
|
699
|
+
that is the subject of the document. | | `governing_jurisdiction` | The
|
|
700
|
+
jurisdiction whose laws govern the document. | | `director` | A director or
|
|
701
|
+
other officer of a corporate legal person mentioned in the document. | | `clerk`
|
|
702
|
+
| A clerk, notary, or other official certifying, witnessing, filing, recording,
|
|
703
|
+
registering, or otherwise administering the document. | | `witness` | A witness
|
|
704
|
+
witnessing the signing of the document, or whose testimony is part of the case
|
|
705
|
+
that is the subject of the document. | | `other` | A party to the case,
|
|
706
|
+
agreement, legislation, or regulation that is the subject of the document that
|
|
707
|
+
does not fit into any of the other roles. | | `non_party` | A legal person
|
|
708
|
+
mentioned in the document that is not a party to the case, agreement,
|
|
709
|
+
legislation, or regulation that is the subject of the document. |
|
|
710
|
+
"""
|
|
711
|
+
|
|
712
|
+
type: Literal["natural", "corporate", "politic"]
|
|
713
|
+
"""
|
|
714
|
+
The legal entity type of the person, being one of `natural`, `corporate`, or
|
|
715
|
+
`politic`.
|
|
716
|
+
|
|
717
|
+
`natural` denotes a human being in their capacity as a natural legal person,
|
|
718
|
+
including when representing unincorporated entities such as partnerships and
|
|
719
|
+
trusts.
|
|
720
|
+
|
|
721
|
+
`corporate` denotes a body corporate such as a company, incorporated
|
|
722
|
+
partnership, or statutory corporation.
|
|
723
|
+
|
|
724
|
+
`politic` denotes a body politic such as a court, state, government, or
|
|
725
|
+
intergovernmental organization.
|
|
726
|
+
"""
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
class ResultDocumentPhoneNumberMention(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
class ResultDocumentPhoneNumber(BaseModel):
    """A valid phone number found in a document that belongs to a legal person.

    Phone numbers that are mentioned in the document but are not valid, possible,
    or attributable to a legal person are not extracted.
    """

    mentions: List[ResultDocumentPhoneNumberMention]
    """
    One or more spans within the document's text where the phone number is
    mentioned.
    """

    number: str
    """
    The phone number normalized to E.123 international notation, conforming with
    local conventions on the use of spaces and hyphens as separators.
    """

    person: str
    """The unique identifier of the person to whom this phone number belongs."""
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
class ResultDocumentQuoteSpan(BaseModel):
    """The span of a quote within the document's text.

    The span is zero-based and half-open: `start` is the first index covered and
    `end` is exclusive. Both are measured in Unicode code points of the input text.
    """

    end: int
    """
    The zero-based end index of the half-open span (i.e., the end is exclusive) of
    Unicode code points in the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span of Unicode code points in the
    input text.
    """
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
class ResultDocumentQuote(BaseModel):
    """A quotation within a document."""

    amending: bool
    """
    Whether the quote is being used to amend or modify content, typically in other
    documents.
    """

    # NOTE(review): the three `source_*` fields below are optional and default to
    # `None`. Judging by their names they identify where the quote comes from;
    # confirm the exact semantics (and when each is populated) against the API
    # reference.
    source_document: Optional[str] = None
    """
    The unique identifier of an external document, in the format `exd:{index}`
    where `{index}` is a non-negative incrementing integer starting from zero.
    """

    source_person: Optional[str] = None
    """
    The unique identifier of a legal person, in the format `per:{index}` where
    `{index}` is a non-negative incrementing integer starting from zero.
    """

    source_segment: Optional[str] = None
    """
    The unique identifier of a segment, in the format `seg:{index}` where
    `{index}` is a non-negative incrementing integer starting from zero.
    """

    span: ResultDocumentQuoteSpan
    """The span of the quote within the document's text."""
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
class ResultDocumentSegmentCode(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
842
|
+
|
|
843
|
+
|
|
844
|
+
class ResultDocumentSegmentSpan(BaseModel):
    """The span that a segment occupies within the document's text."""

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
class ResultDocumentSegmentTitle(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
class ResultDocumentSegmentTypeName(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
class ResultDocumentSegment(BaseModel):
    """A segment of the document: a structurally distinct portion of its content."""

    id: str
    """
    The segment's unique identifier, formatted as `seg:{index}` where `{index}` is
    a non-negative incrementing integer starting from zero.
    """

    category: Literal["front_matter", "scope", "main", "annotation", "back_matter", "other"]
    """
    The functional 'category' of the segment within the document: one of
    `front_matter`, `scope`, `main`, `annotation`, `back_matter`, or `other`.

    - `front_matter`: non-operative contextualizing content occurring at the start
      of a document, such as a preamble or recitals.
    - `scope`: operative content defining the application or interpretation of a
      document, such as definition sections and governing law clauses.
    - `main`: operative, non-scopal content.
    - `annotation`: non-operative annotative content providing explanatory or
      referential information, such as commentary, footnotes, and endnotes.
    - `back_matter`: non-operative contextualizing content occurring at the end of
      a document, such as authority statements.
    - `other`: content that does not fit into any of the other categories.
    """

    code: Optional[ResultDocumentSegmentCode] = None
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    kind: Literal["container", "unit", "item", "figure"]
    """
    The structural 'kind' of the segment: one of `container`, `unit`, `item`, or
    `figure`.

    - `container`: a structural or semantic grouping of content such as a chapter.
      It can contain segments of any kind or none at all.
    - `unit`: a single syntactically independent unit of text such as a paragraph.
      It can only contain `item`s and `figure`s.
    - `item`: a syntactically subordinate unit of text such as an item in a run-in
      list. It can only contain other `item`s. An `item` is conceptually distinct
      from a list item—it is perfectly possible to encounter list items that are
      syntactically independent of their surrounding items, just as it is possible
      to encounter dependent clauses that do not appear as part of a list.
    - `figure`: a visually structured or tabular unit of content such as a diagram,
      equation, or table. It cannot contain segments.
    """

    parent: Optional[str] = None
    """
    A unique identifier for a segment, formatted as `seg:{index}` where `{index}`
    is a non-negative incrementing integer starting from zero.
    """

    span: ResultDocumentSegmentSpan
    """The span that the segment occupies within the document's text."""

    title: Optional[ResultDocumentSegmentTitle] = None
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    type: Optional[
        Literal[
            "title",
            "book",
            "part",
            "chapter",
            "subchapter",
            "division",
            "subdivision",
            "subpart",
            "subtitle",
            "table_of_contents",
            "article",
            "section",
            "regulation",
            "rule",
            "clause",
            "paragraph",
            "subarticle",
            "subsection",
            "subregulation",
            "subrule",
            "subclause",
            "subparagraph",
            "item",
            "subitem",
            "point",
            "indent",
            "schedule",
            "annex",
            "appendix",
            "exhibit",
            "recital",
            "signature",
            "note",
            "figure",
            "table",
            "formula",
        ]
    ] = None
    """
    The addressable 'type' of the segment within the document's referential scheme
    and hierarchy. The type may be defined explicitly (e.g., by headings, such as
    'Section 2. Definitions'), implicitly (e.g., by way of reference, such as 'as
    defined in Section 2'), or by convention (e.g., [42] in a judgment often
    denotes a `paragraph`, independent provisions in statute are often `section`s,
    etc.). If the type is not known or not applicable, it will be set to `null`.

    Although many segment types may coincide with syntactic constructs, they
    should be thought of purely as distinct formal citable units. For example,
    most paragraphs (in the syntactic sense) will not have the `paragraph` type;
    that type is reserved for segments that would formally be cited as a
    'Paragraph' within the document's referential scheme.

    The following types are currently supported: `title`, `book`, `part`,
    `chapter`, `subchapter`, `division`, `subdivision`, `subpart`, `subtitle`,
    `table_of_contents`, `article`, `section`, `regulation`, `rule`, `clause`,
    `paragraph`, `subarticle`, `subsection`, `subregulation`, `subrule`,
    `subclause`, `subparagraph`, `item`, `subitem`, `point`, `indent`, `schedule`,
    `annex`, `appendix`, `exhibit`, `recital`, `signature`, `note`, `figure`,
    `table`, and `formula`.

    The `title`, `book`, `part`, `chapter`, `subchapter`, `division`,
    `subdivision`, `subpart`, `subtitle`, and `table_of_contents` types are
    exclusive to the `container` kind.

    The `figure` kind only supports the `figure`, `table`, and `formula` types,
    all of which are exclusive to it.
    """

    type_name: Optional[ResultDocumentSegmentTypeName] = None
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
class ResultDocumentSubtitle(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
1110
|
+
|
|
1111
|
+
|
|
1112
|
+
class ResultDocumentTermMeaning(BaseModel):
    """The span of a term's meaning within the document's text.

    The span is zero-based and half-open: `start` is the first index covered and
    `end` is exclusive. Both are measured in Unicode code points of the input text.
    """

    end: int
    """
    The zero-based end index of the half-open span (i.e., the end is exclusive) of
    Unicode code points in the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span of Unicode code points in the
    input text.
    """
|
|
1126
|
+
|
|
1127
|
+
|
|
1128
|
+
class ResultDocumentTermMention(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
1149
|
+
|
|
1150
|
+
|
|
1151
|
+
class ResultDocumentTermName(BaseModel):
    """The span of a term's name within the document's text.

    The span is zero-based and half-open: `start` is the first index covered and
    `end` is exclusive. Both are measured in Unicode code points of the input text.
    """

    end: int
    """
    The zero-based end index of the half-open span (i.e., the end is exclusive) of
    Unicode code points in the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span of Unicode code points in the
    input text.
    """
|
|
1165
|
+
|
|
1166
|
+
|
|
1167
|
+
class ResultDocumentTerm(BaseModel):
    """A term assigned a definite meaning within a document."""

    id: str
    """
    The unique identifier of the term in the format `term:{index}` where `{index}`
    is a non-negative incrementing integer starting from zero.
    """

    meaning: ResultDocumentTermMeaning
    """The span of the term's meaning within the document's text."""

    mentions: List[ResultDocumentTermMention]
    """
    An array of spans within the document's text where the term is mentioned outside
    of its definition.

    It is possible for the term to have no mentions if, outside of its definition,
    it is never referred to in the document.
    """

    name: ResultDocumentTermName
    """The span of the term's name within the document's text."""
|
|
1190
|
+
|
|
1191
|
+
|
|
1192
|
+
class ResultDocumentTitle(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
1213
|
+
|
|
1214
|
+
|
|
1215
|
+
class ResultDocumentWebsiteMention(BaseModel):
    """A zero-based, half-open span over the Unicode code points of the input text.

    Spans are globally laminar and well-nested, much like XML elements: any two
    spans are either disjoint, adjacent, or wholly nested—they never partially
    overlap. Spans of the exact same type (e.g., segments) are never duplicated.

    A span is never empty and never starts or ends at whitespace.

    Python string indexing is itself zero-based, half-open, and code point-spaced.
    In other programming languages the indices may need to be translated (for
    example, JavaScript slices into UTF-16 code units instead of Unicode code
    points).
    """

    end: int
    """
    The zero-based, exclusive end index of the half-open span, measured in Unicode
    code points of the input text.
    """

    start: int
    """
    The zero-based start index of the half-open span, measured in Unicode code
    points of the input text.
    """
|
|
1236
|
+
|
|
1237
|
+
|
|
1238
|
+
class ResultDocumentWebsite(BaseModel):
    """A website found in a document that belongs to a legal person.

    Websites that are mentioned in the document but are not attributable to a
    legal person are not extracted.
    """

    mentions: List[ResultDocumentWebsiteMention]
    """
    One or more spans within the document's text where the website is mentioned
    (including paths and slugs which are not part of the website's normalized
    URL).
    """

    person: str
    """The unique identifier of the person to whom this website belongs."""

    url: str
    """The website's normalized URL, in the form `https://{host}/`."""
|
|
1256
|
+
|
|
1257
|
+
|
|
1258
|
+
class ResultDocument(BaseModel):
    """The enriched document."""

    crossreferences: List[ResultDocumentCrossreference]
    """
    An array of cross-references within the document pointing to a single segment or
    a span of segments.
    """

    dates: List[ResultDocumentDate]
    """
    An array of dates identified in the document belonging to one of the following
    types: `creation`, `signature`, `effective`, `expiry`, `delivery`, `renewal`,
    `payment`, `birth`, or `death`.

    Only Gregorian dates between the years 1000 and 9999 (inclusive) fitting into
    one of the supported date types are extractable.
    """

    emails: List[ResultDocumentEmail]
    """
    An array of email addresses identified in the document belonging to legal
    persons.

    Email addresses mentioned in the document that are not attributable to legal
    persons will not be extracted.
    """

    external_documents: List[ResultDocumentExternalDocument]
    """An array of documents identified within the document."""

    headings: List[ResultDocumentHeading]
    """An array of spans within the document's text constituting headings."""

    id_numbers: List[ResultDocumentIDNumber]
    """
    An array of identification numbers identified in the document belonging to legal
    persons.

    Identification numbers mentioned in the document that are not attributable to
    legal persons will not be extracted.
    """

    junk: List[ResultDocumentJunk]
    """
    An array of spans within the document's text constituting non-operative,
    non-substantive 'junk' content such as headers, footers, page numbers, and OCR
    artifacts.
    """

    jurisdiction: Optional[str] = None
    """
    A jurisdiction code representing a country (via an initial country code) and,
    optionally, a subdivision within that country (via a subsequent subdivision code
    prefixed by a hyphen).

    All 249 ISO 3166-1 alpha-2 country codes are representable in addition to
    special `INT` and `EU` codes for international and European Union law,
    respectively.

    All 5,046 ISO 3166-2 codes are also representable in addition to a special `FED`
    code for federal law.
    """

    locations: List[ResultDocumentLocation]
    """An array of locations identified in the document."""

    persons: List[ResultDocumentPerson]
    """An array of legal persons identified in the document."""

    phone_numbers: List[ResultDocumentPhoneNumber]
    """
    An array of valid phone numbers identified in the document belonging to legal
    persons.

    Phone numbers mentioned in the document that are not valid, possible, or
    attributable to legal persons will not be extracted.
    """

    quotes: List[ResultDocumentQuote]
    """An array of quotations within the document."""

    segments: List[ResultDocumentSegment]
    """
    An array of segments within the document representing structurally distinct
    portions of its content.
    """

    subtitle: Optional[ResultDocumentSubtitle] = None
    """The span of the document's subtitle within the input text; `None` when absent.

    The span is zero-based and half-open, and indexes into the Unicode code point
    space of the input text.

    All spans are globally laminar and well-nested similar to XML—it is impossible
    for any two spans to partially overlap; they can only be disjoint, adjacent, or
    wholly nested. Spans of the exact same type (e.g., segments) will never be
    duplicated.

    A span cannot be empty and will never start or end at whitespace.

    Note that, when using programming languages other than Python (which uses
    zero-based, half-open, Unicode code point-spaced string indexing), indices may
    need to be translated accordingly (for example, JavaScript slices into UTF-16
    code units instead of Unicode code points).
    """

    terms: List[ResultDocumentTerm]
    """An array of terms assigned definite meanings within the document."""

    title: Optional[ResultDocumentTitle] = None
    """The span of the document's title within the input text; `None` when absent.

    The span is zero-based and half-open, and indexes into the Unicode code point
    space of the input text.

    All spans are globally laminar and well-nested similar to XML—it is impossible
    for any two spans to partially overlap; they can only be disjoint, adjacent, or
    wholly nested. Spans of the exact same type (e.g., segments) will never be
    duplicated.

    A span cannot be empty and will never start or end at whitespace.

    Note that, when using programming languages other than Python (which uses
    zero-based, half-open, Unicode code point-spaced string indexing), indices may
    need to be translated accordingly (for example, JavaScript slices into UTF-16
    code units instead of Unicode code points).
    """

    type: Literal["statute", "regulation", "decision", "contract", "other"]
    """
    The type of the document, being one of `statute`, `regulation`, `decision`,
    `contract`, or `other`.

    `statute` denotes primary legislation such as acts, bills, codes, and
    constitutions.

    `regulation` denotes secondary legislation such as rules, statutory instruments,
    and ordinances.

    `decision` denotes judicial or quasi-judicial decisions such as court judgments,
    judicial opinions, and tribunal rulings.

    `other` is used for all other types of legal documents that do not fit into any
    of the predefined types.
    """

    version: Literal["ilgs@1"]
    """
    The identifier of the Isaacus Legal Graph Schema (ILGS) version that this
    document is expressed in. `ilgs@1` is the only value this model accepts.
    """

    websites: List[ResultDocumentWebsite]
    """An array of websites identified in the document belonging to legal persons.

    Websites mentioned in the document that are not attributable to legal persons
    will not be extracted.
    """
|
|
1407
|
+
|
|
1408
|
+
|
|
1409
|
+
class Result(BaseModel):
    """Pairs an enriched document with its index in the input array of texts."""

    document: ResultDocument
    """The document after enrichment."""

    index: int
    """
    Zero-based position of this document in the input array of texts; the last
    document therefore has an index equal to the number of inputs minus `1`.
    """
|
|
1420
|
+
|
|
1421
|
+
|
|
1422
|
+
class Usage(BaseModel):
    """Resource-usage statistics gathered while enriching the input."""

    input_tokens: int
    """How many tokens, in total, were inputted to the model."""
|
|
1427
|
+
|
|
1428
|
+
|
|
1429
|
+
class EnrichmentResponse(BaseModel):
    """Enriched documents together with statistics about the resources used."""

    results: List[Result]
    """
    The input documents enriched into version 1.0.0 of the Isaacus Legal Graph
    Schema (ILGS).

    All spans in an enriched document graph are indexed into the Unicode code point
    space of a source document. Access to source documents is thus required to
    resolve spans into text.

    The start and end indices of spans are zero-based (i.e., the first Unicode code
    point in the document is at index 0) and half-open (i.e., the end index is
    exclusive).

    All spans are globally laminar and well-nested similar to XML—it is impossible
    for any two spans to partially overlap; they can only be disjoint, adjacent, or
    wholly nested.

    Spans of the exact same type (e.g., segments) will never be duplicated.

    Spans cannot be empty and will never start or end at whitespace.

    When using programming languages other than Python (which uses zero-based,
    half-open, Unicode code point-spaced string indexing), indices may need to be
    translated accordingly (for example, JavaScript slices into UTF-16 code units
    instead of Unicode code points).
    """

    usage: Usage
    """Statistics about the usage of resources in the process of enriching the input."""
|