indexify 0.0.19.tar.gz → 0.0.21.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {indexify-0.0.19 → indexify-0.0.21}/PKG-INFO +2 -1
- {indexify-0.0.19 → indexify-0.0.21}/indexify/__init__.py +3 -2
- {indexify-0.0.19 → indexify-0.0.21}/indexify/client.py +80 -89
- indexify-0.0.21/indexify/extraction_policy.py +68 -0
- {indexify-0.0.19 → indexify-0.0.21}/indexify/extractor.py +6 -1
- indexify-0.0.21/indexify/settings.py +2 -0
- {indexify-0.0.19 → indexify-0.0.21}/pyproject.toml +2 -1
- indexify-0.0.19/indexify/extraction_policy.py +0 -28
- indexify-0.0.19/indexify/settings.py +0 -2
- {indexify-0.0.19 → indexify-0.0.21}/LICENSE.txt +0 -0
- {indexify-0.0.19 → indexify-0.0.21}/README.md +0 -0
- {indexify-0.0.19 → indexify-0.0.21}/indexify/data_containers.py +0 -0
- {indexify-0.0.19 → indexify-0.0.21}/indexify/exceptions.py +0 -0
- {indexify-0.0.19 → indexify-0.0.21}/indexify/index.py +0 -0
- {indexify-0.0.19 → indexify-0.0.21}/indexify/utils.py +0 -0
PKG-INFO:

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: indexify
-Version: 0.0.19
+Version: 0.0.21
 Summary: Python Client for Indexify
 Home-page: https://github.com/tensorlakeai/indexify
 License: Apache 2.0
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: httpx[http2] (>=0.26,<0.27)
+Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
 Project-URL: Repository, https://github.com/tensorlakeai/indexify
 Description-Content-Type: text/markdown
indexify/__init__.py:

@@ -1,6 +1,6 @@
 from .index import Index
 from .client import IndexifyClient
-from .extraction_policy import ExtractionPolicy
+from .extraction_policy import ExtractionPolicy, ExtractionGraphBuilder, ExtractionGraph
 from .client import IndexifyClient, Document
 from .settings import DEFAULT_SERVICE_URL
 
@@ -8,6 +8,7 @@ __all__ = [
     "Index",
     "Document",
     "IndexifyClient",
-    "ExtractionPolicy",
+    "ExtractionGraph",
+    "ExtractionGraphBuilder" "ExtractionPolicy",
     "DEFAULT_SERVICE_URL",
 ]
indexify/client.py:

@@ -6,7 +6,7 @@ import json
 from collections import namedtuple
 from .settings import DEFAULT_SERVICE_URL
 from .extractor import Extractor
-from .extraction_policy import ExtractionPolicy
+from .extraction_policy import ExtractionPolicy, ExtractionGraph
 from .index import Index
 from .utils import json_set_default
 from .data_containers import TextChunk

@@ -19,6 +19,7 @@ Document = namedtuple("Document", ["text", "labels", "id"])
 
 SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
 
+
 @dataclass
 class SqlQueryResult:
     result: List[Dict]
@@ -45,22 +46,22 @@ class IndexifyClient:
 
     def __init__(
         self,
-        service_url: str = DEFAULT_SERVICE_URL,
+        service_url: str = DEFAULT_SERVICE_URL,  # switch this to DEFAULT_SERVICE_URL_HTTPS for TLS
         namespace: str = "default",
         config_path: Optional[str] = None,
         *args,
         **kwargs,
     ):
         if config_path:
-            with open(config_path,
+            with open(config_path, "r") as file:
                 config = yaml.safe_load(file)
-
-            if config.get(
-                tls_config = config[
+
+            if config.get("use_tls", False):
+                tls_config = config["tls_config"]
                 self._client = httpx.Client(
                     http2=True,
-                    cert=(tls_config[
-                    verify=tls_config.get(
+                    cert=(tls_config["cert_path"], tls_config["key_path"]),
+                    verify=tls_config.get("ca_bundle_path", True),
                 )
             else:
                 self._client = httpx.Client(*args, **kwargs)
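The rewritten `__init__` loads TLS settings from a YAML file: a top-level `use_tls` flag and a `tls_config` mapping with `cert_path`, `key_path`, and an optional `ca_bundle_path` (which is also why `pyyaml` becomes a dependency in this release). A minimal sketch of a matching config and client construction, assuming a running Indexify server; the file paths and HTTPS URL are placeholders:

```python
from indexify import IndexifyClient

# Hypothetical config consumed by IndexifyClient(config_path=...); the keys
# mirror what __init__ reads above, the paths are placeholders.
TLS_CONFIG = """
use_tls: true
tls_config:
  cert_path: /path/to/client.crt
  key_path: /path/to/client.key
  ca_bundle_path: /path/to/ca.pem  # optional; verification defaults to True
"""

with open("indexify.yaml", "w") as f:
    f.write(TLS_CONFIG)

client = IndexifyClient(
    service_url="https://localhost:8900",  # assumption: an HTTPS endpoint, per the comment in __init__
    config_path="indexify.yaml",
)
```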
@@ -68,17 +69,18 @@ class IndexifyClient:
             self._client = httpx.Client(*args, **kwargs)
 
         self.namespace: str = namespace
-        self.
+        self.extraction_graphs: List[ExtractionGraph] = []
         self.labels: dict = {}
         self._service_url = service_url
+        self._timeout = kwargs.get("timeout")
 
         # get namespace data
         response = self.get(f"namespaces/{self.namespace}")
         response.raise_for_status()
         resp_json = response.json()
         # initialize extraction_policies
-        for eb in resp_json["namespace"]["
-            self.
+        for eb in resp_json["namespace"]["extraction_graphs"]:
+            self.extraction_graphs.append(ExtractionGraph.from_dict(eb))
 
     @classmethod
     def with_mtls(

@@ -128,7 +130,7 @@ class IndexifyClient:
         return client
 
     def _request(self, method: str, **kwargs) -> httpx.Response:
-        response = self._client.request(method,timeout=
+        response = self._client.request(method, timeout=self._timeout, **kwargs)
         try:
             response.raise_for_status()
         except httpx.HTTPStatusError as exc:
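Because `__init__` now stashes `kwargs.get("timeout")` and `_request` forwards it on every call, a timeout set once at construction applies to every request made through the client. A sketch, with an arbitrary 30-second value and a server assumed on the default URL:

```python
from indexify import IndexifyClient

# timeout is passed through **kwargs to httpx.Client and also stored as
# self._timeout, which _request() reuses for each request.
client = IndexifyClient(timeout=30.0)
```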
@@ -188,7 +190,7 @@ class IndexifyClient:
         ```
         """
         return self._request("PUT", url=f"{self._service_url}/{endpoint}", **kwargs)
-
+
     def delete(self, endpoint: str, **kwargs) -> httpx.Response:
         """
         Make a DELETE request to the Indexify service.

@@ -243,9 +245,9 @@ class IndexifyClient:
     def create_namespace(
         self,
         namespace: str,
-
+        extraction_graphs: list = [],
         labels: dict = {},
-        service_url: str = DEFAULT_SERVICE_URL
+        service_url: str = DEFAULT_SERVICE_URL,
     ) -> "IndexifyClient":
         """
         Create a new namespace.

@@ -253,16 +255,16 @@ class IndexifyClient:
         Returns:
             IndexifyClient: a new client with the given namespace
         """
-
-        for bd in
-            if isinstance(bd,
-
+        extraction_graphs = []
+        for bd in extraction_graphs:
+            if isinstance(bd, extraction_graphs):
+                extraction_graphs.append(bd.to_dict())
             else:
-
+                extraction_graphs.append(bd)
 
         req = {
             "name": namespace,
-            "
+            "extraction_graphs": extraction_graphs,
             "labels": labels,
         }
@@ -318,57 +320,24 @@ class IndexifyClient:
         self.extraction_policies.append(ExtractionPolicy.from_dict(eb))
         return self.extraction_policies
 
-    def add_extraction_policy(
-
-
-        name: str,
-        input_params: dict = {},
-        labels_eq: str = None,
-        content_source="ingestion",
-    ) -> dict:
-        """Add a new extraction policy.
+    def create_extraction_graph(self, extraction_graph: ExtractionGraph):
+        """
+        Create a new extraction graph.
 
         Args:
-            -
-            - name (str): Name for this instance
-            - input_params (dict): Dictionary containing extractor input params
-            - filter (Filter): Optional filter for this extractor
-
-        Returns:
-            dict: response payload
-
-        Examples:
-            >>> repo.add_extraction_policy("EfficientNet", "efficientnet")
-
-            >>> repo.add_extraction_policy("MiniLML6", "minilm")
-
+            - extraction_graph (ExtractionGraph): the extraction graph to create
         """
-        req =
-
-            "name": name,
-            "input_params": input_params,
-            "filters_eq": labels_eq,
-            "content_source": content_source,
-        }
-        if req["filters_eq"] == None:
-            del req["filters_eq"]
-
+        req = extraction_graph.to_dict()
+        req["namespace"] = self.namespace
         request_body = json.dumps(req, default=json_set_default)
         response = self.post(
-            f"namespaces/{self.namespace}/
+            f"namespaces/{self.namespace}/extraction_graphs",
             data=request_body,
             headers={"Content-Type": "application/json"},
         )
-
-        # update self.extractor_bindings
-        self.get_extraction_policies()
-
-        try:
-            response.raise_for_status()
-        except httpx.HTTPStatusError as exc:
-            raise ApiException(exc.response.text)
+        response.raise_for_status()
         return
-
+
     def get_content_metadata(self, content_id: str) -> dict:
         """
         Get metadata for a specific content ID in a given index.
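`add_extraction_policy` is gone: policies are now grouped under a named `ExtractionGraph` and registered with `create_extraction_graph`, which POSTs the graph to the new `extraction_graphs` endpoint. A sketch of the replacement flow using `ExtractionGraph.from_yaml` from the new `extraction_policy.py`; the graph name, extractor name, and running server are assumptions:

```python
from indexify import ExtractionGraph, IndexifyClient

# Hypothetical graph spec; "tensorlake/minilm-l6" is a placeholder extractor.
spec = """
name: summarizer
extraction_policies:
  - extractor: tensorlake/minilm-l6
    name: embed
    content_source: ingestion
"""

client = IndexifyClient()
graph = ExtractionGraph.from_yaml(spec)
client.create_extraction_graph(graph)
```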
@@ -399,11 +368,11 @@ class IndexifyClient:
             self._add_content_url(content)
             for content in response.json()["content_list"]
         ]
-
-    def download_content(self, id:str) -> bytes:
+
+    def download_content(self, id: str) -> bytes:
         """
         Download content from id. Return bytes
-
+
         Args:
             - id (str): id of content to download
         """
@@ -415,7 +384,10 @@ class IndexifyClient:
             raise ApiException(exc.response.text)
 
     def add_documents(
-        self,
+        self,
+        extraction_graphs: Union[str, List[str]],
+        documents: Union[Document, str, List[Union[Document, str]]],
+        doc_id=None,
     ) -> None:
         """
         Add documents to current namespace.

@@ -423,6 +395,8 @@ class IndexifyClient:
         Args:
             - documents (Union[Document, str, List[Union[Document, str]]]): this can be a list of strings, list of Documents or a mix of both
         """
+        if isinstance(extraction_graphs, str):
+            extraction_graphs = [extraction_graphs]
         if isinstance(documents, Document):
             documents = [documents]
         elif isinstance(documents, str):

@@ -433,7 +407,9 @@ class IndexifyClient:
             if isinstance(item, Document):
                 new_documents.append(item)
             elif isinstance(item, str):
-                new_documents.append(
+                new_documents.append(
+                    Document(item, {}, id=None)
+                )  # don't pass in id for a string content because doesn't make sense to have same content id for all strings
             else:
                 raise ValueError(
                     "List items must be either Document instances or strings."

@@ -444,7 +420,10 @@ class IndexifyClient:
                 "Invalid type for documents. Expected Document, str, or list of these."
             )
 
-        req = {
+        req = {
+            "documents": [doc._asdict() for doc in documents],
+            "extraction_graph_names": extraction_graphs,
+        }
         response = self.post(
             f"namespaces/{self.namespace}/add_texts",
             json=req,
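`add_documents` now takes the target extraction graph(s) as its first argument, accepting either a single graph name or a list; strings and `Document` instances can still be mixed in one call. A sketch, reusing the hypothetical `summarizer` graph from above:

```python
from indexify import Document, IndexifyClient

client = IndexifyClient()
client.add_documents(
    "summarizer",  # or a list of graph names
    [
        "plain strings are wrapped in a Document with no id",
        Document("labeled text", {"source": "demo"}, id=None),
    ],
)
```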
@@ -475,7 +454,9 @@ class IndexifyClient:
             - path (str): relative path to the file to be uploaded
         """
         with open(path, "rb") as f:
-            response = self.put(
+            response = self.put(
+                f"namespaces/{self.namespace}/content/{document_id}", files={"file": f}
+            )
         response.raise_for_status()
 
     def get_structured_data(self, content_id: str) -> dict:
@@ -485,11 +466,15 @@ class IndexifyClient:
         Args:
             - content_id (str): content id to query
         """
-        response = self.get(
+        response = self.get(
+            f"namespaces/{self.namespace}/content/{content_id}/metadata"
+        )
         response.raise_for_status()
-        return response.json().get("metadata",[])
+        return response.json().get("metadata", [])
 
-    def search_index(
+    def search_index(
+        self, name: str, query: str, top_k: int, filters: List[str] = []
+    ) -> list[TextChunk]:
         """
         Search index in the current namespace.
 
@@ -508,7 +493,7 @@ class IndexifyClient:
         response.raise_for_status()
         return response.json()["results"]
 
-    def upload_file(self, path: str, id=None, labels: dict = {}) -> str:
+    def upload_file(self, extraction_graphs: Union[str, List[str]], path: str, id=None, labels: dict = {}) -> str:
         """
         Upload a file.
 

@@ -516,9 +501,11 @@ class IndexifyClient:
             - path (str): relative path to the file to be uploaded
             - labels (dict): labels to be associated with the file
         """
-
+        if isinstance(extraction_graphs, str):
+            extraction_graphs = [extraction_graphs]
+        params = {"extraction_graph_names": extraction_graphs}
         if id is not None:
-            params[
+            params["id"] = id
         with open(path, "rb") as f:
             response = self.post(
                 f"namespaces/{self.namespace}/upload_file",
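`upload_file` gains the same leading `extraction_graphs` argument, sent as the `extraction_graph_names` parameter. A sketch, assuming a local `report.pdf` and the hypothetical graph name again:

```python
from indexify import IndexifyClient

client = IndexifyClient()
# upload_file is annotated to return a str (presumably the new content id).
content_id = client.upload_file("summarizer", "report.pdf")
```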
@@ -537,18 +524,20 @@ class IndexifyClient:
         response = self.get(f"namespaces/{self.namespace}/schemas")
         response.raise_for_status()
         return response.json()
-
-    def get_content_tree(self, content_id:str):
+
+    def get_content_tree(self, content_id: str):
         """
         Get content tree for a given content id
 
         Args:
             - content_id (str): id of content
         """
-        response = self.get(
+        response = self.get(
+            f"namespaces/{self.namespace}/content/{content_id}/content-tree"
+        )
         response.raise_for_status()
         return response.json()
-
+
     def sql_query(self, query: str):
         """
         Execute a SQL query.
@@ -569,9 +558,13 @@ class IndexifyClient:
             data = row["data"]
             rows.append(data)
         return SqlQueryResult(result=rows)
-
-    def ingest_remote_file(
-
+
+    def ingest_remote_file(
+        self, extraction_graphs: Union[str, List[str]], url: str, mime_type: str, labels: Dict[str, str], id=None
+    ):
+        if isinstance(extraction_graphs, str):
+            extraction_graphs = [extraction_graphs]
+        req = {"url": url, "mime_type": mime_type, "labels": labels, "id": id, "extraction_graph_names": extraction_graphs}
         response = self.post(
             f"namespaces/{self.namespace}/ingest_remote_file",
             json=req,
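`ingest_remote_file` likewise takes `extraction_graphs` first and now sends `extraction_graph_names` in the request body. A sketch with placeholder URL, mime type, and graph name:

```python
from indexify import IndexifyClient

client = IndexifyClient()
client.ingest_remote_file(
    "summarizer",
    "https://example.com/report.pdf",  # placeholder URL
    "application/pdf",
    labels={"source": "web"},
)
```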
@@ -579,7 +572,7 @@ class IndexifyClient:
         )
         response.raise_for_status()
         return response.json()
-
+
     def generate_unique_hex_id(self):
         """
         Generate a unique hexadecimal identifier

@@ -588,18 +581,16 @@ class IndexifyClient:
             str: a unique hexadecimal string
         """
         return uuid.uuid4().hex[:16]
-
+
     def generate_hash_from_string(self, input_string: str):
         """
         Generate a hash for the given string and return it as a hexadecimal string.
-
+
         Args:
             input_string (str): The input string to hash.
-
+
         Returns:
             str: The hexadecimal hash of the input string.
         """
         hash_object = hashlib.sha256(input_string.encode())
         return hash_object.hexdigest()[:16]
-
-
indexify-0.0.21/indexify/extraction_policy.py (new file):

@@ -0,0 +1,68 @@
+from dataclasses import dataclass, asdict
+from typing import Optional, List
+
+
+@dataclass
+class ExtractionPolicy:
+    extractor: str
+    name: str
+    content_source: str
+    input_params: Optional[dict] = None
+    id: Optional[str] = None
+    labels_eq: Optional[str] = None
+
+    def __repr__(self) -> str:
+        return f"ExtractionPolicy(name={self.name} extractor={self.extractor})"
+
+    def __str__(self) -> str:
+        return self.__repr__()
+
+    def to_dict(self) -> dict:
+        filtered_dict = {k: v for k, v in asdict(self).items() if v is not None}
+        return filtered_dict
+
+    @classmethod
+    def from_dict(cls, json: dict):
+        if "filters_eq" in json:
+            json["labels_eq"] = json.pop("filters_eq")
+        json["id"] = json.get("id", None)
+        return ExtractionPolicy(**json)
+
+
+@dataclass
+class ExtractionGraph:
+    id: str
+    name: str
+    extraction_policies: List[ExtractionPolicy]
+
+    @classmethod
+    def from_dict(cls, json: dict):
+        json["id"] = json.get("id", None)
+        if "namespace" in json.keys():
+            json.pop("namespace")
+        return ExtractionGraph(**json)
+
+    @staticmethod
+    def from_yaml(spec: str):
+        import yaml
+
+        return ExtractionGraph.from_dict(yaml.load(spec, Loader=yaml.FullLoader))
+
+    def to_dict(self) -> dict:
+        filtered_dict = {k: v for k, v in asdict(self).items() if v is not None}
+        return filtered_dict
+
+
+class ExtractionGraphBuilder:
+    def __init__(self, name: str):
+        self.name = name
+        self.extraction_policies = []
+
+    def policy(self, policy: ExtractionPolicy) -> "ExtractionGraphBuilder":
+        self.extraction_policies.append(policy)
+        return self
+
+    def build(self):
+        return ExtractionGraph(
+            id=self.id, name=self.name, extraction_policies=self.extraction_policies
+        )
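One caveat in the new module: `ExtractionGraphBuilder.build()` reads `self.id`, but `__init__` never sets an `id` attribute, so calling `build()` on a fresh builder raises `AttributeError`. A sketch that works around this by assigning `id` explicitly before building; the extractor and graph names are placeholders:

```python
from indexify import ExtractionGraphBuilder, ExtractionPolicy

builder = ExtractionGraphBuilder("summarizer")
builder.id = None  # workaround: build() references self.id, which __init__ never sets

graph = builder.policy(
    ExtractionPolicy(
        extractor="tensorlake/minilm-l6",  # placeholder extractor name
        name="embed",
        content_source="ingestion",
    )
).build()
```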
indexify/extractor.py:

@@ -17,7 +17,12 @@ class ExtractorSchema:
 
 class Extractor:
     def __init__(
-        self,
+        self,
+        name: str,
+        description: str,
+        input_params: dict,
+        outputs: ExtractorSchema,
+        input_mime_types: list[str],
     ):
         self.name = name
         self.description = description
pyproject.toml:

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "indexify"
-version = "0.0.19"
+version = "0.0.21"
 description = "Python Client for Indexify"
 authors = ["Diptanu Gon Choudhury <diptanuc@gmail.com>", "Vijay Parthasarathy <vijay2win@gmail.com>"]
 license = "Apache 2.0"

@@ -11,6 +11,7 @@ repository = "https://github.com/tensorlakeai/indexify"
 [tool.poetry.dependencies]
 python = "^3.10.0"
 httpx = { version = "^0.26", extras = ["http2"] }
+pyyaml = "^6.0.1"
 
 [tool.poetry.dev-dependencies]
 black = "^22.3.0"
indexify-0.0.19/indexify/extraction_policy.py (removed):

@@ -1,28 +0,0 @@
-from dataclasses import dataclass, asdict
-from typing import Optional
-
-
-@dataclass
-class ExtractionPolicy:
-    extractor: str
-    name: str
-    content_source: str
-    input_params: dict
-    id: Optional[str] = None
-    labels_eq: Optional[str] = None
-
-    def __repr__(self) -> str:
-        return f"ExtractionPolicy(name={self.name} extractor={self.extractor})"
-
-    def __str__(self) -> str:
-        return self.__repr__()
-
-    def to_dict(self) -> dict:
-        filtered_dict = {k: v for k, v in asdict(self).items() if v is not None}
-        return filtered_dict
-
-    @classmethod
-    def from_dict(cls, json: dict):
-        if "filters_eq" in json:
-            json["labels_eq"] = json.pop("filters_eq")
-        return ExtractionPolicy(**json)