indexify 0.0.42__py3-none-any.whl → 0.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (43)
  1. indexify/__init__.py +13 -14
  2. indexify/base_client.py +48 -21
  3. indexify/cli.py +235 -0
  4. indexify/client.py +18 -790
  5. indexify/error.py +3 -30
  6. indexify/executor/agent.py +362 -0
  7. indexify/executor/api_objects.py +43 -0
  8. indexify/executor/downloader.py +124 -0
  9. indexify/executor/executor_tasks.py +72 -0
  10. indexify/executor/function_worker.py +177 -0
  11. indexify/executor/indexify_executor.py +32 -0
  12. indexify/executor/task_reporter.py +110 -0
  13. indexify/executor/task_store.py +113 -0
  14. indexify/foo +72 -0
  15. indexify/functions_sdk/data_objects.py +37 -0
  16. indexify/functions_sdk/graph.py +276 -0
  17. indexify/functions_sdk/graph_validation.py +69 -0
  18. indexify/functions_sdk/image.py +26 -0
  19. indexify/functions_sdk/indexify_functions.py +192 -0
  20. indexify/functions_sdk/local_cache.py +46 -0
  21. indexify/functions_sdk/object_serializer.py +61 -0
  22. indexify/local_client.py +183 -0
  23. indexify/remote_client.py +319 -0
  24. indexify-0.2.dist-info/METADATA +151 -0
  25. indexify-0.2.dist-info/RECORD +32 -0
  26. indexify-0.2.dist-info/entry_points.txt +3 -0
  27. indexify/exceptions.py +0 -3
  28. indexify/extraction_policy.py +0 -75
  29. indexify/extractor_sdk/__init__.py +0 -14
  30. indexify/extractor_sdk/data.py +0 -100
  31. indexify/extractor_sdk/extractor.py +0 -223
  32. indexify/extractor_sdk/utils.py +0 -102
  33. indexify/extractors/__init__.py +0 -0
  34. indexify/extractors/embedding.py +0 -55
  35. indexify/extractors/pdf_parser.py +0 -93
  36. indexify/graph.py +0 -133
  37. indexify/local_runner.py +0 -128
  38. indexify/runner.py +0 -22
  39. indexify/utils.py +0 -7
  40. indexify-0.0.42.dist-info/METADATA +0 -66
  41. indexify-0.0.42.dist-info/RECORD +0 -25
  42. {indexify-0.0.42.dist-info → indexify-0.2.dist-info}/LICENSE.txt +0 -0
  43. {indexify-0.0.42.dist-info → indexify-0.2.dist-info}/WHEEL +0 -0
indexify/client.py CHANGED
@@ -1,790 +1,18 @@
- import hashlib
- import json
- import logging
- import uuid
- from collections import namedtuple
- from dataclasses import dataclass
- from typing import Dict, List, Optional, Union
-
- import httpx
- import yaml
-
- from indexify.exceptions import ApiException
-
- from .data_loaders import DataLoader
- from .error import Error
- from .extraction_policy import ExtractionGraph
- from .extractor_sdk.data import ContentMetadata
- from .extractor_sdk.extractor import ExtractorMetadata
- from .settings import DEFAULT_SERVICE_URL, DEFAULT_SERVICE_URL_HTTPS
- from .utils import json_set_default
-
- Document = namedtuple("Document", ["text", "labels", "id"])
-
- SQLQueryRow = namedtuple("SQLQueryRow", ["content_id", "data"])
-
-
- def generate_unique_hex_id():
-     """
-     Generate a unique hexadecimal identifier
-
-     Returns:
-         str: a unique hexadecimal string
-     """
-     return uuid.uuid4().hex[:16]
-
-
- def generate_hash_from_string(input_string: str):
-     """
-     Generate a hash for the given string and return it as a hexadecimal string.
-
-     Args:
-         input_string (str): The input string to hash.
-
-     Returns:
-         str: The hexadecimal hash of the input string.
-     """
-     hash_object = hashlib.sha256(input_string.encode())
-     return hash_object.hexdigest()[:16]
-
-
- @dataclass
- class SqlQueryResult:
-     result: List[Dict]
-
-
- class IndexifyClient:
-     """
-     IndexifyClient is the main entry point for the SDK.
-     For the full list of client features, see the
-     [httpx Client documentation](https://www.python-httpx.org/api/#client).
-
-     :param service_url: The URL of the Indexify service to connect to.
-     :param args: Arguments to pass to the httpx.Client constructor
-     :param kwargs: Keyword arguments to pass to the httpx.Client constructor
-
-     Example usage:
-     ```
-     from indexify import IndexifyClient
-
-     client = IndexifyClient()
-     assert client.heartbeat() == True
-     ```
-     """
-
-     def __init__(
-         self,
-         service_url: str = DEFAULT_SERVICE_URL,  # switch this to DEFAULT_SERVICE_URL_HTTPS for TLS
-         namespace: str = "default",
-         config_path: Optional[str] = None,
-         *args,
-         **kwargs,
-     ):
-         if config_path:
-             with open(config_path, "r") as file:
-                 config = yaml.safe_load(file)
-
-             if config.get("use_tls", False):
-                 tls_config = config["tls_config"]
-                 self._client = httpx.Client(
-                     http2=True,
-                     cert=(tls_config["cert_path"], tls_config["key_path"]),
-                     verify=tls_config.get("ca_bundle_path", True),
-                 )
-             else:
-                 self._client = httpx.Client(*args, **kwargs)
-         else:
-             self._client = httpx.Client(*args, **kwargs)
-
-         self.namespace: str = namespace
-         self.extraction_graphs: List[ExtractionGraph] = []
-         self.labels: dict = {}
-         self._service_url = service_url
-         self._timeout = kwargs.get("timeout")
-
-         # get namespace data
-         self.extraction_graphs = self.get_extraction_graphs()
-
-     @classmethod
-     def with_mtls(
-         cls,
-         cert_path: str,
-         key_path: str,
-         ca_bundle_path: Optional[str] = None,
-         service_url: str = DEFAULT_SERVICE_URL_HTTPS,
-         *args,
-         **kwargs,
-     ) -> "IndexifyClient":
-         """
-         Create a client with mutual TLS authentication. Also enables HTTP/2,
-         which is required for mTLS.
-         NOTE: mTLS must be enabled on the Indexify service for this to work.
-
-         :param cert_path: Path to the client certificate. Resolution handled by httpx.
-         :param key_path: Path to the client key. Resolution handled by httpx.
-         :param args: Arguments to pass to the httpx.Client constructor
-         :param kwargs: Keyword arguments to pass to the httpx.Client constructor
-         :return: A client with mTLS authentication
-
-         Example usage:
-         ```
-         from indexify import IndexifyClient
-
-         client = IndexifyClient.with_mtls(
-             cert_path="/path/to/cert.pem",
-             key_path="/path/to/key.pem",
-         )
-         assert client.heartbeat() == True
-         ```
-         """
-         if not (cert_path and key_path):
-             raise ValueError("Both cert and key must be provided for mTLS")
-
-         client_certs = (cert_path, key_path)
-         verify_option = ca_bundle_path if ca_bundle_path else True
-         client = IndexifyClient(
-             *args,
-             **kwargs,
-             service_url=service_url,
-             http2=True,
-             cert=client_certs,
-             verify=verify_option,
-         )
-         return client
-
-     def _request(self, method: str, **kwargs) -> httpx.Response:
-         try:
-             response = self._client.request(method, timeout=self._timeout, **kwargs)
-             status_code = str(response.status_code)
-             if status_code.startswith("4"):
-                 raise ApiException(
-                     "status code: " + status_code + " request args: " + str(kwargs)
-                 )
-             if status_code.startswith("5"):
-                 raise ApiException(response.text)
-                 # error = Error.from_tonic_error_string(str(response.url), response.text)
-                 # self.__print_additional_error_context(error)
-                 # raise error
-         except httpx.ConnectError:
-             message = (
-                 f"Make sure the server is running and accesible at {self._service_url}"
-             )
-             error = Error(status="ConnectionError", message=message)
-             print(error)
-             raise error
-         return response
-
-     def get(self, endpoint: str, **kwargs) -> httpx.Response:
-         """
-         Make a GET request to the Indexify service.
-
-         :param endpoint: The endpoint to make the request to.
-
-         Example usage:
-         ```
-         from indexify import IndexifyClient
-
-         client = IndexifyClient()
-         response = client.get("namespaces")
-         print(response.json())
-         ```
-         """
-         return self._request("GET", url=f"{self._service_url}/{endpoint}", **kwargs)
-
-     def post(self, endpoint: str, **kwargs) -> httpx.Response:
-         """
-         Make a POST request to the Indexify service.
-
-         :param endpoint: The endpoint to make the request to.
-
-         Example usage:
-
-         ```
-         from indexify import IndexifyClient
-
-         client = IndexifyClient()
-         response = client.post("namespaces", json={"name": "my-repo"})
-         print(response.json())
-         ```
-         """
-         return self._request("POST", url=f"{self._service_url}/{endpoint}", **kwargs)
-
-     def put(self, endpoint: str, **kwargs) -> httpx.Response:
-         """
-         Make a PUT request to the Indexify service.
-
-         :param endpoint: The endpoint to make the request to.
-
-         Example usage:
-
-         ```
-         from indexify import IndexifyClient
-
-         client = IndexifyClient()
-         response = client.put("namespaces", json={"name": "my-repo"})
-         print(response.json())
-         ```
-         """
-         return self._request("PUT", url=f"{self._service_url}/{endpoint}", **kwargs)
-
-     def delete(self, endpoint: str, **kwargs) -> httpx.Response:
-         """
-         Make a DELETE request to the Indexify service.
-
-         :param endpoint: The endpoint to make the request to.
-
-         Example usage:
-
-         ```
-         from indexify import IndexifyClient
-
-         client = IndexifyClient()
-         response = client.delete("namespaces")
-         print(response.json())
-         ```
-         """
-         return self._request("DELETE", url=f"{self._service_url}/{endpoint}", **kwargs)
-
-     def close(self):
-         """
-         Close the underlying httpx.Client.
-         """
-         self._client.close()
-
-     # __enter__ and __exit__ allow the client to be used as a context manager
-     def __enter__(self):
-         return self
-
-     def __exit__(self, exc_type, exc_value, traceback):
-         self.close()
-
-     def heartbeat(self, heartbeat_response="Indexify Server") -> bool:
-         """
-         Check if the Indexify service is alive.
-         """
-         response = self.get(f"")
-         # Server responds with text: "Indexify Server"
-         return response.text == heartbeat_response
-
-     def namespaces(self) -> list[str]:
-         """
-         Get a list of all namespaces.
-         """
-         response = self.get(f"namespaces")
-         namespaces_dict = response.json()["namespaces"]
-         namespaces = []
-         for item in namespaces_dict:
-             namespaces.append(item["name"])
-         return namespaces
-
-     @classmethod
-     def create_namespace(
-         self,
-         namespace: str,
-         extraction_graphs: list = [],
-         labels: dict = {},
-         service_url: str = DEFAULT_SERVICE_URL,
-     ) -> "IndexifyClient":
-         """
-         Create a new namespace.
-
-         Returns:
-             IndexifyClient: a new client with the given namespace
-         """
-         extraction_graphs = []
-         for bd in extraction_graphs:
-             if isinstance(bd, extraction_graphs):
-                 extraction_graphs.append(bd.to_dict())
-             else:
-                 extraction_graphs.append(bd)
-
-         req = {
-             "name": namespace,
-             "extraction_graphs": extraction_graphs,
-             "labels": labels,
-         }
-
-         with httpx.Client() as client:
-             client.post(f"{service_url}/namespaces", json=req)
-
-         client = IndexifyClient(namespace=namespace, service_url=service_url)
-         return client
-
-     def _add_content_url(self, content):
-         """
-         Add download content_url url property
-         """
-         return {
-             **content,
-             "content_url": f"{self._service_url}/namespaces/{self.namespace}/content/{content['id']}/download",
-         }
-
-     def indexes(self) -> dict:
-         """
-         Get the indexes of the current namespace.
-
-         Returns:
-             List[Index]: list of indexes in the current namespace
-         """
-         response = self.get(f"namespaces/{self.namespace}/indexes")
-         return response.json()["indexes"]
-
-     def extractors(self) -> List[ExtractorMetadata]:
-         """
-         Get a list of all extractors.
-
-         Returns:
-             List[Extractor]: list of extractors
-         """
-         response = self.get(f"extractors")
-         extractors_dict = response.json()["extractors"]
-         extractors = []
-         for ed in extractors_dict:
-             print(ed)
-             extractors.append(ExtractorMetadata.model_validate(ed))
-         return extractors
-
-     def get_extraction_graphs(self) -> List[ExtractionGraph]:
-         """
-         Retrieve and update the list of extraction policies for the current namespace.
-         """
-         response = self.get(f"namespaces/{self.namespace}/extraction_graphs")
-         json = response.json()
-
-         self.extraction_graphs = []
-         for graph in json["extraction_graphs"]:
-             self.extraction_graphs.append(ExtractionGraph.from_dict(graph))
-
-         return self.extraction_graphs
-
-     def create_extraction_graph(self, extraction_graph: ExtractionGraph):
-         """
-         Create a new extraction graph.
-
-         Args:
-             - extraction_graph (ExtractionGraph): the extraction graph to create
-         """
-         req = extraction_graph.to_dict()
-         req["namespace"] = self.namespace
-         request_body = json.dumps(req, default=json_set_default)
-         response = self.post(
-             f"namespaces/{self.namespace}/extraction_graphs",
-             data=request_body,
-             headers={"Content-Type": "application/json"},
-         )
-         return
-
-     def link_extraction_graphs(
-         self, source_graph: str, content_source: str, linked_graph: str
-     ):
-         """
-         Link an extraction graph to another extraction graph.
-
-         Args:
-             - source_graph (str): source extraction graph
-             - content_source (str): content source in source graph
-             - linked_graph (str): target extraction graph
-         """
-         req = {
-             "content_source": content_source,
-             "linked_graph_name": linked_graph,
-         }
-         response = self.post(
-             f"namespaces/{self.namespace}/extraction_graphs/{source_graph}/links",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-         return
-
-     def get_content_metadata(self, content_id: str) -> dict:
-         """
-         Get metadata for a specific content ID in a given index.
-
-         Args:
-             - content_id (str): content id to query
-         """
-         response = self.get(
-             f"namespaces/{self.namespace}/content/{content_id}/metadata"
-         )
-         return response.json()["content_metadata"]
-
-     def download_content(self, content_id: str) -> bytes:
-         """
-         Download content from id. Return bytes
-
-         Args:
-             - content_id (str): id of content to download
-         """
-         response = self.get(
-             f"namespaces/{self.namespace}/content/{content_id}/download"
-         )
-         return response.content
-
-     def add_documents(
-         self,
-         extraction_graphs: Union[str, List[str]],
-         documents: Union[Document, str, List[Union[Document, str]]],
-         doc_id=None,
-     ) -> Union[str, List[str]]:
-         """
-         Add documents to current namespace.
-
-         Args:
-             - documents (Union[Document, str, List[Union[Document, str]]]): this can be a list of strings, list of Documents or a mix of both
-         """
-         if isinstance(extraction_graphs, str):
-             extraction_graphs = [extraction_graphs]
-         if isinstance(documents, Document):
-             documents = [documents]
-         elif isinstance(documents, str):
-             documents = [Document(documents, {}, id=doc_id)]
-         elif isinstance(documents, list):
-             new_documents = []
-             for item in documents:
-                 if isinstance(item, Document):
-                     new_documents.append(item)
-                 elif isinstance(item, str):
-                     new_documents.append(
-                         Document(item, {}, id=None)
-                     )  # don't pass in id for a string content because doesn't make sense to have same content id for all strings
-                 else:
-                     raise ValueError(
-                         "List items must be either Document instances or strings."
-                     )
-             documents = new_documents
-         else:
-             raise TypeError(
-                 "Invalid type for documents. Expected Document, str, or list of these."
-             )
-         for document in documents:
-             document.labels["mime_type"] = "text/plain"
-         content_ids = []
-         if isinstance(extraction_graphs, str):
-             extraction_graphs = [extraction_graphs]
-         for extraction_graph in extraction_graphs:
-             for document in documents:
-                 response = self.post(
-                     f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
-                     files={"file": document.text},
-                     data={"labels": json.dumps(document.labels)},
-                 )
-                 response_json = response.json()
-                 content_id = response_json["content_id"]
-                 content_ids.append(content_id)
-         return content_ids
-
-     def delete_documents(self, document_ids: List[str]) -> None:
-         """
-         Delete documents from current namespace.
-
-         Args:
-             - document_ids (List[str]): list of document ids to delete
-         """
-         req = {"content_ids": document_ids}
-         response = self.delete(
-             f"namespaces/{self.namespace}/content",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-
-     def update_labels(self, document_id: str, labels: Dict[str, str]) -> None:
-         """
-         Update labels for a document.
-
-         Args:
-             - document_id (str): id of document to update
-             - labels (Dict[str, str]): labels to update
-         """
-         req = {"labels": labels}
-         response = self.put(
-             f"namespaces/{self.namespace}/content/{document_id}/labels",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-
-     def update_content(self, document_id: str, path: str) -> None:
-         """
-         Update a piece of content with a new file
-
-         Args:
-             - path (str): relative path to the file to be uploaded
-         """
-         with open(path, "rb") as f:
-             response = self.put(
-                 f"namespaces/{self.namespace}/content/{document_id}", files={"file": f}
-             )
-
-     def get_structured_data(self, content_id: str) -> dict:
-         """
-         Query metadata for a specific content ID in a given index.
-
-         Args:
-             - content_id (str): content id to query
-         """
-         response = self.get(
-             f"namespaces/{self.namespace}/content/{content_id}/metadata"
-         )
-         return response.json().get("metadata", [])
-
-     def search_index(
-         self, name: str, query: str, top_k: int, filters: List[str] = []
-     ) -> dict:
-         """
-         Search index in the current namespace.
-
-         Args:
-             - name (str): name of index to search
-             - query (str): query string
-             - top_k (int): top k nearest neighbors to be returned
-             - filters (List[str]): list of filters to apply
-         """
-         req = {"query": query, "k": top_k, "filters": filters}
-         response = self.post(
-             f"namespaces/{self.namespace}/indexes/{name}/search",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-         return response.json()["results"]
-
-     def list_content(
-         self,
-         extraction_graph: str,
-         extraction_policy: str = "",
-         labels_filter: List[str] = [],
-         start_id: str = "",
-         limit: int = 10,
-     ) -> List[ContentMetadata]:
-         """
-         List content in the current namespace.
-
-         Args:
-             - extraction_graph (str): extraction graph name
-             - start_index (str): start index for pagination
-             - limit (int): number of items to return
-         """
-         params = {"start_id": start_id, "limit": limit}
-         if extraction_policy:
-             params["source"] = extraction_policy
-         else:
-             params["source"] = "ingestion"
-         if len(labels_filter) > 0:
-             params["labels_filter"] = labels_filter
-         response = self.get(
-             f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/content",
-             params=params,
-         )
-         content_list = response.json()["content_list"]
-         content = []
-         for item in content_list:
-             content.append(ContentMetadata.from_dict(item))
-         return content
-
-     def upload_file(
-         self,
-         extraction_graph: str,
-         path: str,
-         file_bytes: bytes = None,
-         id=None,
-         labels: dict = {},
-     ) -> str:
-         """
-         Upload a file from a path or the bytes.
-
-         Args:
-             - extraction_graph (str): name of the extraction graph to use for extraction
-             - path (Union[str, bytes]): relative path to the file to be uploaded, or the bytes of the file
-             - labels (dict): labels to be associated with the file
-         """
-         params = {}
-         if id is not None:
-             params["id"] = id
-
-         if file_bytes == None:
-             with open(path, "rb") as f:
-                 response = self.post(
-                     f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
-                     files={"file": f},
-                     data={"labels": json.dumps(labels)},
-                     params=params,
-                 )
-         else:
-             response = self.post(
-                 f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
-                 files={"file": (path, file_bytes)},
-                 data={"labels": json.dumps(labels)},
-                 params=params,
-             )
-             file_content = path
-
-         response_json = response.json()
-         content_id = response_json["content_id"]
-         return content_id
-
-     def ingest_from_loader(
-         self, loader: DataLoader, extraction_graph: str
-     ) -> List[str]:
-         """
-         Loads content using the loader, uploads them to Indexify and returns the content ids.
-         loader: DataLoader: The DataLoader object to use for loading content
-         extraction_graph: str: The name of the extraction graph to use for extraction
-         """
-         content_ids = []
-         files = loader.load()
-         for file_metadata in files:
-             labels = {"file_name": file_metadata.path}
-             content_id = self.upload_file(
-                 extraction_graph,
-                 file_metadata.path,
-                 loader.read_all_bytes(file_metadata),
-                 labels=labels,
-             )
-             content_ids.append(content_id)
-         return content_ids
-
-     def list_schemas(self) -> List[str]:
-         """
-         List all schemas in the current namespace.
-         """
-         response = self.get(f"namespaces/{self.namespace}/schemas")
-         return response.json()
-
-     def get_extracted_content(
-         self,
-         ingested_content_id: str,
-         graph_name: str,
-         policy_name: str,
-         blocking=False,
-     ):
-         """
-         Get list of child for a given content id and their content up to the specified level.
-
-         Args:
-             - ingested_content_id (str): id of content
-             - graph_name (str): name of extraction graph
-             - policy_name(str): name of extraction policy in the graph
-             - blocking (bool): wait for extraction to complete before returning (default: False)
-         """
-         if blocking:
-             self.wait_for_extraction(ingested_content_id)
-         response = self.get(
-             f"namespaces/{self.namespace}/extraction_graphs/{graph_name}/content/{ingested_content_id}/extraction_policies/{policy_name}"
-         )
-         content_tree = response.json()
-         child_list = []
-         for item in content_tree["content_tree_metadata"]:
-             if (
-                 graph_name in item["extraction_graph_names"]
-                 and item["source"] == policy_name
-             ):
-                 content = self.download_content(item["id"])
-                 child_list.append(
-                     {
-                         "id": item["id"],
-                         "mime_type": item["mime_type"],
-                         "content": content,
-                     }
-                 )
-
-         return child_list
-
-     def sql_query(self, query: str):
-         """
-         Execute a SQL query.
-
-         Args:
-             - query (str): SQL query to be executed
-         """
-         req = {"query": query}
-         response = self.post(
-             f"namespaces/{self.namespace}/sql_query",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-         result = response.json()
-         rows = []
-         for row in result["rows"]:
-             data = row["data"]
-             rows.append(data)
-         return SqlQueryResult(result=rows)
-
-     def ingest_remote_file(
-         self,
-         extraction_graph: str,
-         url: str,
-         mime_type: str,
-         labels: Dict[str, str] = {},
-         id=None,
-     ):
-         req = {
-             "url": url,
-             "mime_type": mime_type,
-             "labels": labels,
-             "id": id,
-         }
-         response = self.post(
-             f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract_remote",
-             json=req,
-             headers={"Content-Type": "application/json"},
-         )
-         response.raise_for_status()
-         return response.json()["content_id"]
-
-     def wait_for_extraction(self, content_ids: Union[str, List[str]]):
-         """
-         Wait for extraction to complete for a given content id
-
-         Args:
-             - content_id (str): id of content
-         """
-         if type(content_ids) == str:
-             content_ids = [content_ids]
-         print(
-             "Waiting for extraction to complete for content id: ", ",".join(content_ids)
-         )
-         for content_id in content_ids:
-             response = self.get(
-                 f"namespaces/{self.namespace}/content/{content_id}/wait"
-             )
-             print("Extraction completed for content id: ", content_id)
-             response.raise_for_status()
-
-     def generate_unique_hex_id(self):
-         """
-         Generate a unique hexadecimal identifier
-
-         Returns:
-             str: a unique hexadecimal string
-         """
-         logging.warning(
-             "This method is deprecated. Use generate_unique_hex_id from indexify instead."
-         )
-         return uuid.uuid4().hex[:16]
-
-     def generate_hash_from_string(self, input_string: str):
-         """
-         Generate a hash for the given string and return it as a hexadecimal string.
-
-         Args:
-             input_string (str): The input string to hash.
-
-         Returns:
-             str: The hexadecimal hash of the input string.
-         """
-         logging.warning(
-             "This method is deprecated. Use generate_hash_from_string from indexify instead."
-         )
-         hash_object = hashlib.sha256(input_string.encode())
-         return hash_object.hexdigest()[:16]
-
-     def __print_additional_error_context(self, error: Error):
-         print(error)
-
-         if error.status == "ExtractionGraphError":
-             graphs = [eg.name for eg in self.extraction_graphs]
-             extractors = [ext.name for ext in self.extractors()]
-             print(f"Available extraction graphs: {graphs}")
-             print(f"Available extractors: {extractors}")
-
-         if error.status == "SearchError":
-             indexes = [index["name"] for index in self.indexes()]
-             print(f"Available indexes: {indexes}")
+ from typing import Optional
+
+ from .base_client import IndexifyClient
+ from .local_client import LocalClient
+ from .remote_client import RemoteClient
+ from .settings import DEFAULT_SERVICE_URL
+
+
+ def create_client(
+     service_url: str = DEFAULT_SERVICE_URL,
+     config_path: Optional[str] = None,
+     local: bool = False,
+     *args,
+     **kwargs,
+ ) -> IndexifyClient:
+     if local:
+         return LocalClient()
+     return RemoteClient(config_path=config_path, service_url=service_url, **kwargs)
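
In 0.2, client.py shrinks to a thin factory: `create_client` returns either a `LocalClient` (in-process execution) or a `RemoteClient` (HTTP against an Indexify server), both built on the `IndexifyClient` interface from base_client.py. A minimal usage sketch based only on the signature above; the config path shown is a hypothetical example:

```
from indexify.client import create_client

# In-process client; assumed to run graphs locally without a server.
local_client = create_client(local=True)

# HTTP client against a server at the default service URL.
remote_client = create_client()

# Extra keyword arguments are forwarded to RemoteClient; config_path
# would point at a client config file (hypothetical path).
configured_client = create_client(config_path="~/.indexify/config.yaml")
```

Note that `create_client` accepts `*args` but does not forward them to `RemoteClient`; only `config_path`, `service_url`, and keyword arguments are passed through.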