indexify 0.0.37-py3-none-any.whl → 0.0.39-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
indexify/__init__.py CHANGED
@@ -6,24 +6,16 @@ from .client import (
      generate_hash_from_string,
      generate_unique_hex_id,
  )
- from .data import ContentMetadata, Content, Feature
- from .extractor import Extractor, extractor, EmbeddingSchema
+ from . import extractor_sdk
  from .settings import DEFAULT_SERVICE_URL
  from . import data_loaders

  __all__ = [
-     "ContentMetadata",
-     "Content",
      "data_loaders",
-     "Feature",
-     "Extractor",
-     "extractor",
-     "EmbeddingSchema",
-     "extractor",
      "Document",
+     "extractor_sdk",
      "IndexifyClient",
      "ExtractionGraph",
-     "ExtractionGraphBuilder" "ExtractionPolicy",
      "DEFAULT_SERVICE_URL",
      "generate_hash_from_string",
      "generate_unique_hex_id",
indexify/base_client.py ADDED
@@ -0,0 +1,67 @@
+ from abc import ABC, abstractmethod
+ from .extractor_sdk import Graph, Feature
+ from typing import Any, List, Optional, Union, Dict
+
+ class BaseClient(ABC):
+
+     ### Operational APIs
+     @abstractmethod
+     def register_extraction_graph(self, graph: Graph):
+         pass
+
+     @abstractmethod
+     def graphs(self) -> str:
+         pass
+
+     @abstractmethod
+     def namespaces(self) -> str:
+         pass
+
+     @abstractmethod
+     def create_namespace(self, namespace: str):
+         pass
+
+     ### Ingestion APIs
+     @abstractmethod
+     def invoke_graph_with_object(self, graph: str, object: Any) -> str:
+         """
+         Invokes a graph with an input object.
+         graph: str: The name of the graph to invoke
+         object: Any: The input object to the graph. It should be JSON serializable
+         return: str: The ID of the ingested object
+         """
+         pass
+
+     @abstractmethod
+     def invoke_graph_with_file(self, graph: str, path: str) -> str:
+         """
+         Invokes a graph with an input file. The file's mimetype is appropriately detected.
+         graph: str: The name of the graph to invoke
+         path: str: The path to the file to be ingested
+         return: str: The ID of the ingested object
+         """
+         pass
+
+
+     ### Retrieval APIs
+     @abstractmethod
+     def extracted_objects(self, graph: str, ingested_object_id: str, extractor_name: Optional[str]) -> Union[Dict[str, List[Any]], List[Any]]:
+         """
+         Returns the objects extracted by a graph for an ingested object. If an extractor name is provided, only the objects extracted by that extractor are returned;
+         otherwise all extracted objects for the input object are returned.
+         graph: str: The name of the graph
+         ingested_object_id: str: The ID of the ingested object
+         extractor_name: Optional[str]: The name of the extractor whose output is to be returned, if provided
+         return: Union[Dict[str, List[Any]], List[Any]]: The extracted objects. If an extractor name is provided, the output is a list of objects extracted by that extractor; otherwise it is a dictionary keyed by extractor name with the extracted objects as values. If no objects are found, an empty list is returned.
+         """
+         pass
+
+     @abstractmethod
+     def features(self, object_id: str, graph: Optional[str]) -> Union[Dict[str, List[Feature]], List[Feature]]:
+         """
+         Returns the features of an object.
+         object_id: str: The ID of the object
+         return: List[Feature]: The features extracted for the object. If a graph name is provided, only the features extracted by that graph are returned.
+         """
+         pass
+
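
Note: `BaseClient` is a new abstract interface; concrete clients are expected to implement all of the methods above. A minimal sketch of a conforming subclass (the class name and stub bodies are hypothetical):

    from typing import Any, Optional
    from indexify.base_client import BaseClient

    class StubClient(BaseClient):  # hypothetical, for illustration only
        def register_extraction_graph(self, graph): ...
        def graphs(self) -> str: ...
        def namespaces(self) -> str: ...
        def create_namespace(self, namespace: str): ...
        def invoke_graph_with_object(self, graph: str, object: Any) -> str: ...
        def invoke_graph_with_file(self, graph: str, path: str) -> str: ...
        def extracted_objects(self, graph: str, ingested_object_id: str, extractor_name: Optional[str]): ...
        def features(self, object_id: str, graph: Optional[str]): ...
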
indexify/client.py CHANGED
@@ -5,11 +5,11 @@ import hashlib
  import json
  from collections import namedtuple
  from .settings import DEFAULT_SERVICE_URL, DEFAULT_SERVICE_URL_HTTPS
- from .extractor import Extractor
+ from .extractor_sdk.extractor import ExtractorMetadata
  from .extraction_policy import ExtractionGraph
  from .utils import json_set_default
  from .error import Error
- from .data import Content, ContentMetadata
+ from .extractor_sdk.data import ContentMetadata
  from .data_loaders import DataLoader
  from indexify.exceptions import ApiException
  from dataclasses import dataclass
@@ -326,7 +326,7 @@ class IndexifyClient:
          response = self.get(f"namespaces/{self.namespace}/indexes")
          return response.json()["indexes"]

-     def extractors(self) -> List[Extractor]:
+     def extractors(self) -> List[ExtractorMetadata]:
          """
          Get a list of all extractors.

@@ -337,7 +337,8 @@ class IndexifyClient:
          extractors_dict = response.json()["extractors"]
          extractors = []
          for ed in extractors_dict:
-             extractors.append(Extractor.from_dict(ed))
+             print(ed)
+             extractors.append(ExtractorMetadata.model_validate(ed))
          return extractors

      def get_extraction_graphs(self) -> List[ExtractionGraph]:
@@ -578,8 +579,8 @@ class IndexifyClient:
      def upload_file(
          self,
          extraction_graph: str,
-         path: str,
-         file_bytes:bytes=None,
+         path: str,
+         file_bytes: bytes = None,
          id=None,
          labels: dict = {},
      ) -> str:
@@ -605,18 +606,20 @@ class IndexifyClient:
              )
          else:
              response = self.post(
-                 f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
-                 files={"file": (path, file_bytes)},
-                 data={"labels": json.dumps(labels)},
-                 params=params,
+                 f"namespaces/{self.namespace}/extraction_graphs/{extraction_graph}/extract",
+                 files={"file": (path, file_bytes)},
+                 data={"labels": json.dumps(labels)},
+                 params=params,
              )
          file_content = path
-
+
          response_json = response.json()
          content_id = response_json["content_id"]
          return content_id
-
-     def ingest_from_loader(self, loader: DataLoader, extraction_graph: str) -> List[str]:
+
+     def ingest_from_loader(
+         self, loader: DataLoader, extraction_graph: str
+     ) -> List[str]:
          """
          Loads content using the loader, uploads them to Indexify and returns the content ids.
          loader: DataLoader: The DataLoader object to use for loading content
@@ -625,9 +628,13 @@ class IndexifyClient:
          content_ids = []
          files = loader.load()
          for file_metadata in files:
-             labels={"file_name": file_metadata.path}
-             print(labels)
-             content_id = self.upload_file(extraction_graph, file_metadata.path, file_metadata.read_all_bytes(), labels=labels)
+             labels = {"file_name": file_metadata.path}
+             content_id = self.upload_file(
+                 extraction_graph,
+                 file_metadata.path,
+                 loader.read_all_bytes(file_metadata),
+                 labels=labels,
+             )
              content_ids.append(content_id)
          return content_ids

@@ -702,7 +709,7 @@ class IndexifyClient:
          extraction_graph: str,
          url: str,
          mime_type: str,
-         labels: Dict[str, str],
+         labels: Dict[str, str] = {},
          id=None,
      ):
          req = {
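
Note: the behavioral change in `ingest_from_loader` is that file bytes are now fetched through the loader (`loader.read_all_bytes(file_metadata)`) instead of through `FileMetadata`. A short usage sketch (the directory and graph name are placeholders, and a running server at the default service URL is assumed):

    from indexify import IndexifyClient
    from indexify.data_loaders import LocalDirectoryLoader

    client = IndexifyClient()
    loader = LocalDirectoryLoader("/tmp/docs", file_extensions=[".pdf"])
    content_ids = client.ingest_from_loader(loader, extraction_graph="my_graph")
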
indexify/data_loaders/__init__.py CHANGED
@@ -6,6 +6,7 @@ import os
  import mimetypes
  import hashlib

+
  class FileMetadata(BaseModel):
      path: str
      file_size: int
@@ -38,18 +39,20 @@ class FileMetadata(BaseModel):
              updated_at=updated_at,
          )

-     def read_all_bytes(self) -> bytes:
-         with open(self.path, "rb") as f:
-             return f.read()
-

  class DataLoader(ABC):
      @abstractmethod
      def load(self) -> List[FileMetadata]:
          pass

+     @abstractmethod
+     def read_all_bytes(self, file_metadata: FileMetadata) -> bytes:
+         pass
+
      @abstractmethod
      def state(self) -> dict:
          pass

- from .local_directory_loader import LocalDirectoryLoader
+
+ from .local_directory_loader import LocalDirectoryLoader
+ from .url_loader import UrlLoader
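
Note: `read_all_bytes` moves off `FileMetadata` and onto the `DataLoader` ABC, so each loader owns how its bytes are fetched. A minimal sketch of a custom loader under the new contract (the class is hypothetical; the `FileMetadata` fields mirror those used by `UrlLoader` below):

    from typing import List
    from indexify.data_loaders import DataLoader, FileMetadata

    class InMemoryLoader(DataLoader):  # hypothetical, for illustration only
        def __init__(self, blobs: dict):
            self.blobs = blobs  # {path: bytes}

        def load(self) -> List[FileMetadata]:
            return [
                FileMetadata(path=p, file_size=len(b), mime_type="application/octet-stream",
                             md5_hash="", created_at=0, updated_at=0)
                for p, b in self.blobs.items()
            ]

        def read_all_bytes(self, file_metadata: FileMetadata) -> bytes:
            return self.blobs[file_metadata.path]

        def state(self) -> dict:
            return {}
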
indexify/data_loaders/local_directory_loader.py CHANGED
@@ -4,7 +4,12 @@ import os


  class LocalDirectoryLoader(DataLoader):
-     def __init__(self, directory: str, file_extensions: Optional[List[str]] = None, state: dict ={}):
+     def __init__(
+         self,
+         directory: str,
+         file_extensions: Optional[List[str]] = None,
+         state: dict = {},
+     ):
          self.directory = directory
          self.file_extensions = file_extensions
          self.processed_files = set(state.get("processed_files", []))
@@ -23,5 +28,9 @@ class LocalDirectoryLoader(DataLoader):

          return file_metadata_list

+     def read_all_bytes(self, file: FileMetadata) -> bytes:
+         with open(file.path, "rb") as f:
+             return f.read()
+
      def state(self) -> dict:
          return {"processed_files": list(self.processed_files)}
indexify/data_loaders/url_loader.py ADDED
@@ -0,0 +1,51 @@
+ from . import DataLoader, FileMetadata
+ from typing import List
+ import httpx
+ import hashlib
+ import email.utils
+
+
+ def convert_date_to_epoch(date_str: str) -> int:
+     """
+     Convert a date string from a URL header to Unix epoch time.
+
+     Args:
+         date_str (str): The date string from the URL header.
+
+     Returns:
+         int: The Unix epoch time.
+     """
+     if not date_str:
+         return 0
+     parsed_date = email.utils.parsedate_to_datetime(date_str)
+     return int(parsed_date.timestamp())
+
+
+ class UrlLoader(DataLoader):
+     def __init__(self, urls: List[str], state: dict = {}):
+         self.urls = urls
+
+     def load(self) -> List[FileMetadata]:
+         file_metadata_list = []
+         for url in self.urls:
+             response = httpx.head(url, follow_redirects=True)
+             file_metadata_list.append(
+                 FileMetadata(
+                     path=url,
+                     file_size=response.headers.get("content-length", 0),
+                     mime_type=response.headers.get("content-type"),
+                     md5_hash="",
+                     created_at=convert_date_to_epoch(response.headers.get("date")),
+                     updated_at=convert_date_to_epoch(
+                         response.headers.get("last-modified")
+                     ),
+                 )
+             )
+         return file_metadata_list
+
+     def read_all_bytes(self, file: FileMetadata) -> bytes:
+         response = httpx.get(file.path, follow_redirects=True)
+         return response.content
+
+     def state(self) -> dict:
+         return {}
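
Note: `UrlLoader` builds `FileMetadata` from HTTP HEAD responses and only downloads bodies on demand in `read_all_bytes`. A short usage sketch (the URL is a placeholder):

    from indexify.data_loaders import UrlLoader

    loader = UrlLoader(["https://example.com/report.pdf"])
    for fm in loader.load():
        data = loader.read_all_bytes(fm)  # one GET per file, redirects followed
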
indexify/extractor_sdk/__init__.py ADDED
@@ -0,0 +1,14 @@
+ from .data import ContentMetadata, Content, Feature
+ from .extractor import Extractor, extractor, EmbeddingSchema, ExtractorMetadata
+ from .utils import SampleExtractorData
+
+ __all__ = [
+     "ContentMetadata",
+     "Content",
+     "Feature",
+     "Extractor",
+     "extractor",
+     "EmbeddingSchema",
+     "ExtractorMetadata",
+     "SampleExtractorData",
+ ]
indexify/extractor_sdk/data.py CHANGED
@@ -1,6 +1,26 @@
- from typing import Any, List, Optional, Literal, Dict
+ from typing import (
+     Any,
+     List,
+     Optional,
+     Literal,
+     Dict,
+     Type,
+     cast,
+     Mapping,
+ )
  from pydantic import BaseModel, Json, Field
  import json
+ from typing_extensions import Annotated, Doc
+
+
+ class BaseData(BaseModel):
+     meta: Mapping[str, Type[BaseModel]] = {}
+
+     def get_features(self) -> List[Type[BaseModel]]:
+         return self.meta
+
+     def get_feature(self, name: str) -> Optional[Type[BaseModel]]:
+         return self.meta.get(name)


  class Feature(BaseModel):
@@ -14,7 +34,7 @@ class Feature(BaseModel):
          return cls(
              feature_type="embedding",
              name=name,
-             value={values: values, distance: distance},
+             value=json.dumps({"values": values, "distance": distance}),
              comment=None,
          )

@@ -26,7 +46,7 @@ class Feature(BaseModel):


  class Content(BaseModel):
-     id: str
+     id: Optional[str] = (None,)
      content_type: Optional[str]
      data: bytes
      features: List[Feature] = []
@@ -38,7 +58,7 @@ class Content(BaseModel):
          features: List[Feature] = [],
      ):
          return Content(
-             id="none-for-now",
+             id=None,
              content_type="text/plain",
              data=bytes(text, "utf-8"),
              features=features,
@@ -81,3 +101,8 @@ class ContentMetadata(BaseModel):
              mime_type=json["mime_type"],
              extracted_metadata=json["extracted_metadata"],
          )
+
+
+ class PDFFile(BaseData):
+     data: bytes
+     mime_type: str
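
Note: `BaseData` is the new envelope for typed payloads flowing through a graph; `LocalRunner` (below) attaches extracted feature values to its `meta` mapping keyed by feature name. A minimal sketch using the new `PDFFile` model (the bytes and feature value are placeholders):

    from indexify.extractor_sdk.data import PDFFile

    doc = PDFFile(data=b"%PDF-1.4", mime_type="application/pdf")
    doc.meta["page_count"] = 1  # mirrors how LocalRunner records a feature
    assert doc.get_feature("page_count") == 1
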
indexify/extractor_sdk/extractor.py ADDED
@@ -0,0 +1,231 @@
+ from typing import Union, Optional, List, Type, Tuple, Callable, get_type_hints, Dict
+ import inspect
+ from pydantic import BaseModel, Field
+ from abc import ABC, abstractmethod
+ from .data import BaseData, Content, Feature
+ import json
+ import os
+ import requests
+
+
+ class EmbeddingSchema(BaseModel):
+     dim: int
+     distance: str = "cosine"
+
+ class ExtractorMetadata(BaseModel):
+     name: str
+     version: str
+     description: str
+     input_mime_types: List[str]
+     system_dependencies: List[str]
+     python_dependencies: List[str]
+     input_mime_types: List[str]
+     embedding_schemas: Dict[str, EmbeddingSchema]
+     # Make this a dynamic model since it's a json schema
+     input_params: Optional[Dict]
+     # for backward compatibility
+     metadata_schemas: Optional[Dict]
+
+
+ class Extractor(ABC):
+     name: str = ""
+
+     version: str = "0.0.0"
+
+     system_dependencies: List[str] = []
+
+     python_dependencies: List[str] = []
+
+     description: str = ""
+
+     input_mime_types = ["text/plain"]
+
+     embeddings: Dict[str, EmbeddingSchema] = {}
+
+     @abstractmethod
+     def extract(
+         self, input: Type[BaseModel], params: Type[BaseModel] = None
+     ) -> List[Union[Feature, Type[BaseModel]]]:
+         """
+         Extracts information from the content. Returns a list of features to add
+         to the content.
+         It can also return a list of Content objects, which will be added to storage
+         and any extraction policies defined will be applied to them.
+         """
+         pass
+
+     @classmethod
+     @abstractmethod
+     def sample_input(cls) -> Tuple[Content, Type[BaseModel]]:
+         pass
+
+     def describe(self) -> ExtractorMetadata:
+         embedding_schemas = {}
+         try:
+             embedding_schemas = self.embedding_schemas
+         except NotImplementedError:
+             pass
+
+         json_schema = (
+             self._param_cls.model_json_schema() if self._param_cls is not None else None
+         )
+         return ExtractorMetadata(
+             name=self.name,
+             version=self.version,
+             description=self.description,
+             system_dependencies=self.system_dependencies,
+             python_dependencies=self.python_dependencies,
+             input_mime_types=self.input_mime_types,
+             embedding_schemas=embedding_schemas,
+             input_params=json.dumps(json_schema),
+         )
+
+     def _download_file(self, url, filename):
+         if os.path.exists(filename):
+             # file exists, skip
+             return
+         try:
+             with requests.get(url, stream=True) as r:
+                 r.raise_for_status()  # Raises an HTTPError if the response status code is 4XX/5XX
+                 with open(filename, "wb") as f:
+                     for chunk in r.iter_content(chunk_size=8192):
+                         f.write(chunk)
+         except requests.exceptions.RequestException as e:
+             print(f"Error downloading the file: {e}")
+
+     def sample_mp3(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.mp3"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/sample-000009.mp3",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="audio/mpeg", data=f.read(), features=features)
+
+     def sample_mp4(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.mp4"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/sample.mp4",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="video/mp4", data=f.read(), features=features)
+
+     def sample_jpg(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.jpg"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/people-standing.jpg",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="image/jpg", data=f.read(), features=features)
+
+     def sample_invoice_jpg(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.jpg"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/invoice-example.jpg",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="image/jpg", data=f.read(), features=features)
+
+     def sample_invoice_pdf(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.pdf"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/invoice-example.pdf",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="application/pdf", data=f.read(), features=features)
+
+     def sample_image_based_pdf(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.pdf"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/image-based.pdf",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="application/pdf", data=f.read(), features=features)
+
+     def sample_scientific_pdf(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.pdf"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/scientific-paper-example.pdf",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="application/pdf", data=f.read(), features=features)
+
+     def sample_presentation(self, features: List[Feature] = []) -> Content:
+         file_name = "test.pptx"
+         self._download_file(
+             "https://raw.githubusercontent.com/tensorlakeai/indexify/main/docs/docs/files/test.pptx",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(
+             content_type="application/vnd.openxmlformats-officedocument.presentationml.presentation",
+             data=f.read(),
+             features=features,
+         )
+
+     def sample_text(self, features: List[Feature] = []) -> Content:
+         article = """New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York. A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband. Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other. In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage. Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the 2010 marriage license application, according to court documents. Prosecutors said the marriages were part of an immigration scam. On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further. After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002. All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say. Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages. Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted. The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali. Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force. If convicted, Barrientos faces up to four years in prison. Her next court appearance is scheduled for May 18."""
+         return Content(content_type="text/plain", data=article, features=features)
+
+     def sample_html(self, features: List[Feature] = []) -> Content:
+         file_name = "sample.html"
+         self._download_file(
+             "https://extractor-files.diptanu-6d5.workers.dev/sample.html",
+             file_name,
+         )
+         f = open(file_name, "rb")
+         return Content(content_type="text/html", data=f.read(), features=features)
+
+
+ def extractor(
+     name: Optional[str] = None,
+     description: Optional[str] = "",
+     version: Optional[str] = "",
+     python_dependencies: Optional[List[str]] = None,
+     system_dependencies: Optional[List[str]] = None,
+     input_mime_types: Optional[List[str]] = None,
+     embedding_schemas: Optional[Dict[str, EmbeddingSchema]] = None,
+     sample_content: Optional[Callable] = None,
+ ):
+     args = locals()
+     del args["sample_content"]
+
+     def construct(fn):
+         def wrapper():
+             hint = get_type_hints(fn).get("params", dict)
+
+             if not args.get("name"):
+                 args[
+                     "name"
+                 ] = f"{inspect.getmodule(inspect.stack()[1][0]).__name__}:{fn.__name__}"
+
+             class DecoratedFn(Extractor):
+                 @classmethod
+                 def extract(cls, input: Type[BaseData], params: Type[BaseModel] = None) -> List[Union[Type[BaseModel], Type[Feature]]]:  # type: ignore
+                     # TODO we can force all the functions to take in a params object
+                     # or check if someone adds a params
+                     if params is None:
+                         return fn(input)
+                     else:
+                         return fn(input, params)
+
+                 def sample_input(self) -> Content:
+                     return sample_content() if sample_content else self.sample_text()
+
+             for key, val in args.items():
+                 setattr(DecoratedFn, key, val)
+
+             return DecoratedFn
+
+         wrapper._extractor_name = fn.__name__
+         wrapper.name = fn.__name__
+
+         return wrapper
+
+     return construct
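
Note: the `@extractor` decorator wraps a plain function into an `Extractor` subclass whose `name` defaults to `<module>:<function>`. A minimal sketch of defining one (the function and its splitting logic are hypothetical; `Content.from_text` is assumed to be the text constructor changed in the `data.py` hunk above):

    from typing import List
    from indexify.extractor_sdk import Content, extractor

    @extractor(description="splits text into paragraph chunks")
    def chunk_text(content: Content) -> List[Content]:
        text = content.data.decode("utf-8")
        return [Content.from_text(p) for p in text.split("\n\n")]
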
indexify/extractor_sdk/utils.py CHANGED
@@ -3,8 +3,8 @@ import httpx
  from typing import List
  from .data import Content, Feature

- class SampleExtractorData:

+ class SampleExtractorData:
      def _download_file(self, url, filename):
          if os.path.exists(filename):
              # file exists, skip
@@ -105,4 +105,4 @@ class SampleExtractorData:
              file_name,
          )
          f = open(file_name, "rb")
-         return Content(content_type="text/html", data=f.read(), features=features)
+         return Content(content_type="text/html", data=f.read(), features=features)
indexify/graph.py CHANGED
@@ -1,80 +1,23 @@
- from indexify import Content, extractor
- from indexify.extractor import Extractor
+ from .extractor_sdk import extractor, Extractor

- from collections import defaultdict
- from typing import Any, Callable, Dict, List, Optional, Self
+ from typing import Type, Union
+ from pydantic import BaseModel

- import itertools
+ from .run_graph import RunGraph
+ from .local_runner import LocalRunner


- @extractor(description="id function")
- def _id(content: Content) -> List[Content]:
-     return [content]
+ def Graph(
+     name: str,
+     input: Type[BaseModel],
+     start_node: Union[extractor, Extractor],
+     run_local: bool,
+ ) -> RunGraph:

- class Graph:
-     def __init__(self, name: str):
-         # TODO check for cycles
-         self.name = name
+     if run_local:
+         runner = LocalRunner()
+     else:
+         raise NotImplementedError("Remote runner not supported yet")

-         self.nodes: Dict[str, Callable] = {}
-         self.params: Dict[str, Any] = {}
-
-         self.edges: Dict[str, List[(str, str)]] = defaultdict(list)
-
-         self.results: Dict[str, Any] = defaultdict(list)  # TODO should the Any be Content?
-
-         self.nodes["start"] = _id
-         self.nodes["end"] = _id
-
-         self._topo_counter = defaultdict(int)
-
-         self._start_node = None
-
-     def _node(self, extractor: Extractor, params: Any = None) -> Self:
-         name = extractor._extractor_name
-
-         # if you've already inserted a node just ignore the new insertion.
-         if name in self.nodes:
-             return
-
-         self.nodes[name] = extractor
-         self.params[name] = extractor.__dict__.get('params', None)
-
-         # assign each node a rank of 1 to init the graph
-         self._topo_counter[name] = 1
-
-         return self
-
-     def step(self,
-              from_node: extractor,
-              to_node: extractor,
-              prefilter_predicates: Optional[str] = None
-              ) -> Self:
-
-         self._node(from_node)
-         self._node(to_node)
-
-         from_node_name = from_node._extractor_name
-         to_node_name = to_node._extractor_name
-
-         self.edges[from_node_name].append((to_node_name, prefilter_predicates))
-
-         self._topo_counter[to_node_name] += 1
-
-         return self
-
-     """
-     Connect nodes as a fan out from one `from_node` to multiple `to_nodes` and respective `prefilter_predicates`.
-     Note: The user has to match the sizes of the lists to make sure they line up otherwise a None is used as a default.
-     """
-     def steps(self, from_node: extractor, to_nodes: List[extractor], prefilter_predicates: List[str] = []) -> Self:
-         print(f'{to_nodes}, {prefilter_predicates}, {prefilter_predicates}')
-         for t_n, p in itertools.zip_longest(to_nodes, prefilter_predicates, fillvalue=None):
-             self.step(from_node=from_node, to_node=t_n, prefilter_predicates=p)
-
-         return self
-
-     def _assign_start_node(self):
-         # this method should be called before a graph can be run
-         nodes = sorted(self._topo_counter.items(), key=lambda x: x[1])
-         self._start_node = nodes[0][0]
+     graph = RunGraph(name=name, input=input, start_node=start_node, runner=runner)
+     return graph
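
Note: `Graph` changes from a class into a factory that wires a `RunGraph` to a runner; only `LocalRunner` exists so far, and the remote path raises `NotImplementedError`. A minimal sketch, reusing the hypothetical `chunk_text` extractor above:

    from indexify.graph import Graph
    from indexify.extractor_sdk import Content

    g = Graph("my-pipeline", input=Content, start_node=chunk_text, run_local=True)
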
indexify/local_runner.py CHANGED
@@ -1,65 +1,125 @@
- from indexify import Content, Extractor
+ import hashlib
+ import os
+ import pickle
+ import shutil
+ from pathlib import Path
+
+ from indexify.extractor_sdk.data import BaseData, Feature
+ from indexify.extractor_sdk.extractor import extractor, Extractor

  from collections import defaultdict
- from typing import Any, Callable, Dict, Optional
+ from typing import Any, Callable, Dict, Optional, Union
+
+ from indexify.run_graph import RunGraph
+ from indexify.runner import Runner

- import json

- class LocalRunner:
+ class LocalRunner(Runner):
      def __init__(self):
-         self.results: Dict[str, Any] = defaultdict(list)  # TODO should the Any be Content?
+         self.results: Dict[str, Any] = defaultdict(
+             list
+         )  # TODO should the Any be Content?
+
+     def run(self, g, wf_input: BaseData):
+         return self._run(g, _input=wf_input, node_name=g._start_node)

-     def run(self, g, content: Content):
-         g._assign_start_node()
-         return self._run(g, content=content, node_name=g._start_node)
+     # the graph receives files, some labels, and the MIME type of the bytes;
+     # those bytes have to be a python type
+
+     # _input needs to be serializable into a python object (e.g. json) and Feature
+     def _run(self, g: RunGraph, _input: BaseData, node_name: str):
+         print(f"---- Starting node {node_name}")
+         print(f'node_name {node_name}')

-     def _run(self, g, content: Content, node_name: str):
          extractor_construct: Callable = g.nodes[node_name]
          params = g.params.get(node_name, None)

-         print(f"----Starting {node_name}")
+         # NOTE: users should clear the cache for nodes they would like to re-run
+         input_hash = hashlib.sha256(str(_input).encode()).hexdigest()
+         memo_output = self.get_from_memo(node_name, input_hash)
+         if memo_output is None:
+             print("=== FYI Writing output to cache")
+             res = extractor_construct().extract(input=_input, params=params)
+             self.put_into_memo(node_name, input_hash, pickle.dumps(res))
+         else:
+             print("=== Reading output from cache")
+             res = pickle.loads(memo_output)
+
+         if not isinstance(res, list):
+             res = [res]

-         res = extractor_construct().extract(content=content, params=params)
+         res_data = [i for i in res if not isinstance(i, Feature)]
+         res_features = [i for i in res if isinstance(i, Feature)]

-         self.results[node_name].extend(res)
+         self.results[node_name].extend(res_data)
+
+         for f in res_features:
+             _input.meta[f.name] = f.value
+
+         # this assumes that if an extractor emits features then the next edge
+         # will always process them
+         data_to_process = res_data
+         if len(res_features) > 0:
+             data_to_process.append(_input)

          for out_edge, pre_filter_predicate in g.edges[node_name]:
              # TODO there are no reductions yet, each recursion finishes its path and returns
-             for r in res:
+             for r in data_to_process:
                  if self._prefilter_content(content=r, prefilter_predicate=pre_filter_predicate):
                      continue

-                 self._run(g, content=r, node_name=out_edge)
+                 self._run(g, _input=r, node_name=out_edge)

      """
      Returns True if content should be filtered
      """
-     def _prefilter_content(self, content: Content, prefilter_predicate: Optional[str]) -> bool:
+     def _prefilter_content(self, content: BaseData, prefilter_predicate: Optional[str]) -> bool:
          if prefilter_predicate is None:
              return False

-         atoms = prefilter_predicate.split('and')
+         atoms = prefilter_predicate.split("and")
          if len(atoms) == 0:
              return False

          # TODO for now only support `and`, `=`, and string values
          bools = []
-         for feature in content.features:
-             if feature.feature_type == 'metadata':
-                 predicates = json.loads(feature.value)
+         metadata = content.get_features()['metadata']
+         for atom in atoms:
+             l, r = atom.split('=')
+             if l in metadata:
+                 bools.append(metadata[l] != r)

-                 print(f"predicates {predicates}")
+         return all(bools)

-                 for atom in atoms:
-                     l, r = atom.split('=')
-                     if l in predicates:
-                         print(f'predicates[l], r: {predicates[l], r}')
-                         bools.append(predicates[l] != r)
+     def get_result(self, node: Union[extractor, Extractor]) -> Any:
+         node_name = node.name
+         return self.results[node_name]

-         print(bools)
+     def deleted_from_memo(self, node_name):
+         path_prefix = f"./indexify_local_runner_cache/{node_name}"

-         return all(bools)
+         if os.path.exists(path_prefix) and os.path.isdir(path_prefix):
+             shutil.rmtree(path_prefix)

-     def get_result(self, node: Extractor) -> Content:
-         node_name = node._extractor_name
-         return self.results[node_name]
+     def get_from_memo(self, node_name, input_hash):
+         path_prefix = f"./indexify_local_runner_cache/{node_name}"
+         file_name = f"{input_hash}"
+         file_path = f"{path_prefix}/{file_name}"
+
+         if not os.path.exists(file_path):
+             return None
+
+         with open(file_path, 'rb') as f:
+             return f.read()
+
+     def put_into_memo(self, node_name, input_hash, output):
+         path_prefix = f"./indexify_local_runner_cache/{node_name}"
+         file_name = f"{input_hash}"
+         file_path = f"{path_prefix}/{file_name}"
+
+         os.makedirs(path_prefix, exist_ok=True)
+
+         Path(file_path).touch()
+
+         with open(file_path, 'wb') as f:
+             return f.write(output)
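
Note: new in this version, `LocalRunner` memoizes each node's pickled output on disk, keyed by a SHA-256 of the input's string form. A small helper (hypothetical, for illustration) that reproduces the cache path the code above computes:

    import hashlib

    def cache_path(node_name: str, _input) -> str:
        # ./indexify_local_runner_cache/<node_name>/<sha256(str(input))>
        input_hash = hashlib.sha256(str(_input).encode()).hexdigest()
        return f"./indexify_local_runner_cache/{node_name}/{input_hash}"
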
indexify/run_graph.py ADDED
@@ -0,0 +1,122 @@
+ import json
+
+ from .extractor_sdk import Content, extractor, Extractor
+
+ from collections import defaultdict
+ from typing import Any, Dict, List, Optional, Type, Union
+ from pydantic import BaseModel
+
+ import itertools
+
+ from .runner import Runner
+
+ @extractor(description="id function")
+ def _id(content: Content) -> List[Content]:
+     return [content]
+
+
+ class RunGraph:
+     def __init__(self, name: str, input: Type[BaseModel], start_node: extractor, runner: Runner):
+         # TODO check for cycles
+         self.name = name
+
+         self.nodes: Dict[str, Union[extractor, Extractor]] = {}
+         self.params: Dict[str, Any] = {}
+
+         self.edges: Dict[str, List[(str, str)]] = defaultdict(list)
+
+         self.nodes["start"] = _id
+         self.nodes["end"] = _id
+
+         self._topo_counter = defaultdict(int)
+
+         self._start_node = None
+         self._input = input
+
+         self.runner = runner
+
+     def _node(self, extractor: Union[extractor, Extractor], params: Any = None) -> 'RunGraph':
+         name = extractor.name
+
+         # if you've already inserted a node just ignore the new insertion.
+         if name in self.nodes:
+             return
+
+         self.nodes[name] = extractor
+         self.params[name] = extractor.__dict__.get("params", None)
+
+         # assign each node a rank of 1 to init the graph
+         self._topo_counter[name] = 1
+
+         return self
+
+     def add_edge(
+         self,
+         from_node: extractor,
+         to_node: extractor,
+         prefilter_predicates: Optional[str] = None,
+     ) -> 'RunGraph':
+
+         self._node(from_node)
+         self._node(to_node)
+
+         from_node_name = from_node.name
+         to_node_name = to_node.name
+
+         self.edges[from_node_name].append((to_node_name, prefilter_predicates))
+
+         self._topo_counter[to_node_name] += 1
+
+         return self
+
+     """
+     Connect nodes as a fan out from one `from_node` to multiple `to_nodes` and respective `prefilter_predicates`.
+     Note: The user has to match the sizes of the lists to make sure they line up otherwise a None is used as a default.
+     """
+
+     def steps(
+         self,
+         from_node: extractor,
+         to_nodes: List[extractor],
+         prefilter_predicates: List[str] = [],
+     ) -> 'RunGraph':
+         print(f"{to_nodes}, {prefilter_predicates}, {prefilter_predicates}")
+         for t_n, p in itertools.zip_longest(
+             to_nodes, prefilter_predicates, fillvalue=None
+         ):
+             self.step(from_node=from_node, to_node=t_n, prefilter_predicates=p)
+
+         return self
+
+     def add_param(self, node: extractor, params: Dict[str, Any]):
+         try:
+             # check if the params can be serialized since the server needs this
+             json.dumps(params)
+         except Exception:
+             raise Exception(f"For node {node.name}, cannot serialize params as json.")
+
+         self.params[node.name] = params
+
+     def run(self, wf_input, local):
+         self._assign_start_node()
+         # self.runner = LocalRunner()
+         self.runner.run(self, wf_input=wf_input)
+         pass
+
+     def clear_cache_for_node(self, node: Union[extractor, Extractor]):
+         if node.name not in self.nodes.keys():
+             raise Exception(f"Node with name {node.name} not found in graph")
+
+         self.runner.deleted_from_memo(node.name)
+
+     def clear_cache_for_all_nodes(self):
+         for node_name in self.nodes:
+             self.runner.deleted_from_memo(node_name=node_name)
+
+     def get_result(self, node: Union[extractor, Extractor]) -> Any:
+         return self.runner.results[node.name]
+
+     def _assign_start_node(self):
+         # this method should be called before a graph can be run
+         nodes = sorted(self._topo_counter.items(), key=lambda x: x[1])
+         self._start_node = nodes[0][0]
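
Note: `RunGraph.run` assigns the start node (the node with the lowest rank in `_topo_counter`) and hands the input to the runner; results are read back per node, and the on-disk memo can be cleared to force re-execution. A short sketch continuing the hypothetical pipeline above:

    g.run(wf_input=Content.from_text("hello\n\nworld"), local=True)
    chunks = g.get_result(chunk_text)  # outputs recorded for that node
    g.clear_cache_for_all_nodes()      # drop memoized outputs before a re-run
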
indexify/runner.py ADDED
@@ -0,0 +1,22 @@
+ from abc import ABC
+
+ from indexify.extractor_sdk.data import BaseData
+ from indexify.extractor_sdk.extractor import extractor, Extractor
+
+ from typing import Any, Union
+
+ class Runner(ABC):
+     def run(self, g, wf_input: BaseData):
+         raise NotImplementedError()
+
+     def get_result(self, node: Union[extractor, Extractor]) -> Any:
+         raise NotImplementedError()
+
+     def deleted_from_memo(self, node_name):
+         raise NotImplementedError()
+
+     def get_from_memo(self, node_name, input_hash):
+         raise NotImplementedError()
+
+     def put_into_memo(self, node_name, input_hash, output):
+         raise NotImplementedError()
indexify-0.0.39.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: indexify
- Version: 0.0.37
+ Version: 0.0.39
  Summary: Python Client for Indexify
  Home-page: https://github.com/tensorlakeai/indexify
  License: Apache 2.0
@@ -13,9 +13,9 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
- Requires-Dist: httpx[http2] (>=0.26,<0.27)
+ Requires-Dist: httpx[http2] (>=0,<1)
  Requires-Dist: pydantic (>=2.8,<3.0)
- Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
+ Requires-Dist: pyyaml (>=6,<7)
  Project-URL: Repository, https://github.com/tensorlakeai/indexify
  Description-Content-Type: text/markdown

indexify-0.0.39.dist-info/RECORD ADDED
@@ -0,0 +1,23 @@
+ indexify/__init__.py,sha256=ZDpPkRz4hBo6eqArhVBxqIscLSiD20q5rOHPYyOTloE,503
+ indexify/base_client.py,sha256=Db-BNYQ6yNmOIXPaQN8W5qjTYvfFvPzoxC9206YRc-U,2755
+ indexify/client.py,sha256=FPCO2DN6RstKLasmNrPxRhzBXDgM14tbc3eDDxl8J_A,25998
+ indexify/data_loaders/__init__.py,sha256=TmOJLgKC5gM7_1n7zxYiuza3fOilIiYYupxBGd31PfA,1339
+ indexify/data_loaders/local_directory_loader.py,sha256=0X_FgLS5unisJSij8LICv1htp8IdW09LbTIJ2wvVJg4,1246
+ indexify/data_loaders/url_loader.py,sha256=shjw6dYBlaxA_PzP6qCB9TTtbPiY4h6FV7uopDbRQCc,1546
+ indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
+ indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
+ indexify/extraction_policy.py,sha256=awNDqwCz0tr4jTQmGf7s8_s6vcEuxMb0xynEl7b7iPI,2076
+ indexify/extractor_sdk/__init__.py,sha256=T512UtvFPUXEXlnT9HHHLHPcEau1Acoac_ksByuo7jA,348
+ indexify/extractor_sdk/data.py,sha256=632fY4S_F_aYPLtOl_7dZnSAyMvVZY8ujSSIWJ9k104,2781
+ indexify/extractor_sdk/extractor.py,sha256=CtlRn8JC8vGn9fm4QameA47x9T1l_cRpkJMUYYpetco,10457
+ indexify/extractor_sdk/utils.py,sha256=_j8WflgOM0Qkf2NjhK2p1xXuwq4drLxO0mgKVPEHhlw,6594
+ indexify/graph.py,sha256=fVZeGIcSqO3p8dGIQOdbuFYQ-8QaTQ7Jr37OefA2Phk,549
+ indexify/local_runner.py,sha256=Ri-Wpw2qgnQ4I3fRR9qdXXRDASuZnu4-VR2xECG9gnY,4346
+ indexify/run_graph.py,sha256=gw3IEf8-myVaHUV7g6LPt8-uSMIVr7S0Zs62aT7UB90,3757
+ indexify/runner.py,sha256=M_3_GWYyPpb4lR5KFTpW8OAgp-fm9kYd_5xEqmiCBU4,637
+ indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
+ indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
+ indexify-0.0.39.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ indexify-0.0.39.dist-info/METADATA,sha256=EvEM7lkuDP1YJsh0wskXIBMQxivHYPKfPNERLV0eaa0,1877
+ indexify-0.0.39.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ indexify-0.0.39.dist-info/RECORD,,
indexify/extractor.py DELETED
@@ -1,122 +0,0 @@
- from typing import Union, Optional, List, Type, Tuple, Callable, get_type_hints, Dict
- import inspect
- from pydantic import BaseModel
- from abc import ABC, abstractmethod
- from .data import Content, Feature
- import json
-
- class EmbeddingSchema(BaseModel):
-     dimension: int
-
- class Extractor(ABC):
-     name: str = ""
-
-     version: str = "0.0.0"
-
-     system_dependencies: List[str] = []
-
-     python_dependencies: List[str] = []
-
-     description: str = ""
-
-     input_mime_types = ["text/plain"]
-
-     def extract(
-         self, input: Type[BaseModel], params: Type[BaseModel] = None
-     ) -> List[Union[Feature, Type[BaseModel]]]:
-         """
-         Extracts information from the content. Returns a list of features to add
-         to the content.
-         It can also return a list of Content objects, which will be added to storage
-         and any extraction policies defined will be applied to them.
-         """
-         pass
-
-     def extract_batch(
-         self, input_list: List[Type[BaseModel]], params: List[Type[BaseModel]] = None
-     ) -> List[List[Union[Feature, Type[BaseModel]]]]:
-         """
-         Extracts information from the content. Returns a list of features to add
-         to the content.
-         It can also return a list of Content objects, which will be added to storage
-         and any extraction policies defined will be applied to them.
-         """
-         pass
-
-     @classmethod
-     @abstractmethod
-     def sample_input(cls) -> Tuple[Content, Type[BaseModel]]:
-         pass
-
-     @classmethod
-     @abstractmethod
-     def embedding_schemas(cls) -> Dict[str, EmbeddingSchema]:
-         raise NotImplementedError
-
-     def describe(self) -> Dict:
-         embedding_schemas = {}
-         try:
-             embedding_schemas = self.embedding_schemas()
-         except NotImplementedError:
-             pass
-
-         json_schema = (
-             self._param_cls.model_json_schema() if self._param_cls is not None else None
-         )
-
-         return {
-             "name": self.name,
-             "version": self.version,
-             "description": self.description,
-             "system_dependencies": self.system_dependencies,
-             "python_dependencies": self.python_dependencies,
-             "input_mime_types": self.input_mime_types,
-             "embedding_schemas": embedding_schemas,
-             "input_params": json.dumps(json_schema),
-         }
-
- def extractor(
-     name: Optional[str] = None,
-     description: Optional[str] = "",
-     version: Optional[str] = "",
-     python_dependencies: Optional[List[str]] = None,
-     system_dependencies: Optional[List[str]] = None,
-     input_mime_types: Optional[List[str]] = None,
-     embedding_schemas: Optional[Dict[str, EmbeddingSchema]] = None,
-     sample_content: Optional[Callable] = None,
- ):
-     args = locals()
-     del args["sample_content"]
-
-     def construct(fn):
-         def wrapper():
-             hint = get_type_hints(fn).get("params", dict)
-
-             if not args.get("name"):
-                 args["name"] = (
-                     f"{inspect.getmodule(inspect.stack()[1][0]).__name__}:{fn.__name__}"
-                 )
-
-             class DecoratedFn(Extractor):
-                 @classmethod
-                 def extract(cls, input: Type[BaseModel], params: Type[BaseModel]=None) -> List[Content]:  # type: ignore
-                     # TODO we can force all the functions to take in a parms object
-                     # or check if someone adds a params
-                     if params is None:
-                         return fn(input)
-                     else:
-                         return fn(input, params)
-
-                 def sample_input(self) -> Content:
-                     return sample_content() if sample_content else self.sample_text()
-
-             for key, val in args.items():
-                 setattr(DecoratedFn, key, val)
-
-             return DecoratedFn
-
-         wrapper._extractor_name = fn.__name__
-
-         return wrapper
-
-     return construct
indexify-0.0.37.dist-info/RECORD DELETED
@@ -1,18 +0,0 @@
- indexify/__init__.py,sha256=W58FqmnKHIx-gHKTBDQa1QI49Gi8f1rw90yDg31jwgQ,743
- indexify/client.py,sha256=faGiWAtdXkL4Vmx6xr0iHJLIBwhS2XZbQ6ld_7sMsBc,25874
- indexify/data.py,sha256=91We7J2QAKBOTu1yF3ApTl4yl4C-nDL2WSXhBdekLWg,2334
- indexify/data_loaders/__init__.py,sha256=EiYemxCP4zRfDWnDKiX6-SFwXVmv1TSdcXHBQRbE_Uw,1309
- indexify/data_loaders/local_directory_loader.py,sha256=kF7VwkuOJFBrhKrR7IOOdZ4TDAItw_CyUOfcuej1CKI,1080
- indexify/error.py,sha256=3umTeYb0ugtUyehV1ibfvaeACxAONPyWPc-1HRN4d1M,856
- indexify/exceptions.py,sha256=vjd5SPPNFIEW35GorSIodsqvm9RKHQm9kdp8t9gv-WM,111
- indexify/extraction_policy.py,sha256=awNDqwCz0tr4jTQmGf7s8_s6vcEuxMb0xynEl7b7iPI,2076
- indexify/extractor.py,sha256=HnLot4DQv7aVI3FwFNH83LzKjq7DlSR1-wmpcVC89tE,3930
- indexify/extractor_utils.py,sha256=68V5vZB9GYx648dyyVKAia0M4pG_R31QPqUQz3ZZ1FQ,6593
- indexify/graph.py,sha256=hUGTpaI3ale54sQ90u5P3-RJCwsSlEJg1V1R0rmCZE0,2576
- indexify/local_runner.py,sha256=VV4Ff_ctibw0ZL4u1wVA7drRx4zLTgNmT_qLX3Cq2SY,2167
- indexify/settings.py,sha256=LSaWZ0ADIVmUv6o6dHWRC3-Ry5uLbCw2sBSg1e_U7UM,99
- indexify/utils.py,sha256=rDN2lrsAs9noJEIjfx6ukmC2SAIyrlUt7QU-kaBjujM,125
- indexify-0.0.37.dist-info/LICENSE.txt,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- indexify-0.0.37.dist-info/METADATA,sha256=_3uThIPuUiPQ9BBVoqoEEo5Prqp_LHx59jHrZ2CpSgk,1891
- indexify-0.0.37.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- indexify-0.0.37.dist-info/RECORD,,