nv-ingest-client 2025.11.17.dev20251117__py3-none-any.whl → 2025.12.17.dev20251217__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,7 @@ from typing import Dict
10
10
  from typing import List
11
11
  from typing import Optional
12
12
  from typing import Union
13
+ from typing import Tuple
13
14
  from uuid import UUID
14
15
 
15
16
  from nv_ingest_client.primitives.tasks import Task
@@ -222,7 +223,9 @@ class BatchJobSpec:
222
223
  A dictionary that maps document types to a list of `JobSpec` instances.
223
224
  """
224
225
 
225
- def __init__(self, job_specs_or_files: Optional[Union[List[JobSpec], List[str]]] = None) -> None:
226
+ def __init__(
227
+ self, job_specs_or_files: Optional[Union[List[JobSpec], List[str], List[Tuple[str, BytesIO]]]] = None
228
+ ) -> None:
226
229
  """
227
230
  Initializes the BatchJobSpec instance.
228
231
 
@@ -239,6 +242,13 @@ class BatchJobSpec:
239
242
  self.from_job_specs(job_specs_or_files)
240
243
  elif isinstance(job_specs_or_files[0], str):
241
244
  self.from_files(job_specs_or_files)
245
+ elif (
246
+ isinstance(job_specs_or_files[0], tuple)
247
+ and len(job_specs_or_files[0]) == 2
248
+ and isinstance(job_specs_or_files[0][0], str)
249
+ and isinstance(job_specs_or_files[0][1], BytesIO)
250
+ ):
251
+ self.from_buffers(job_specs_or_files)
242
252
  else:
243
253
  raise ValueError("Invalid input type for job_specs. Must be a list of JobSpec or file paths.")
244
254
 
@@ -282,6 +292,21 @@ class BatchJobSpec:
282
292
  for job_spec in job_specs:
283
293
  self.add_job_spec(job_spec)
284
294
 
295
def from_buffers(self, buffers: List[Tuple[str, BytesIO]]) -> None:
    """
    Populate the batch from in-memory buffers.

    Each ``(name, BytesIO)`` pair is converted into a `JobSpec` by
    `create_job_specs_for_buffers`, and every resulting spec is registered
    on this batch through `add_job_spec`.

    Parameters
    ----------
    buffers : List[Tuple[str, BytesIO]]
        A list of tuples containing the name of the buffer and the BytesIO object.
    """
    # Function-scope import kept from the original implementation
    # (presumably to avoid a circular import at module load -- TODO confirm).
    from nv_ingest_client.util.util import create_job_specs_for_buffers

    for spec in create_job_specs_for_buffers(buffers):
        self.add_job_spec(spec)
309
+
285
310
  def _from_dataset(self, dataset: str, shuffle_dataset: bool = True) -> None:
286
311
  """
287
312
  Internal method to initialize the batch from a dataset.
@@ -22,18 +22,24 @@ class CaptionTask(Task):
22
22
  api_key: str = None,
23
23
  endpoint_url: str = None,
24
24
  prompt: str = None,
25
+ system_prompt: str = None,
25
26
  model_name: str = None,
26
27
  ) -> None:
27
28
  super().__init__()
28
29
 
29
30
  # Use the API schema for validation
30
31
  validated_data = IngestTaskCaptionSchema(
31
- api_key=api_key, endpoint_url=endpoint_url, prompt=prompt, model_name=model_name
32
+ api_key=api_key,
33
+ endpoint_url=endpoint_url,
34
+ prompt=prompt,
35
+ system_prompt=system_prompt,
36
+ model_name=model_name,
32
37
  )
33
38
 
34
39
  self._api_key = validated_data.api_key
35
40
  self._endpoint_url = validated_data.endpoint_url
36
41
  self._prompt = validated_data.prompt
42
+ self._system_prompt = validated_data.system_prompt
37
43
  self._model_name = validated_data.model_name
38
44
 
39
45
  def __str__(self) -> str:
@@ -49,6 +55,8 @@ class CaptionTask(Task):
49
55
  info += f" endpoint_url: {self._endpoint_url}\n"
50
56
  if self._prompt:
51
57
  info += f" prompt: {self._prompt}\n"
58
+ if self._system_prompt:
59
+ info += f" system_prompt: {self._system_prompt}\n"
52
60
  if self._model_name:
53
61
  info += f" model_name: {self._model_name}\n"
54
62
 
@@ -69,6 +77,9 @@ class CaptionTask(Task):
69
77
  if self._prompt:
70
78
  task_properties["prompt"] = self._prompt
71
79
 
80
+ if self._system_prompt:
81
+ task_properties["system_prompt"] = self._system_prompt
82
+
72
83
  if self._model_name:
73
84
  task_properties["model_name"] = self._model_name
74
85
 
@@ -8,6 +8,8 @@
8
8
 
9
9
  import logging
10
10
  import os
11
+ import warnings
12
+ from typing import get_args
11
13
  from typing import Any
12
14
  from typing import Dict
13
15
  from typing import Literal
@@ -52,15 +54,27 @@ _DEFAULT_EXTRACTOR_MAP = {
52
54
 
53
55
  _Type_Extract_Method_PDF = Literal[
54
56
  "adobe",
55
- "nemoretriever_parse",
57
+ "nemotron_parse",
56
58
  "haystack",
57
59
  "llama_parse",
58
60
  "pdfium",
59
61
  "tika",
60
62
  "unstructured_io",
63
+ "unstructured_local",
64
+ "pdfium_hybrid",
61
65
  "ocr",
62
66
  ]
63
67
 
68
+ _Type_Extract_Method_DOCX = Literal[
69
+ "python_docx",
70
+ "render_as_pdf",
71
+ ]
72
+
73
+ _Type_Extract_Method_PPTX = Literal[
74
+ "python_pptx",
75
+ "render_as_pdf",
76
+ ]
77
+
64
78
  _Type_Extract_Images_Method = Literal["group", "yolox"]
65
79
 
66
80
  _Type_Extract_Tables_Method_PDF = Literal["yolox", "paddle"]
@@ -74,7 +88,7 @@ class ExtractTask(Task):
74
88
  def __init__(
75
89
  self,
76
90
  document_type,
77
- extract_method: _Type_Extract_Method_PDF = None,
91
+ extract_method: Optional[str] = None,
78
92
  extract_text: bool = False,
79
93
  extract_images: bool = False,
80
94
  extract_tables: bool = False,
@@ -109,6 +123,12 @@ class ExtractTask(Task):
109
123
  )
110
124
  extract_method = _DEFAULT_EXTRACTOR_MAP[document_type_lower]
111
125
 
126
+ if extract_method == "nemoretriever_parse":
127
+ logger.warning("'nemoretriever_parse' is deprecated. Please use 'nemotron_parse' instead.")
128
+ extract_method = "nemotron_parse"
129
+
130
+ self._validate_extract_method(document_type, extract_method)
131
+
112
132
  # Set default extract_charts if None
113
133
  if extract_charts is None:
114
134
  extract_charts = extract_tables
@@ -240,3 +260,31 @@ class ExtractTask(Task):
240
260
  @property
241
261
  def document_type(self):
242
262
  return self._document_type.value
263
+
264
def _validate_extract_method(self, document_type: str, extract_method: str):
    """
    Check that ``extract_method`` is usable for ``document_type``.

    For the document types with a known method list (``docx``, ``pptx``,
    ``pdf``; matched case-insensitively) an unsupported method is rejected.
    For any other document type nothing is rejected, but a warning is
    emitted when the method is an Office-only one (valid for DOCX/PPTX and
    not for PDF), since a mixed batch containing PDFs may then fail.

    Parameters
    ----------
    document_type : str
        The document type the task targets.
    extract_method : str
        The extraction method name to validate.

    Raises
    ------
    ValueError
        If ``document_type`` is docx, pptx or pdf and ``extract_method`` is
        not one of the methods listed for that type.
    """
    doc_type = document_type.lower()

    # Allowed method names per document type, derived from the module-level
    # Literal aliases so the two can never drift apart.
    valid_methods = {
        "docx": set(get_args(_Type_Extract_Method_DOCX)),
        "pptx": set(get_args(_Type_Extract_Method_PPTX)),
        "pdf": set(get_args(_Type_Extract_Method_PDF)),
    }

    allowed = valid_methods.get(doc_type)
    if allowed is not None:
        if extract_method not in allowed:
            # sorted() keeps the message stable; a raw set prints in an
            # order that varies from run to run.
            raise ValueError(
                f"'{extract_method}' is invalid for {doc_type.upper()}. Options: {sorted(allowed)}"
            )
        return

    # Unknown / generic document type: only warn about Office-only methods.
    office_methods = valid_methods["docx"] | valid_methods["pptx"]
    if extract_method in office_methods and extract_method not in valid_methods["pdf"]:
        warnings.warn(
            f"extract_method '{extract_method}' is valid for Office documents but NOT for PDFs. "
            "If your batch includes PDFs, extraction may fail for those files. "
            "Consider leaving extract_method=None for mixed batches.",
            # Attribute the warning to the code that constructed the task
            # (this method <- __init__ <- caller) rather than to this file.
            # NOTE(review): assumes the only call site is __init__ -- confirm.
            stacklevel=3,
        )
@@ -7,8 +7,7 @@
7
7
  # pylint: disable=too-many-arguments
8
8
 
9
9
  import logging
10
- from typing import Dict
11
- from typing import Literal
10
+ from typing import Dict, Literal, Optional
12
11
 
13
12
  from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskStoreSchema
14
13
  from nv_ingest_api.internal.schemas.meta.ingest_job_schema import IngestTaskStoreEmbedSchema
@@ -17,23 +16,19 @@ from .task_base import Task
17
16
 
18
17
  logger = logging.getLogger(__name__)
19
18
 
20
- _DEFAULT_STORE_METHOD = "minio"
21
-
22
19
 
23
20
  class StoreTask(Task):
24
21
  """
25
22
  Object for image storage task.
26
23
  """
27
24
 
28
- _Type_Content_Type = Literal["image",]
29
-
30
- _Type_Store_Method = Literal["minio",]
31
-
32
25
  def __init__(
33
26
  self,
34
27
  structured: bool = True,
35
28
  images: bool = False,
36
- store_method: _Type_Store_Method = None,
29
+ storage_uri: Optional[str] = None,
30
+ storage_options: Optional[dict] = None,
31
+ public_base_url: Optional[str] = None,
37
32
  params: dict = None,
38
33
  **extra_params,
39
34
  ) -> None:
@@ -51,12 +46,19 @@ class StoreTask(Task):
51
46
 
52
47
  # Use the API schema for validation
53
48
  validated_data = IngestTaskStoreSchema(
54
- structured=structured, images=images, method=store_method or _DEFAULT_STORE_METHOD, params=merged_params
49
+ structured=structured,
50
+ images=images,
51
+ storage_uri=storage_uri,
52
+ storage_options=storage_options or {},
53
+ public_base_url=public_base_url,
54
+ params=merged_params,
55
55
  )
56
56
 
57
57
  self._structured = validated_data.structured
58
58
  self._images = validated_data.images
59
- self._store_method = validated_data.method
59
+ self._storage_uri = validated_data.storage_uri
60
+ self._storage_options = validated_data.storage_options
61
+ self._public_base_url = validated_data.public_base_url
60
62
  self._params = validated_data.params
61
63
  self._extra_params = extra_params
62
64
 
@@ -68,7 +70,8 @@ class StoreTask(Task):
68
70
  info += "Store Task:\n"
69
71
  info += f" store structured types: {self._structured}\n"
70
72
  info += f" store image types: {self._images}\n"
71
- info += f" store method: {self._store_method}\n"
73
+ info += f" storage uri: {self._storage_uri}\n"
74
+ info += f" public base url: {self._public_base_url}\n"
72
75
  for key, value in self._extra_params.items():
73
76
  info += f" {key}: {value}\n"
74
77
  for key, value in self._params.items():
@@ -81,9 +84,11 @@ class StoreTask(Task):
81
84
  """
82
85
 
83
86
  task_properties = {
84
- "method": self._store_method,
85
87
  "structured": self._structured,
86
88
  "images": self._images,
89
+ "storage_uri": self._storage_uri,
90
+ "storage_options": self._storage_options,
91
+ "public_base_url": self._public_base_url,
87
92
  "params": self._params,
88
93
  **self._extra_params,
89
94
  }
@@ -145,3 +145,26 @@ def extract_file_content(path: str) -> Tuple[str, DocumentTypeEnum]:
145
145
 
146
146
  logger.debug(f"Content extracted from '{path}'")
147
147
  return content, DocumentTypeEnum(document_type)
148
+
149
+
150
def extract_content_from_buffer(buffer: Tuple[str, BytesIO]) -> Tuple[str, DocumentTypeEnum]:
    """
    Extracts the content and document type from an in-memory buffer.

    The document type is determined from the buffer's name via
    `get_or_infer_file_type`. TXT, MD and HTML buffers are read through
    `detect_encoding_and_read_text_file`; every other type is passed to
    `serialize_to_base64`.

    Parameters
    ----------
    buffer : Tuple[str, BytesIO]
        A ``(name, stream)`` pair: the buffer's name and its BytesIO object.

    Returns
    -------
    Tuple[str, DocumentTypeEnum]
        The extracted content and the document type as a `DocumentTypeEnum`.

    Raises
    ------
    ValueError
        If reading or encoding the buffer fails; the underlying exception is
        chained as the cause.
    """
    name = buffer[0]
    document_type = get_or_infer_file_type(name)
    try:
        if document_type in (
            DocumentTypeEnum.TXT,
            DocumentTypeEnum.MD,
            DocumentTypeEnum.HTML,
        ):
            content = detect_encoding_and_read_text_file(buffer[1])
        else:
            content = serialize_to_base64(buffer[1])
    except Exception as e:
        logger.error(f"Error processing buffer {name}: {e}")

        raise ValueError(f"Failed to extract content from buffer {name}") from e

    logger.debug(f"Content extracted from '{name}'")
    return content, DocumentTypeEnum(document_type)
@@ -12,10 +12,12 @@ import math
12
12
  import heapq
13
13
  from typing import Dict
14
14
  from typing import List
15
+ from typing import Tuple
16
+ from io import BytesIO
15
17
 
16
18
  from nv_ingest_api.util.exception_handlers.decorators import unified_exception_handler
17
19
  from nv_ingest_client.primitives.jobs.job_spec import JobSpec
18
- from nv_ingest_client.util.file_processing.extract import extract_file_content
20
+ from nv_ingest_client.util.file_processing.extract import extract_file_content, extract_content_from_buffer
19
21
 
20
22
  logger = logging.getLogger(__name__)
21
23
 
@@ -350,6 +352,37 @@ def create_job_specs_for_batch(files_batch: List[str]) -> List[JobSpec]:
350
352
  return job_specs
351
353
 
352
354
 
355
def create_job_specs_for_buffers(buffers: List[Tuple[str, BytesIO]]) -> List[JobSpec]:
    """
    Create job specifications (JobSpecs) for a list of buffers.

    Each buffer is processed to extract its content and document type, and
    one JobSpec is built per buffer. The buffer's name is used as both the
    ``source_id`` and the ``source_name`` of its JobSpec.

    Parameters
    ----------
    buffers : List[Tuple[str, BytesIO]]
        A list of tuples containing the name of the buffer and the BytesIO object.

    Returns
    -------
    List[JobSpec]
        A list of JobSpecs, one per input buffer, in input order.
    """

    def _spec_for(name, stream):
        # Extract payload and document type, then wrap them in a JobSpec.
        payload, doc_type = extract_content_from_buffer((name, stream))
        return JobSpec(
            document_type=doc_type,
            payload=payload,
            source_id=name,
            source_name=name,
        )

    return [_spec_for(name, stream) for name, stream in buffers]
384
+
385
+
353
386
  def apply_pdf_split_config_to_job_specs(job_specs: List[JobSpec], pages_per_chunk: int) -> None:
354
387
  """
355
388
  Apply PDF split configuration to a list of JobSpec objects.
@@ -1,27 +1,243 @@
"""Abstract Vector Database (VDB) operator API.

This module defines the `VDB` abstract base class which specifies the
interface that custom vector-database operators must implement to integrate
with NV-Ingest.

The implementation details and an example OpenSearch operator are described
in the `examples/building_vdb_operator.ipynb` notebook in this repository, and a
production-ready OpenSearch implementation is available at
`client/src/nv_ingest_client/util/vdb/opensearch.py`.

Design goals:
- Provide a small, well-documented interface that supports common vector
  database operations: index creation, batch ingestion, nearest-neighbor
  retrieval, and a simple `run` orchestration entry-point used by the
  NV-Ingest pipeline.
- Keep the API flexible by accepting `**kwargs` on methods so implementers can
  pass database-specific options without changing the interface.

Typical implementation notes (inferred from the example OpenSearch operator):
- Constructor accepts connection and index configuration parameters such as
  `host`, `port`, `index_name`, `dense_dim` and feature toggles for content
  types (e.g. `enable_text`, `enable_images`).
- `create_index` should be able to create (and optionally recreate) an
  index with appropriate vector settings (k-NN, HNSW/FAISS parameters, etc.).
- `write_to_index` should accept batches of NV-Ingest records, perform
  validation/transformation, and write documents into the database efficiently
  (bulk APIs are recommended).
- `retrieval` should accept a list of textual queries, convert them to
  embeddings (by calling an external embedding service or model), perform a
  vector search (top-k), and return cleaned results (e.g., removing stored
  dense vectors from returned payloads).
"""

# NOTE: the module docstring above must stay the first statement in the file;
# placed after the import it would be a discarded string expression and the
# module's __doc__ would be None.
from abc import ABC, abstractmethod


class VDB(ABC):
    """Abstract base class for Vector Database operators.

    Subclasses must implement the abstract methods below. The interface is
    intentionally small and uses `**kwargs` to allow operator-specific
    configuration without changing the common API.

    Example (high level):

        class OpenSearch(VDB):
            def __init__(self, **kwargs):
                # parse kwargs, initialize client, call super().__init__(**kwargs)
                ...

            def create_index(self, **kwargs):
                # create index, mappings, settings
                ...

            def write_to_index(self, records: list, **kwargs):
                # transform NV-Ingest records and write to database
                ...

            def retrieval(self, queries: list, **kwargs):
                # convert queries to embeddings, k-NN search, format results
                ...

            def run(self, records):
                # orchestrate create_index + write_to_index
                ...

    Notes on recommended constructor parameters (not enforced by this ABC):
    - host (str): database hostname (default: 'localhost')
    - port (int): database port (default: 9200 for OpenSearch/Elasticsearch)
    - index_name (str): base index name used by the operator
    - dense_dim (int): dimensionality of stored dense embeddings
    - enable_text/enable_images/... (bool): content-type toggles used when
      extracting text from NV-Ingest records before indexing

    The concrete operator may accept additional parameters (username,
    password, ssl options, client-specific flags). Passing these via
    `**kwargs` is the intended pattern.
    """

    @abstractmethod
    def __init__(self, **kwargs):
        """Initialize the VDB operator.

        Implementations should extract configuration values from `kwargs`
        (or use defaults) and initialize any client connections required to
        talk to the target vector database. The base implementation copies
        every keyword argument onto the instance as an attribute;
        implementations should call `super().__init__(**kwargs)` only if they
        want that behavior (the base class itself does not require it).

        Parameters (suggested/common):
        - host (str): database host
        - port (int): database port
        - index_name (str): base name for created indices
        - dense_dim (int): embedding vector dimension
        - enable_text (bool): whether text content should be extracted/indexed
        - enable_images (bool), enable_audio (bool), etc.: other toggles

        The constructor should not perform heavy operations (like creating
        indices) unless explicitly desired; prefer leaving that work to
        `create_index` to make the operator easier to test.
        """
        # Store every keyword argument as an instance attribute.
        self.__dict__.update(kwargs)

    @abstractmethod
    def create_index(self, **kwargs):
        """Create and configure the index(es) required by this operator.

        Implementations must ensure an appropriate index (or indices) exist
        before data ingestion. For vector indexes this typically means
        creating settings and mappings that enable k-NN/vector search (for
        example, enabling an HNSW/FAISS engine, setting `dimension`, and any
        engine-specific parameters).

        Common keyword arguments (operator-specific):
        - recreate (bool): if True, delete and recreate the index even if it
          already exists (default: False)
        - index_name (str): override the operator's configured index name for
          this call

        Returns:
            implementation-specific result (e.g., a boolean, the created
            index name, or the raw response from the database client). There
            is no strict requirement here because different DB clients return
            different values; document behavior in concrete implementations.
        """
        pass

    @abstractmethod
    def write_to_index(self, records: list, **kwargs):
        """Write a batch of NV-Ingest records to the vector database.

        This method receives `records` formatted as NV-Ingest provides them
        (commonly a list of record-sets). Implementations are responsible for
        transforming each record into the target database document format,
        validating the presence of embeddings and content, and using the most
        efficient ingestion API available (for example a bulk endpoint).

        Expected behavior:
        - Iterate over the provided `records` (which can be nested lists of
          record dictionaries) and transform each record to the DB document
          structure (fields such as `dense` for the vector, `text` for the
          content, and `metadata` for auxiliary fields are common in the
          repository examples).
        - Skip records missing required fields (for example, missing
          embeddings) and log or report failures as appropriate.
        - Use batching / bulk APIs to reduce overhead when writing large
          volumes of documents.

        Parameters:
        - records (list): NV-Ingest records (see repository examples for
          structure)
        - batch_size (int, optional): how many documents to send per bulk
          request; database-specific implementations can use this hint

        Returns:
            implementation-specific result (e.g., number of documents
            indexed, client response for bulk API). Concrete implementations
            should document exact return values and failure semantics.
        """
        pass

    @abstractmethod
    def retrieval(self, queries: list, **kwargs):
        """Perform similarity search for a list of text queries.

        The typical retrieval flow implemented by operators in this ecosystem
        is:
        1. Convert each textual `query` into a dense embedding using an
           external embedding model or service (the example uses an NVIDIA
           embedding model via `llama_index.embeddings.nvidia.NVIDIAEmbedding`).
        2. Issue a vector (k-NN) search to the database using the generated
           embedding, requesting the top-k (configurable) neighbors.
        3. Post-process results (for example, remove stored dense vectors
           from returned documents to reduce payload size) and return a
           list-of-lists of result documents aligned with the input `queries`.

        Keyword arguments (common):
        - index_name (str): index to search (default: operator's configured
          index_name)
        - top_k (int): number of nearest neighbors to return (default: 10)
        - embedding_endpoint / model_name / nvidia_api_key: parameters needed
          when the operator integrates with an external embedding service.

        Parameters:
        - queries (list[str]): list of text queries to be vectorized and
          searched

        Returns:
        - results (list[list[dict]]): for each query, a list of hit documents
          (concrete implementations should specify the document shape they
          return). Operators should remove large binary/vector fields from
          responses where possible.
        """
        pass

    @abstractmethod
    def run(self, records):
        """Main entry point used by the NV-Ingest pipeline.

        The `run` method is intended to be a simple orchestration layer that
        ensures the index exists and then ingests provided records. A minimal
        recommended implementation is::

            def run(self, records):
                self.create_index()
                self.write_to_index(records)

        Implementers can add pre/post hooks, metrics, retries, or error
        handling as needed for production readiness. Keep `run` simple so the
        pipeline orchestration remains predictable.

        Parameters:
        - records: NV-Ingest records to index (format follows repository
          conventions)

        Returns:
        - implementation-specific result (for example, a summary dict or
          boolean success flag).
        """
        pass

    def reindex(self, records: list, **kwargs):
        """Optional helper to rebuild or re-populate indexes with new data.

        This non-abstract method is provided as an optional hook that concrete
        classes may override; the base implementation does nothing and returns
        None. A typical reindex implementation will:
        - optionally delete the existing index and recreate it (via
          `create_index(recreate=True)`)
        - call `write_to_index(records)` to populate the new index

        Parameters:
        - records (list): records used to populate the index
        - recreate (bool, optional): whether to delete and recreate the
          index before writing

        Returns:
        - implementation-specific result
        """
        pass