sie-haystack 0.1.9__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -245,6 +245,9 @@ Thumbs.db
245
245
  # VIM
246
246
  *.swp
247
247
 
248
+ # kilocode
249
+ .kilo/
250
+
248
251
  # Worktree metadata
249
252
  .base-branch
250
253
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sie-haystack
3
- Version: 0.1.9
3
+ Version: 0.2.0
4
4
  Summary: SIE integration for Haystack
5
5
  Author-email: Superlinked <dev@superlinked.com>
6
6
  License: Apache-2.0
@@ -30,11 +30,31 @@ SIE integration for Haystack.
30
30
  pip install sie-haystack
31
31
  ```
32
32
 
33
+ ## Imports
34
+
35
+ Preferred import paths follow Haystack's namespace convention:
36
+
37
+ ```python
38
+ from haystack_integrations.components.embedders.sie import (
39
+ SIEDocumentEmbedder,
40
+ SIETextEmbedder,
41
+ )
42
+ from haystack_integrations.components.rankers.sie import SIERanker
43
+ from haystack_integrations.components.extractors.sie import SIEExtractor
44
+ ```
45
+
46
+ The legacy flat imports remain supported for compatibility:
47
+
48
+ ```python
49
+ from sie_haystack import SIEDocumentEmbedder, SIEExtractor, SIERanker, SIETextEmbedder
50
+ ```
51
+
33
52
  ## Usage
34
53
 
35
54
  ```python
36
55
  from haystack import Document
37
- from sie_haystack import SIETextEmbedder, SIEDocumentEmbedder, SIERanker
56
+ from haystack_integrations.components.embedders.sie import SIEDocumentEmbedder, SIETextEmbedder
57
+ from haystack_integrations.components.rankers.sie import SIERanker
38
58
 
39
59
  # Embed a query
40
60
  text_embedder = SIETextEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
@@ -8,11 +8,31 @@ SIE integration for Haystack.
8
8
  pip install sie-haystack
9
9
  ```
10
10
 
11
+ ## Imports
12
+
13
+ Preferred import paths follow Haystack's namespace convention:
14
+
15
+ ```python
16
+ from haystack_integrations.components.embedders.sie import (
17
+ SIEDocumentEmbedder,
18
+ SIETextEmbedder,
19
+ )
20
+ from haystack_integrations.components.rankers.sie import SIERanker
21
+ from haystack_integrations.components.extractors.sie import SIEExtractor
22
+ ```
23
+
24
+ The legacy flat imports remain supported for compatibility:
25
+
26
+ ```python
27
+ from sie_haystack import SIEDocumentEmbedder, SIEExtractor, SIERanker, SIETextEmbedder
28
+ ```
29
+
11
30
  ## Usage
12
31
 
13
32
  ```python
14
33
  from haystack import Document
15
- from sie_haystack import SIETextEmbedder, SIEDocumentEmbedder, SIERanker
34
+ from haystack_integrations.components.embedders.sie import SIEDocumentEmbedder, SIETextEmbedder
35
+ from haystack_integrations.components.rankers.sie import SIERanker
16
36
 
17
37
  # Embed a query
18
38
  text_embedder = SIETextEmbedder(base_url="http://localhost:8080", model="BAAI/bge-m3")
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sie-haystack"
3
- version = "0.1.9"
3
+ version = "0.2.0"
4
4
  description = "SIE integration for Haystack"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -33,7 +33,7 @@ requires = ["hatchling"]
33
33
  build-backend = "hatchling.build"
34
34
 
35
35
  [tool.hatch.build.targets.wheel]
36
- packages = ["src/sie_haystack"]
36
+ packages = ["src/sie_haystack", "src/haystack_integrations"]
37
37
 
38
38
  [tool.pytest.ini_options]
39
39
  asyncio_mode = "auto"
@@ -0,0 +1,25 @@
1
+ """Haystack namespace exports for SIE embedders.
2
+
3
+ This mirrors Haystack's `haystack_integrations.components.*` convention
4
+ while keeping the existing `sie_haystack` imports available.
5
+ """
6
+
7
+ from sie_haystack.embedders import (
8
+ SIEDocumentEmbedder,
9
+ SIEImageEmbedder,
10
+ SIEMultivectorDocumentEmbedder,
11
+ SIEMultivectorTextEmbedder,
12
+ SIESparseDocumentEmbedder,
13
+ SIESparseTextEmbedder,
14
+ SIETextEmbedder,
15
+ )
16
+
17
+ __all__ = [
18
+ "SIEDocumentEmbedder",
19
+ "SIEImageEmbedder",
20
+ "SIEMultivectorDocumentEmbedder",
21
+ "SIEMultivectorTextEmbedder",
22
+ "SIESparseDocumentEmbedder",
23
+ "SIESparseTextEmbedder",
24
+ "SIETextEmbedder",
25
+ ]
@@ -0,0 +1,11 @@
1
+ """Haystack namespace exports for SIE extractors."""
2
+
3
+ from sie_haystack.extractors import Classification, DetectedObject, Entity, Relation, SIEExtractor
4
+
5
+ __all__ = [
6
+ "Classification",
7
+ "DetectedObject",
8
+ "Entity",
9
+ "Relation",
10
+ "SIEExtractor",
11
+ ]
@@ -0,0 +1,5 @@
1
+ """Haystack namespace exports for SIE rankers."""
2
+
3
+ from sie_haystack.rankers import SIERanker
4
+
5
+ __all__ = ["SIERanker"]
@@ -10,6 +10,10 @@ Sparse Embedders (for hybrid search):
10
10
  - SIESparseTextEmbedder: Sparse embeddings for queries
11
11
  - SIESparseDocumentEmbedder: Sparse embeddings for documents
12
12
 
13
+ Multivector Embedders (ColBERT):
14
+ - SIEMultivectorTextEmbedder: Per-token embeddings for queries
15
+ - SIEMultivectorDocumentEmbedder: Per-token embeddings for documents
16
+
13
17
  Rankers and Extractors:
14
18
  - SIERanker: Reranks documents by relevance to a query
15
19
  - SIEExtractor: Extracts entities from text
@@ -45,16 +49,26 @@ Hybrid search example:
45
49
 
46
50
  from sie_haystack.embedders import (
47
51
  SIEDocumentEmbedder,
52
+ SIEImageEmbedder,
53
+ SIEMultivectorDocumentEmbedder,
54
+ SIEMultivectorTextEmbedder,
48
55
  SIESparseDocumentEmbedder,
49
56
  SIESparseTextEmbedder,
50
57
  SIETextEmbedder,
51
58
  )
52
- from sie_haystack.extractors import SIEExtractor
59
+ from sie_haystack.extractors import Classification, DetectedObject, Entity, Relation, SIEExtractor
53
60
  from sie_haystack.rankers import SIERanker
54
61
 
55
62
  __all__ = [
63
+ "Classification",
64
+ "DetectedObject",
65
+ "Entity",
66
+ "Relation",
56
67
  "SIEDocumentEmbedder",
57
68
  "SIEExtractor",
69
+ "SIEImageEmbedder",
70
+ "SIEMultivectorDocumentEmbedder",
71
+ "SIEMultivectorTextEmbedder",
58
72
  "SIERanker",
59
73
  "SIESparseDocumentEmbedder",
60
74
  "SIESparseTextEmbedder",
@@ -5,6 +5,8 @@ Provides embedder components following Haystack's conventions:
5
5
  - SIEDocumentEmbedder: For embedding documents - dense embeddings
6
6
  - SIESparseTextEmbedder: For sparse embeddings of queries (hybrid search)
7
7
  - SIESparseDocumentEmbedder: For sparse embeddings of documents (hybrid search)
8
+ - SIEMultivectorTextEmbedder: For multivector (ColBERT) embeddings of queries
9
+ - SIEMultivectorDocumentEmbedder: For multivector (ColBERT) embeddings of documents
8
10
  """
9
11
 
10
12
  from __future__ import annotations
@@ -12,6 +14,9 @@ from __future__ import annotations
12
14
  from typing import Any
13
15
 
14
16
  from haystack import Document, component
17
+ from sie_sdk import SIEClient
18
+ from sie_sdk.encoding import dense_embedding, multivector_embedding, sparse_embedding
19
+ from sie_sdk.types import Item
15
20
 
16
21
 
17
22
  @component
@@ -55,8 +60,6 @@ class SIETextEmbedder:
55
60
  def client(self) -> Any:
56
61
  """Lazily initialize the SIE client."""
57
62
  if self._client is None:
58
- from sie_sdk import SIEClient
59
-
60
63
  self._client = SIEClient(
61
64
  self._base_url,
62
65
  timeout_s=self._timeout_s,
@@ -79,24 +82,13 @@ class SIETextEmbedder:
79
82
  Returns:
80
83
  Dictionary with "embedding" key containing the embedding vector.
81
84
  """
82
- from sie_sdk.types import Item
83
-
84
85
  result = self.client.encode(
85
86
  self._model,
86
87
  Item(text=text),
87
88
  output_types=["dense"],
88
89
  options={"is_query": True},
89
90
  )
90
- embedding = self._extract_dense(result)
91
- return {"embedding": embedding}
92
-
93
- def _extract_dense(self, result: Any) -> list[float]:
94
- """Extract dense embedding from SDK result."""
95
- # SDK returns {"dense": np.ndarray, ...}
96
- dense = result.get("dense") if isinstance(result, dict) else getattr(result, "dense", None)
97
- if dense is None:
98
- return []
99
- return dense.tolist() if hasattr(dense, "tolist") else list(dense)
91
+ return {"embedding": dense_embedding(result)}
100
92
 
101
93
 
102
94
  @component
@@ -146,8 +138,6 @@ class SIEDocumentEmbedder:
146
138
  def client(self) -> Any:
147
139
  """Lazily initialize the SIE client."""
148
140
  if self._client is None:
149
- from sie_sdk import SIEClient
150
-
151
141
  self._client = SIEClient(
152
142
  self._base_url,
153
143
  timeout_s=self._timeout_s,
@@ -173,8 +163,6 @@ class SIEDocumentEmbedder:
173
163
  if not documents:
174
164
  return {"documents": []}
175
165
 
176
- from sie_sdk.types import Item
177
-
178
166
  # Build text to embed for each document
179
167
  texts = [self._build_text(doc) for doc in documents]
180
168
  items = [Item(text=text) for text in texts]
@@ -188,7 +176,7 @@ class SIEDocumentEmbedder:
188
176
 
189
177
  # Store embeddings on documents
190
178
  for doc, result in zip(documents, results, strict=True):
191
- doc.embedding = self._extract_dense(result)
179
+ doc.embedding = dense_embedding(result)
192
180
 
193
181
  return {"documents": documents}
194
182
 
@@ -201,14 +189,6 @@ class SIEDocumentEmbedder:
201
189
  parts.append(doc.content or "")
202
190
  return " ".join(parts)
203
191
 
204
- def _extract_dense(self, result: Any) -> list[float]:
205
- """Extract dense embedding from SDK result."""
206
- # SDK returns {"dense": np.ndarray, ...}
207
- dense = result.get("dense") if isinstance(result, dict) else getattr(result, "dense", None)
208
- if dense is None:
209
- return []
210
- return dense.tolist() if hasattr(dense, "tolist") else list(dense)
211
-
212
192
 
213
193
  @component
214
194
  class SIESparseTextEmbedder:
@@ -252,8 +232,6 @@ class SIESparseTextEmbedder:
252
232
  def client(self) -> Any:
253
233
  """Lazily initialize the SIE client."""
254
234
  if self._client is None:
255
- from sie_sdk import SIEClient
256
-
257
235
  self._client = SIEClient(
258
236
  self._base_url,
259
237
  timeout_s=self._timeout_s,
@@ -276,29 +254,13 @@ class SIESparseTextEmbedder:
276
254
  Returns:
277
255
  Dictionary with "sparse_embedding" key containing dict with "indices" and "values" lists.
278
256
  """
279
- from sie_sdk.types import Item
280
-
281
257
  result = self.client.encode(
282
258
  self._model,
283
259
  Item(text=text),
284
260
  output_types=["sparse"],
285
261
  options={"is_query": True},
286
262
  )
287
- sparse_embedding = self._extract_sparse(result)
288
- return {"sparse_embedding": sparse_embedding}
289
-
290
- def _extract_sparse(self, result: Any) -> dict[str, list]:
291
- """Extract sparse embedding from SDK result."""
292
- # SDK returns {"sparse": {"indices": np.ndarray, "values": np.ndarray}, ...}
293
- sparse = result.get("sparse") if isinstance(result, dict) else getattr(result, "sparse", None)
294
- if sparse is None:
295
- return {"indices": [], "values": []}
296
- indices = sparse.get("indices") if isinstance(sparse, dict) else getattr(sparse, "indices", None)
297
- values = sparse.get("values") if isinstance(sparse, dict) else getattr(sparse, "values", None)
298
- return {
299
- "indices": indices.tolist() if hasattr(indices, "tolist") else list(indices or []),
300
- "values": values.tolist() if hasattr(values, "tolist") else list(values or []),
301
- }
263
+ return {"sparse_embedding": sparse_embedding(result)}
302
264
 
303
265
 
304
266
  @component
@@ -349,8 +311,6 @@ class SIESparseDocumentEmbedder:
349
311
  def client(self) -> Any:
350
312
  """Lazily initialize the SIE client."""
351
313
  if self._client is None:
352
- from sie_sdk import SIEClient
353
-
354
314
  self._client = SIEClient(
355
315
  self._base_url,
356
316
  timeout_s=self._timeout_s,
@@ -376,8 +336,6 @@ class SIESparseDocumentEmbedder:
376
336
  if not documents:
377
337
  return {"documents": []}
378
338
 
379
- from sie_sdk.types import Item
380
-
381
339
  # Build text to embed for each document
382
340
  texts = [self._build_text(doc) for doc in documents]
383
341
  items = [Item(text=text) for text in texts]
@@ -391,7 +349,7 @@ class SIESparseDocumentEmbedder:
391
349
 
392
350
  # Store sparse embeddings on documents in meta
393
351
  for doc, result in zip(documents, results, strict=True):
394
- doc.meta["_sparse_embedding"] = self._extract_sparse(result)
352
+ doc.meta["_sparse_embedding"] = sparse_embedding(result)
395
353
 
396
354
  return {"documents": documents}
397
355
 
@@ -404,15 +362,257 @@ class SIESparseDocumentEmbedder:
404
362
  parts.append(doc.content or "")
405
363
  return " ".join(parts)
406
364
 
407
- def _extract_sparse(self, result: Any) -> dict[str, list]:
408
- """Extract sparse embedding from SDK result."""
409
- # SDK returns {"sparse": {"indices": np.ndarray, "values": np.ndarray}, ...}
410
- sparse = result.get("sparse") if isinstance(result, dict) else getattr(result, "sparse", None)
411
- if sparse is None:
412
- return {"indices": [], "values": []}
413
- indices = sparse.get("indices") if isinstance(sparse, dict) else getattr(sparse, "indices", None)
414
- values = sparse.get("values") if isinstance(sparse, dict) else getattr(sparse, "values", None)
415
- return {
416
- "indices": indices.tolist() if hasattr(indices, "tolist") else list(indices or []),
417
- "values": values.tolist() if hasattr(values, "tolist") else list(values or []),
418
- }
365
+
366
+ @component
367
+ class SIEImageEmbedder:
368
+ """Embeds images using SIE multimodal models (CLIP, SigLIP, ColPali).
369
+
370
+ Use this component in Haystack pipelines for image embedding with models
371
+ that support image input.
372
+
373
+ Example:
374
+ >>> embedder = SIEImageEmbedder(
375
+ ... base_url="http://localhost:8080",
376
+ ... model="openai/clip-vit-large-patch14",
377
+ ... )
378
+ >>> result = embedder.run(images=["/path/to/photo.jpg"])
379
+ >>> embeddings = result["embeddings"] # list[list[float]]
380
+
381
+ Args:
382
+ base_url: URL of the SIE server.
383
+ model: Model name to use for encoding. Must support image input.
384
+ gpu: GPU type to use (e.g., "l4", "a100"). Passed to SDK as default.
385
+ options: Model-specific options. Passed to SDK as default.
386
+ timeout_s: Request timeout in seconds.
387
+ """
388
+
389
+ def __init__(
390
+ self,
391
+ base_url: str = "http://localhost:8080",
392
+ model: str = "openai/clip-vit-large-patch14",
393
+ *,
394
+ gpu: str | None = None,
395
+ options: dict[str, Any] | None = None,
396
+ timeout_s: float = 180.0,
397
+ ) -> None:
398
+ self._base_url = base_url
399
+ self._model = model
400
+ self._gpu = gpu
401
+ self._options = options
402
+ self._timeout_s = timeout_s
403
+ self._client: Any = None
404
+
405
+ @property
406
+ def client(self) -> Any:
407
+ """Lazily initialize the SIE client."""
408
+ if self._client is None:
409
+ self._client = SIEClient(
410
+ self._base_url,
411
+ timeout_s=self._timeout_s,
412
+ gpu=self._gpu,
413
+ options=self._options,
414
+ )
415
+ return self._client
416
+
417
+ def warm_up(self) -> None:
418
+ """Warm up the component by initializing the client."""
419
+ _ = self.client
420
+
421
+ @component.output_types(embeddings=list[list[float]])
422
+ def run(self, images: list[str | bytes]) -> dict[str, list[list[float]]]:
423
+ """Embed images and return their embeddings.
424
+
425
+ Args:
426
+ images: List of image file paths (str) or raw image bytes.
427
+
428
+ Returns:
429
+ Dictionary with "embeddings" key containing list of embedding vectors.
430
+ """
431
+ if not images:
432
+ return {"embeddings": []}
433
+
434
+ items = [Item(images=[img]) for img in images]
435
+
436
+ results = self.client.encode(
437
+ self._model,
438
+ items,
439
+ output_types=["dense"],
440
+ )
441
+
442
+ return {"embeddings": [dense_embedding(result) for result in results]}
443
+
444
+
445
+ @component
446
+ class SIEMultivectorTextEmbedder:
447
+ """Embeds a single text string using SIE multivector (ColBERT) embeddings.
448
+
449
+ Produces per-token embeddings for late-interaction retrieval and scoring.
450
+ Use this component for embedding queries in ColBERT pipelines.
451
+
452
+ Example:
453
+ >>> embedder = SIEMultivectorTextEmbedder(
454
+ ... base_url="http://localhost:8080",
455
+ ... model="jinaai/jina-colbert-v2",
456
+ ... )
457
+ >>> result = embedder.run(text="What is vector search?")
458
+ >>> multivector = result["multivector_embedding"] # list[list[float]]
459
+ """
460
+
461
+ def __init__(
462
+ self,
463
+ base_url: str = "http://localhost:8080",
464
+ model: str = "jinaai/jina-colbert-v2",
465
+ *,
466
+ gpu: str | None = None,
467
+ options: dict[str, Any] | None = None,
468
+ timeout_s: float = 180.0,
469
+ ) -> None:
470
+ """Initialize the multivector text embedder.
471
+
472
+ Args:
473
+ base_url: URL of the SIE server.
474
+ model: Model name to use for encoding. Must support multivector output
475
+ (e.g., jinaai/jina-colbert-v2).
476
+ gpu: GPU type to use (e.g., "l4", "a100"). Passed to SDK as default.
477
+ options: Model-specific options. Passed to SDK as default.
478
+ timeout_s: Request timeout in seconds.
479
+ """
480
+ self._base_url = base_url
481
+ self._model = model
482
+ self._gpu = gpu
483
+ self._options = options
484
+ self._timeout_s = timeout_s
485
+ self._client: Any = None
486
+
487
+ @property
488
+ def client(self) -> Any:
489
+ """Lazily initialize the SIE client."""
490
+ if self._client is None:
491
+ self._client = SIEClient(
492
+ self._base_url,
493
+ timeout_s=self._timeout_s,
494
+ gpu=self._gpu,
495
+ options=self._options,
496
+ )
497
+ return self._client
498
+
499
+ def warm_up(self) -> None:
500
+ """Warm up the component by initializing the client."""
501
+ _ = self.client
502
+
503
+ @component.output_types(multivector_embedding=list)
504
+ def run(self, text: str) -> dict[str, list[list[float]]]:
505
+ """Embed a single text string with multivector (ColBERT) embeddings.
506
+
507
+ Args:
508
+ text: The text to embed.
509
+
510
+ Returns:
511
+ Dictionary with "multivector_embedding" key containing per-token embeddings.
512
+ """
513
+ result = self.client.encode(
514
+ self._model,
515
+ Item(text=text),
516
+ output_types=["multivector"],
517
+ options={"is_query": True},
518
+ )
519
+ return {"multivector_embedding": multivector_embedding(result["multivector"])}
520
+
521
+
522
+ @component
523
+ class SIEMultivectorDocumentEmbedder:
524
+ """Embeds documents using SIE multivector (ColBERT) embeddings.
525
+
526
+ Produces per-token embeddings for late-interaction retrieval. Stores
527
+ multivector embeddings on each document's metadata.
528
+
529
+ Example:
530
+ >>> from haystack import Document
531
+ >>> embedder = SIEMultivectorDocumentEmbedder(
532
+ ... base_url="http://localhost:8080",
533
+ ... model="jinaai/jina-colbert-v2",
534
+ ... )
535
+ >>> docs = [Document(content="Python is a programming language.")]
536
+ >>> result = embedder.run(documents=docs)
537
+ >>> embedded_docs = result["documents"]
538
+ >>> print(embedded_docs[0].meta["_multivector_embedding"]) # list[list[float]]
539
+ """
540
+
541
+ def __init__(
542
+ self,
543
+ base_url: str = "http://localhost:8080",
544
+ model: str = "jinaai/jina-colbert-v2",
545
+ *,
546
+ gpu: str | None = None,
547
+ options: dict[str, Any] | None = None,
548
+ timeout_s: float = 180.0,
549
+ meta_fields_to_embed: list[str] | None = None,
550
+ ) -> None:
551
+ """Initialize the multivector document embedder.
552
+
553
+ Args:
554
+ base_url: URL of the SIE server.
555
+ model: Model name to use for encoding. Must support multivector output
556
+ (e.g., jinaai/jina-colbert-v2).
557
+ gpu: GPU type to use (e.g., "l4", "a100"). Passed to SDK as default.
558
+ options: Model-specific options. Passed to SDK as default.
559
+ timeout_s: Request timeout in seconds.
560
+ meta_fields_to_embed: List of metadata fields to include in embedding.
561
+ """
562
+ self._base_url = base_url
563
+ self._model = model
564
+ self._gpu = gpu
565
+ self._options = options
566
+ self._timeout_s = timeout_s
567
+ self._meta_fields_to_embed = meta_fields_to_embed or []
568
+ self._client: Any = None
569
+
570
+ @property
571
+ def client(self) -> Any:
572
+ """Lazily initialize the SIE client."""
573
+ if self._client is None:
574
+ self._client = SIEClient(
575
+ self._base_url,
576
+ timeout_s=self._timeout_s,
577
+ gpu=self._gpu,
578
+ options=self._options,
579
+ )
580
+ return self._client
581
+
582
+ def warm_up(self) -> None:
583
+ """Warm up the component by initializing the client."""
584
+ _ = self.client
585
+
586
+ @component.output_types(documents=list[Document])
587
+ def run(self, documents: list[Document]) -> dict[str, list[Document]]:
588
+ """Embed documents with multivector (ColBERT) embeddings.
589
+
590
+ Args:
591
+ documents: List of documents to embed.
592
+
593
+ Returns:
594
+ Dictionary with "documents" key containing documents with multivector embeddings
595
+ stored in meta["_multivector_embedding"].
596
+ """
597
+ if not documents:
598
+ return {"documents": []}
599
+
600
+ texts = [self._build_text(doc) for doc in documents]
601
+ items = [Item(text=text) for text in texts]
602
+
603
+ results = self.client.encode(
604
+ self._model,
605
+ items,
606
+ output_types=["multivector"],
607
+ )
608
+
609
+ for doc, result in zip(documents, results, strict=True):
610
+ doc.meta["_multivector_embedding"] = multivector_embedding(result["multivector"])
611
+
612
+ return {"documents": documents}
613
+
614
+ def _build_text(self, doc: Document) -> str:
615
+ """Build the text to embed for a document, optionally including metadata fields."""
616
+ parts = [str(doc.meta[field]) for field in self._meta_fields_to_embed if field in doc.meta]
617
+ parts.append(doc.content or "")
618
+ return " ".join(parts)