cocoindex 0.2.16__cp311-abi3-win_amd64.whl → 0.2.18__cp311-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.pyd +0 -0
- cocoindex/auth_registry.py +1 -1
- cocoindex/cli.py +121 -41
- cocoindex/engine_object.py +272 -0
- cocoindex/{convert.py → engine_value.py} +64 -208
- cocoindex/flow.py +7 -2
- cocoindex/functions/__init__.py +40 -0
- cocoindex/functions/_engine_builtin_specs.py +66 -0
- cocoindex/functions/colpali.py +250 -0
- cocoindex/functions/sbert.py +63 -0
- cocoindex/lib.py +1 -1
- cocoindex/op.py +7 -3
- cocoindex/sources/__init__.py +5 -0
- cocoindex/{sources.py → sources/_engine_builtin_specs.py} +3 -3
- cocoindex/targets/_engine_builtin_specs.py +19 -1
- cocoindex/tests/test_engine_object.py +331 -0
- cocoindex/tests/{test_convert.py → test_engine_value.py} +150 -26
- cocoindex/typing.py +125 -3
- {cocoindex-0.2.16.dist-info → cocoindex-0.2.18.dist-info}/METADATA +4 -1
- cocoindex-0.2.18.dist-info/RECORD +43 -0
- {cocoindex-0.2.16.dist-info → cocoindex-0.2.18.dist-info}/WHEEL +1 -1
- {cocoindex-0.2.16.dist-info → cocoindex-0.2.18.dist-info}/licenses/THIRD_PARTY_NOTICES.html +54 -21
- cocoindex/tests/test_load_convert.py +0 -118
- cocoindex-0.2.16.dist-info/RECORD +0 -37
- {cocoindex-0.2.16.dist-info → cocoindex-0.2.18.dist-info}/entry_points.txt +0 -0
@@ -1,18 +1,15 @@
|
|
1
1
|
"""
|
2
|
-
Utilities to
|
2
|
+
Utilities to encode/decode values in cocoindex (for data).
|
3
3
|
"""
|
4
4
|
|
5
5
|
from __future__ import annotations
|
6
6
|
|
7
7
|
import dataclasses
|
8
|
-
import datetime
|
9
8
|
import inspect
|
10
9
|
import warnings
|
11
|
-
from enum import Enum
|
12
|
-
from typing import Any, Callable, Mapping, get_origin, TypeVar, overload
|
10
|
+
from typing import Any, Callable, Mapping, TypeVar
|
13
11
|
|
14
12
|
import numpy as np
|
15
|
-
|
16
13
|
from .typing import (
|
17
14
|
AnalyzedAnyType,
|
18
15
|
AnalyzedBasicType,
|
@@ -22,18 +19,17 @@ from .typing import (
|
|
22
19
|
AnalyzedTypeInfo,
|
23
20
|
AnalyzedUnionType,
|
24
21
|
AnalyzedUnknownType,
|
25
|
-
EnrichedValueType,
|
26
22
|
analyze_type_info,
|
27
|
-
encode_enriched_type,
|
28
23
|
is_namedtuple_type,
|
24
|
+
is_pydantic_model,
|
29
25
|
is_numpy_number_type,
|
30
|
-
extract_ndarray_elem_dtype,
|
31
26
|
ValueType,
|
32
27
|
FieldSchema,
|
33
28
|
BasicValueType,
|
34
29
|
StructType,
|
35
30
|
TableType,
|
36
31
|
)
|
32
|
+
from .engine_object import get_auto_default_for_type
|
37
33
|
|
38
34
|
|
39
35
|
T = TypeVar("T")
|
@@ -167,6 +163,29 @@ def make_engine_value_encoder(type_info: AnalyzedTypeInfo) -> Callable[[Any], An
|
|
167
163
|
|
168
164
|
return encode_namedtuple
|
169
165
|
|
166
|
+
elif is_pydantic_model(struct_type):
|
167
|
+
# Type guard: ensure we have model_fields attribute
|
168
|
+
if hasattr(struct_type, "model_fields"):
|
169
|
+
field_names = list(struct_type.model_fields.keys()) # type: ignore[attr-defined]
|
170
|
+
field_encoders = [
|
171
|
+
make_engine_value_encoder(
|
172
|
+
analyze_type_info(struct_type.model_fields[name].annotation) # type: ignore[attr-defined]
|
173
|
+
)
|
174
|
+
for name in field_names
|
175
|
+
]
|
176
|
+
else:
|
177
|
+
raise ValueError(f"Invalid Pydantic model: {struct_type}")
|
178
|
+
|
179
|
+
def encode_pydantic(value: Any) -> Any:
|
180
|
+
if value is None:
|
181
|
+
return None
|
182
|
+
return [
|
183
|
+
encoder(getattr(value, name))
|
184
|
+
for encoder, name in zip(field_encoders, field_names)
|
185
|
+
]
|
186
|
+
|
187
|
+
return encode_pydantic
|
188
|
+
|
170
189
|
def encode_basic_value(value: Any) -> Any:
|
171
190
|
if isinstance(value, np.number):
|
172
191
|
return value.item()
|
@@ -420,30 +439,6 @@ def make_engine_value_decoder(
|
|
420
439
|
return lambda value: value
|
421
440
|
|
422
441
|
|
423
|
-
def _get_auto_default_for_type(
|
424
|
-
type_info: AnalyzedTypeInfo,
|
425
|
-
) -> tuple[Any, bool]:
|
426
|
-
"""
|
427
|
-
Get an auto-default value for a type annotation if it's safe to do so.
|
428
|
-
|
429
|
-
Returns:
|
430
|
-
A tuple of (default_value, is_supported) where:
|
431
|
-
- default_value: The default value if auto-defaulting is supported
|
432
|
-
- is_supported: True if auto-defaulting is supported for this type
|
433
|
-
"""
|
434
|
-
# Case 1: Nullable types (Optional[T] or T | None)
|
435
|
-
if type_info.nullable:
|
436
|
-
return None, True
|
437
|
-
|
438
|
-
# Case 2: Table types (KTable or LTable) - check if it's a list or dict type
|
439
|
-
if isinstance(type_info.variant, AnalyzedListType):
|
440
|
-
return [], True
|
441
|
-
elif isinstance(type_info.variant, AnalyzedDictType):
|
442
|
-
return {}, True
|
443
|
-
|
444
|
-
return None, False
|
445
|
-
|
446
|
-
|
447
442
|
def make_engine_struct_decoder(
|
448
443
|
field_path: list[str],
|
449
444
|
src_fields: list[FieldSchema],
|
@@ -472,7 +467,7 @@ def make_engine_struct_decoder(
|
|
472
467
|
if not isinstance(dst_type_variant, AnalyzedStructType):
|
473
468
|
raise ValueError(
|
474
469
|
f"Type mismatch for `{''.join(field_path)}`: "
|
475
|
-
f"declared `{dst_type_info.core_type}`, a dataclass, NamedTuple or dict[str, Any] expected"
|
470
|
+
f"declared `{dst_type_info.core_type}`, a dataclass, NamedTuple, Pydantic model or dict[str, Any] expected"
|
476
471
|
)
|
477
472
|
|
478
473
|
src_name_to_idx = {f.name: i for i, f in enumerate(src_fields)}
|
@@ -495,6 +490,26 @@ def make_engine_struct_decoder(
|
|
495
490
|
)
|
496
491
|
for name in fields
|
497
492
|
}
|
493
|
+
elif is_pydantic_model(dst_struct_type):
|
494
|
+
# For Pydantic models, we can use model_fields to get field information
|
495
|
+
parameters = {}
|
496
|
+
# Type guard: ensure we have model_fields attribute
|
497
|
+
if hasattr(dst_struct_type, "model_fields"):
|
498
|
+
model_fields = dst_struct_type.model_fields # type: ignore[attr-defined]
|
499
|
+
else:
|
500
|
+
model_fields = {}
|
501
|
+
for name, field_info in model_fields.items():
|
502
|
+
default_value = (
|
503
|
+
field_info.default
|
504
|
+
if field_info.default is not ...
|
505
|
+
else inspect.Parameter.empty
|
506
|
+
)
|
507
|
+
parameters[name] = inspect.Parameter(
|
508
|
+
name=name,
|
509
|
+
kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
510
|
+
default=default_value,
|
511
|
+
annotation=field_info.annotation,
|
512
|
+
)
|
498
513
|
else:
|
499
514
|
raise ValueError(f"Unsupported struct type: {dst_struct_type}")
|
500
515
|
|
@@ -518,7 +533,7 @@ def make_engine_struct_decoder(
|
|
518
533
|
if default_value is not inspect.Parameter.empty:
|
519
534
|
return lambda _: default_value
|
520
535
|
|
521
|
-
auto_default, is_supported = _get_auto_default_for_type(type_info)
|
536
|
+
auto_default, is_supported = get_auto_default_for_type(type_info)
|
522
537
|
if is_supported:
|
523
538
|
warnings.warn(
|
524
539
|
f"Field '{name}' (type {param.annotation}) without default value is missing in input: "
|
@@ -536,9 +551,21 @@ def make_engine_struct_decoder(
|
|
536
551
|
make_closure_for_field(name, param) for (name, param) in parameters.items()
|
537
552
|
]
|
538
553
|
|
539
|
-
|
540
|
-
|
541
|
-
|
554
|
+
# Different construction for different struct types
|
555
|
+
if is_pydantic_model(dst_struct_type):
|
556
|
+
# Pydantic models prefer keyword arguments
|
557
|
+
field_names = list(parameters.keys())
|
558
|
+
return lambda values: dst_struct_type(
|
559
|
+
**{
|
560
|
+
field_names[i]: decoder(values)
|
561
|
+
for i, decoder in enumerate(field_value_decoder)
|
562
|
+
}
|
563
|
+
)
|
564
|
+
else:
|
565
|
+
# Dataclasses and NamedTuples can use positional arguments
|
566
|
+
return lambda values: dst_struct_type(
|
567
|
+
*(decoder(values) for decoder in field_value_decoder)
|
568
|
+
)
|
542
569
|
|
543
570
|
|
544
571
|
def _make_engine_struct_to_dict_decoder(
|
@@ -606,174 +633,3 @@ def _make_engine_struct_to_tuple_decoder(
|
|
606
633
|
)
|
607
634
|
|
608
635
|
return decode_to_tuple
|
609
|
-
|
610
|
-
|
611
|
-
def dump_engine_object(v: Any) -> Any:
|
612
|
-
"""Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
|
613
|
-
if v is None:
|
614
|
-
return None
|
615
|
-
elif isinstance(v, EnrichedValueType):
|
616
|
-
return v.encode()
|
617
|
-
elif isinstance(v, FieldSchema):
|
618
|
-
return v.encode()
|
619
|
-
elif isinstance(v, type) or get_origin(v) is not None:
|
620
|
-
return encode_enriched_type(v)
|
621
|
-
elif isinstance(v, Enum):
|
622
|
-
return v.value
|
623
|
-
elif isinstance(v, datetime.timedelta):
|
624
|
-
total_secs = v.total_seconds()
|
625
|
-
secs = int(total_secs)
|
626
|
-
nanos = int((total_secs - secs) * 1e9)
|
627
|
-
return {"secs": secs, "nanos": nanos}
|
628
|
-
elif is_namedtuple_type(type(v)):
|
629
|
-
# Handle NamedTuple objects specifically to use dict format
|
630
|
-
field_names = list(getattr(type(v), "_fields", ()))
|
631
|
-
result = {}
|
632
|
-
for name in field_names:
|
633
|
-
val = getattr(v, name)
|
634
|
-
result[name] = dump_engine_object(val) # Include all values, including None
|
635
|
-
if hasattr(v, "kind") and "kind" not in result:
|
636
|
-
result["kind"] = v.kind
|
637
|
-
return result
|
638
|
-
elif hasattr(v, "__dict__"): # for dataclass-like objects
|
639
|
-
s = {}
|
640
|
-
for k, val in v.__dict__.items():
|
641
|
-
if val is None:
|
642
|
-
# Skip None values
|
643
|
-
continue
|
644
|
-
s[k] = dump_engine_object(val)
|
645
|
-
if hasattr(v, "kind") and "kind" not in s:
|
646
|
-
s["kind"] = v.kind
|
647
|
-
return s
|
648
|
-
elif isinstance(v, (list, tuple)):
|
649
|
-
return [dump_engine_object(item) for item in v]
|
650
|
-
elif isinstance(v, np.ndarray):
|
651
|
-
return v.tolist()
|
652
|
-
elif isinstance(v, dict):
|
653
|
-
return {k: dump_engine_object(v) for k, v in v.items()}
|
654
|
-
return v
|
655
|
-
|
656
|
-
|
657
|
-
@overload
|
658
|
-
def load_engine_object(expected_type: type[T], v: Any) -> T: ...
|
659
|
-
@overload
|
660
|
-
def load_engine_object(expected_type: Any, v: Any) -> Any: ...
|
661
|
-
def load_engine_object(expected_type: Any, v: Any) -> Any:
|
662
|
-
"""Recursively load an object that was produced by dump_engine_object().
|
663
|
-
|
664
|
-
Args:
|
665
|
-
expected_type: The Python type annotation to reconstruct to.
|
666
|
-
v: The engine-facing Pythonized object (e.g., dict/list/primitive) to convert.
|
667
|
-
|
668
|
-
Returns:
|
669
|
-
A Python object matching the expected_type where possible.
|
670
|
-
"""
|
671
|
-
# Fast path
|
672
|
-
if v is None:
|
673
|
-
return None
|
674
|
-
|
675
|
-
type_info = analyze_type_info(expected_type)
|
676
|
-
variant = type_info.variant
|
677
|
-
|
678
|
-
if type_info.core_type is EnrichedValueType:
|
679
|
-
return EnrichedValueType.decode(v)
|
680
|
-
if type_info.core_type is FieldSchema:
|
681
|
-
return FieldSchema.decode(v)
|
682
|
-
|
683
|
-
# Any or unknown → return as-is
|
684
|
-
if isinstance(variant, AnalyzedAnyType) or type_info.base_type is Any:
|
685
|
-
return v
|
686
|
-
|
687
|
-
# Enum handling
|
688
|
-
if isinstance(expected_type, type) and issubclass(expected_type, Enum):
|
689
|
-
return expected_type(v)
|
690
|
-
|
691
|
-
# TimeDelta special form {secs, nanos}
|
692
|
-
if isinstance(variant, AnalyzedBasicType) and variant.kind == "TimeDelta":
|
693
|
-
if isinstance(v, Mapping) and "secs" in v and "nanos" in v:
|
694
|
-
secs = int(v["secs"]) # type: ignore[index]
|
695
|
-
nanos = int(v["nanos"]) # type: ignore[index]
|
696
|
-
return datetime.timedelta(seconds=secs, microseconds=nanos / 1_000)
|
697
|
-
return v
|
698
|
-
|
699
|
-
# List, NDArray (Vector-ish), or general sequences
|
700
|
-
if isinstance(variant, AnalyzedListType):
|
701
|
-
elem_type = variant.elem_type if variant.elem_type else Any
|
702
|
-
if type_info.base_type is np.ndarray:
|
703
|
-
# Reconstruct NDArray with appropriate dtype if available
|
704
|
-
try:
|
705
|
-
dtype = extract_ndarray_elem_dtype(type_info.core_type)
|
706
|
-
except (TypeError, ValueError, AttributeError):
|
707
|
-
dtype = None
|
708
|
-
return np.array(v, dtype=dtype)
|
709
|
-
# Regular Python list
|
710
|
-
return [load_engine_object(elem_type, item) for item in v]
|
711
|
-
|
712
|
-
# Dict / Mapping
|
713
|
-
if isinstance(variant, AnalyzedDictType):
|
714
|
-
key_t = variant.key_type
|
715
|
-
val_t = variant.value_type
|
716
|
-
return {
|
717
|
-
load_engine_object(key_t, k): load_engine_object(val_t, val)
|
718
|
-
for k, val in v.items()
|
719
|
-
}
|
720
|
-
|
721
|
-
# Structs (dataclass or NamedTuple)
|
722
|
-
if isinstance(variant, AnalyzedStructType):
|
723
|
-
struct_type = variant.struct_type
|
724
|
-
if dataclasses.is_dataclass(struct_type):
|
725
|
-
if not isinstance(v, Mapping):
|
726
|
-
raise ValueError(f"Expected dict for dataclass, got {type(v)}")
|
727
|
-
# Drop auxiliary discriminator "kind" if present
|
728
|
-
dc_init_kwargs: dict[str, Any] = {}
|
729
|
-
field_types = {f.name: f.type for f in dataclasses.fields(struct_type)}
|
730
|
-
for name, f_type in field_types.items():
|
731
|
-
if name in v:
|
732
|
-
dc_init_kwargs[name] = load_engine_object(f_type, v[name])
|
733
|
-
return struct_type(**dc_init_kwargs)
|
734
|
-
elif is_namedtuple_type(struct_type):
|
735
|
-
if not isinstance(v, Mapping):
|
736
|
-
raise ValueError(f"Expected dict for NamedTuple, got {type(v)}")
|
737
|
-
# Dict format (from dump/load functions)
|
738
|
-
annotations = getattr(struct_type, "__annotations__", {})
|
739
|
-
field_names = list(getattr(struct_type, "_fields", ()))
|
740
|
-
nt_init_kwargs: dict[str, Any] = {}
|
741
|
-
for name in field_names:
|
742
|
-
f_type = annotations.get(name, Any)
|
743
|
-
if name in v:
|
744
|
-
nt_init_kwargs[name] = load_engine_object(f_type, v[name])
|
745
|
-
return struct_type(**nt_init_kwargs)
|
746
|
-
return v
|
747
|
-
|
748
|
-
# Union with discriminator support via "kind"
|
749
|
-
if isinstance(variant, AnalyzedUnionType):
|
750
|
-
if isinstance(v, Mapping) and "kind" in v:
|
751
|
-
discriminator = v["kind"]
|
752
|
-
for typ in variant.variant_types:
|
753
|
-
t_info = analyze_type_info(typ)
|
754
|
-
if isinstance(t_info.variant, AnalyzedStructType):
|
755
|
-
t_struct = t_info.variant.struct_type
|
756
|
-
candidate_kind = getattr(t_struct, "kind", None)
|
757
|
-
if candidate_kind == discriminator:
|
758
|
-
# Remove discriminator for constructor
|
759
|
-
v_wo_kind = dict(v)
|
760
|
-
v_wo_kind.pop("kind", None)
|
761
|
-
return load_engine_object(t_struct, v_wo_kind)
|
762
|
-
# Fallback: try each variant until one succeeds
|
763
|
-
for typ in variant.variant_types:
|
764
|
-
try:
|
765
|
-
return load_engine_object(typ, v)
|
766
|
-
except (TypeError, ValueError):
|
767
|
-
continue
|
768
|
-
return v
|
769
|
-
|
770
|
-
# Basic types and everything else: handle numpy scalars and passthrough
|
771
|
-
if isinstance(v, np.ndarray) and type_info.base_type is list:
|
772
|
-
return v.tolist()
|
773
|
-
if isinstance(v, (list, tuple)) and type_info.base_type not in (list, tuple):
|
774
|
-
# If a non-sequence basic type expected, attempt direct cast
|
775
|
-
try:
|
776
|
-
return type_info.core_type(v)
|
777
|
-
except (TypeError, ValueError):
|
778
|
-
return v
|
779
|
-
return v
|
cocoindex/flow.py
CHANGED
@@ -31,8 +31,8 @@ from . import _engine # type: ignore
|
|
31
31
|
from . import index
|
32
32
|
from . import op
|
33
33
|
from . import setting
|
34
|
-
from .convert import (
|
35
|
-
dump_engine_object,
34
|
+
from .engine_object import dump_engine_object
|
35
|
+
from .engine_value import (
|
36
36
|
make_engine_value_decoder,
|
37
37
|
make_engine_value_encoder,
|
38
38
|
)
|
@@ -405,6 +405,7 @@ class DataCollector:
|
|
405
405
|
/,
|
406
406
|
*,
|
407
407
|
primary_key_fields: Sequence[str],
|
408
|
+
attachments: Sequence[op.TargetAttachmentSpec] = (),
|
408
409
|
vector_indexes: Sequence[index.VectorIndexDef] = (),
|
409
410
|
vector_index: Sequence[tuple[str, index.VectorSimilarityMetric]] = (),
|
410
411
|
setup_by_user: bool = False,
|
@@ -436,6 +437,10 @@ class DataCollector:
|
|
436
437
|
target_name,
|
437
438
|
_spec_kind(target_spec),
|
438
439
|
dump_engine_object(target_spec),
|
440
|
+
[
|
441
|
+
{"kind": _spec_kind(att), **dump_engine_object(att)}
|
442
|
+
for att in attachments
|
443
|
+
],
|
439
444
|
dump_engine_object(index_options),
|
440
445
|
self._engine_data_collector,
|
441
446
|
setup_by_user,
|
@@ -0,0 +1,40 @@
|
|
1
|
+
"""Functions module for cocoindex.
|
2
|
+
|
3
|
+
This module provides various function specifications and executors for data processing,
|
4
|
+
including embedding functions, text processing, and multimodal operations.
|
5
|
+
"""
|
6
|
+
|
7
|
+
# Import all engine builtin function specs
|
8
|
+
from ._engine_builtin_specs import *
|
9
|
+
|
10
|
+
# Import SentenceTransformer embedding functionality
|
11
|
+
from .sbert import (
|
12
|
+
SentenceTransformerEmbed,
|
13
|
+
SentenceTransformerEmbedExecutor,
|
14
|
+
)
|
15
|
+
|
16
|
+
# Import ColPali multimodal embedding functionality
|
17
|
+
from .colpali import (
|
18
|
+
ColPaliEmbedImage,
|
19
|
+
ColPaliEmbedImageExecutor,
|
20
|
+
ColPaliEmbedQuery,
|
21
|
+
ColPaliEmbedQueryExecutor,
|
22
|
+
)
|
23
|
+
|
24
|
+
__all__ = [
|
25
|
+
# Engine builtin specs
|
26
|
+
"DetectProgrammingLanguage",
|
27
|
+
"EmbedText",
|
28
|
+
"ExtractByLlm",
|
29
|
+
"ParseJson",
|
30
|
+
"SplitBySeparators",
|
31
|
+
"SplitRecursively",
|
32
|
+
# SentenceTransformer
|
33
|
+
"SentenceTransformerEmbed",
|
34
|
+
"SentenceTransformerEmbedExecutor",
|
35
|
+
# ColPali
|
36
|
+
"ColPaliEmbedImage",
|
37
|
+
"ColPaliEmbedImageExecutor",
|
38
|
+
"ColPaliEmbedQuery",
|
39
|
+
"ColPaliEmbedQueryExecutor",
|
40
|
+
]
|
@@ -0,0 +1,66 @@
|
|
1
|
+
"""All builtin function specs."""
|
2
|
+
|
3
|
+
import dataclasses
|
4
|
+
from typing import Literal
|
5
|
+
|
6
|
+
from .. import llm, op
|
7
|
+
|
8
|
+
|
9
|
+
class ParseJson(op.FunctionSpec):
|
10
|
+
"""Parse a text into a JSON object."""
|
11
|
+
|
12
|
+
|
13
|
+
@dataclasses.dataclass
|
14
|
+
class CustomLanguageSpec:
|
15
|
+
"""Custom language specification."""
|
16
|
+
|
17
|
+
language_name: str
|
18
|
+
separators_regex: list[str]
|
19
|
+
aliases: list[str] = dataclasses.field(default_factory=list)
|
20
|
+
|
21
|
+
|
22
|
+
class DetectProgrammingLanguage(op.FunctionSpec):
|
23
|
+
"""Detect the programming language of a file."""
|
24
|
+
|
25
|
+
|
26
|
+
class SplitRecursively(op.FunctionSpec):
|
27
|
+
"""Split a document (in string) recursively."""
|
28
|
+
|
29
|
+
custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
|
30
|
+
|
31
|
+
|
32
|
+
class SplitBySeparators(op.FunctionSpec):
|
33
|
+
"""
|
34
|
+
Split text by specified regex separators only.
|
35
|
+
Output schema matches SplitRecursively for drop-in compatibility:
|
36
|
+
KTable rows with fields: location (Range), text (Str), start, end.
|
37
|
+
Args:
|
38
|
+
separators_regex: list[str] # e.g., [r"\\n\\n+"]
|
39
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
40
|
+
include_empty: bool = False
|
41
|
+
trim: bool = True
|
42
|
+
"""
|
43
|
+
|
44
|
+
separators_regex: list[str] = dataclasses.field(default_factory=list)
|
45
|
+
keep_separator: Literal["NONE", "LEFT", "RIGHT"] = "NONE"
|
46
|
+
include_empty: bool = False
|
47
|
+
trim: bool = True
|
48
|
+
|
49
|
+
|
50
|
+
class EmbedText(op.FunctionSpec):
|
51
|
+
"""Embed a text into a vector space."""
|
52
|
+
|
53
|
+
api_type: llm.LlmApiType
|
54
|
+
model: str
|
55
|
+
address: str | None = None
|
56
|
+
output_dimension: int | None = None
|
57
|
+
task_type: str | None = None
|
58
|
+
api_config: llm.VertexAiConfig | None = None
|
59
|
+
|
60
|
+
|
61
|
+
class ExtractByLlm(op.FunctionSpec):
|
62
|
+
"""Extract information from a text using a LLM."""
|
63
|
+
|
64
|
+
llm_spec: llm.LlmSpec
|
65
|
+
output_type: type
|
66
|
+
instruction: str | None = None
|