cocoindex 0.1.52__cp313-cp313-macosx_10_12_x86_64.whl → 0.1.54__cp313-cp313-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
cocoindex/cli.py CHANGED
@@ -1,19 +1,19 @@
1
- import click
1
+ import atexit
2
2
  import datetime
3
- import sys
4
3
  import importlib.util
5
4
  import os
6
- import atexit
5
+ import sys
7
6
  import types
7
+ from typing import Any
8
8
 
9
- from dotenv import load_dotenv, find_dotenv
9
+ import click
10
+ from dotenv import find_dotenv, load_dotenv
10
11
  from rich.console import Console
11
12
  from rich.panel import Panel
12
13
  from rich.table import Table
13
- from typing import Any
14
14
 
15
15
  from . import flow, lib, setting
16
- from .setup import sync_setup, drop_setup, flow_names_with_setup, apply_setup_changes
16
+ from .setup import apply_setup_changes, drop_setup, flow_names_with_setup, sync_setup
17
17
 
18
18
  # Create ServerSettings lazily upon first call, as environment variables may be loaded from files, etc.
19
19
  COCOINDEX_HOST = "https://cocoindex.io"
cocoindex/convert.py CHANGED
@@ -6,17 +6,20 @@ import dataclasses
6
6
  import datetime
7
7
  import inspect
8
8
  import uuid
9
+ from enum import Enum
10
+ from typing import Any, Callable, Mapping, get_origin
11
+
9
12
  import numpy as np
10
13
 
11
- from enum import Enum
12
- from typing import Any, Callable, get_origin, Mapping
13
14
  from .typing import (
15
+ KEY_FIELD_NAME,
16
+ TABLE_TYPES,
17
+ AnalyzedTypeInfo,
18
+ DtypeRegistry,
14
19
  analyze_type_info,
15
20
  encode_enriched_type,
21
+ extract_ndarray_scalar_dtype,
16
22
  is_namedtuple_type,
17
- TABLE_TYPES,
18
- KEY_FIELD_NAME,
19
- DtypeRegistry,
20
23
  )
21
24
 
22
25
 
@@ -29,6 +32,8 @@ def encode_engine_value(value: Any) -> Any:
29
32
  ]
30
33
  if is_namedtuple_type(type(value)):
31
34
  return [encode_engine_value(getattr(value, name)) for name in value._fields]
35
+ if isinstance(value, np.number):
36
+ return value.item()
32
37
  if isinstance(value, np.ndarray):
33
38
  return value
34
39
  if isinstance(value, (list, tuple)):
@@ -42,6 +47,19 @@ def encode_engine_value(value: Any) -> Any:
42
47
  return value
43
48
 
44
49
 
50
+ _CONVERTIBLE_KINDS = {
51
+ ("Float32", "Float64"),
52
+ ("LocalDateTime", "OffsetDateTime"),
53
+ }
54
+
55
+
56
+ def _is_type_kind_convertible_to(src_type_kind: str, dst_type_kind: str) -> bool:
57
+ return (
58
+ src_type_kind == dst_type_kind
59
+ or (src_type_kind, dst_type_kind) in _CONVERTIBLE_KINDS
60
+ )
61
+
62
+
45
63
  def make_engine_value_decoder(
46
64
  field_path: list[str],
47
65
  src_type: dict[str, Any],
@@ -61,11 +79,23 @@ def make_engine_value_decoder(
61
79
 
62
80
  src_type_kind = src_type["kind"]
63
81
 
82
+ dst_type_info: AnalyzedTypeInfo | None = None
64
83
  if (
65
- dst_annotation is None
66
- or dst_annotation is inspect.Parameter.empty
67
- or dst_annotation is Any
84
+ dst_annotation is not None
85
+ and dst_annotation is not inspect.Parameter.empty
86
+ and dst_annotation is not Any
68
87
  ):
88
+ dst_type_info = analyze_type_info(dst_annotation)
89
+ if not _is_type_kind_convertible_to(src_type_kind, dst_type_info.kind):
90
+ raise ValueError(
91
+ f"Type mismatch for `{''.join(field_path)}`: "
92
+ f"passed in {src_type_kind}, declared {dst_annotation} ({dst_type_info.kind})"
93
+ )
94
+
95
+ if src_type_kind == "Uuid":
96
+ return lambda value: uuid.UUID(bytes=value)
97
+
98
+ if dst_type_info is None:
69
99
  if src_type_kind == "Struct" or src_type_kind in TABLE_TYPES:
70
100
  raise ValueError(
71
101
  f"Missing type annotation for `{''.join(field_path)}`."
@@ -73,14 +103,62 @@ def make_engine_value_decoder(
73
103
  )
74
104
  return lambda value: value
75
105
 
76
- dst_type_info = analyze_type_info(dst_annotation)
106
+ if dst_type_info.kind in ("Float32", "Float64", "Int64"):
107
+ dst_core_type = dst_type_info.core_type
108
+
109
+ def decode_scalar(value: Any) -> Any | None:
110
+ if value is None:
111
+ if dst_type_info.nullable:
112
+ return None
113
+ raise ValueError(
114
+ f"Received null for non-nullable scalar `{''.join(field_path)}`"
115
+ )
116
+ return dst_core_type(value)
117
+
118
+ return decode_scalar
77
119
 
78
- if src_type_kind != dst_type_info.kind:
79
- raise ValueError(
80
- f"Type mismatch for `{''.join(field_path)}`: "
81
- f"passed in {src_type_kind}, declared {dst_annotation} ({dst_type_info.kind})"
120
+ if src_type_kind == "Vector":
121
+ field_path_str = "".join(field_path)
122
+ expected_dim = (
123
+ dst_type_info.vector_info.dim if dst_type_info.vector_info else None
82
124
  )
83
125
 
126
+ elem_decoder = None
127
+ scalar_dtype = None
128
+ if dst_type_info.np_number_type is None: # for Non-NDArray vector
129
+ elem_decoder = make_engine_value_decoder(
130
+ field_path + ["[*]"],
131
+ src_type["element_type"],
132
+ dst_type_info.elem_type,
133
+ )
134
+ else: # for NDArray vector
135
+ scalar_dtype = extract_ndarray_scalar_dtype(dst_type_info.np_number_type)
136
+ _ = DtypeRegistry.validate_dtype_and_get_kind(scalar_dtype)
137
+
138
+ def decode_vector(value: Any) -> Any | None:
139
+ if value is None:
140
+ if dst_type_info.nullable:
141
+ return None
142
+ raise ValueError(
143
+ f"Received null for non-nullable vector `{field_path_str}`"
144
+ )
145
+ if not isinstance(value, (np.ndarray, list)):
146
+ raise TypeError(
147
+ f"Expected NDArray or list for vector `{field_path_str}`, got {type(value)}"
148
+ )
149
+ if expected_dim is not None and len(value) != expected_dim:
150
+ raise ValueError(
151
+ f"Vector dimension mismatch for `{field_path_str}`: "
152
+ f"expected {expected_dim}, got {len(value)}"
153
+ )
154
+
155
+ if elem_decoder is not None: # for Non-NDArray vector
156
+ return [elem_decoder(v) for v in value]
157
+ else: # for NDArray vector
158
+ return np.array(value, dtype=scalar_dtype)
159
+
160
+ return decode_vector
161
+
84
162
  if dst_type_info.struct_type is not None:
85
163
  return _make_engine_struct_value_decoder(
86
164
  field_path, src_type["fields"], dst_type_info.struct_type
@@ -123,39 +201,8 @@ def make_engine_value_decoder(
123
201
  field_path.pop()
124
202
  return decode
125
203
 
126
- if src_type_kind == "Uuid":
127
- return lambda value: uuid.UUID(bytes=value)
128
-
129
- if src_type_kind == "Vector":
130
- dtype_info = DtypeRegistry.get_by_dtype(dst_type_info.np_number_type)
131
-
132
- def decode_vector(value: Any) -> Any | None:
133
- if value is None:
134
- if dst_type_info.nullable:
135
- return None
136
- raise ValueError(
137
- f"Received null for non-nullable vector `{''.join(field_path)}`"
138
- )
139
-
140
- if not isinstance(value, (np.ndarray, list)):
141
- raise TypeError(
142
- f"Expected NDArray or list for vector `{''.join(field_path)}`, got {type(value)}"
143
- )
144
- expected_dim = (
145
- dst_type_info.vector_info.dim if dst_type_info.vector_info else None
146
- )
147
- if expected_dim is not None and len(value) != expected_dim:
148
- raise ValueError(
149
- f"Vector dimension mismatch for `{''.join(field_path)}`: "
150
- f"expected {expected_dim}, got {len(value)}"
151
- )
152
-
153
- # Use NDArray for supported numeric dtypes, else return list
154
- if dtype_info is not None:
155
- return np.array(value, dtype=dtype_info.numpy_dtype)
156
- return value
157
-
158
- return decode_vector
204
+ if src_type_kind == "Union":
205
+ return lambda value: value[1]
159
206
 
160
207
  return lambda value: value
161
208
 
cocoindex/flow.py CHANGED
@@ -92,6 +92,7 @@ def _spec_kind(spec: Any) -> str:
92
92
 
93
93
 
94
94
  T = TypeVar("T")
95
+ S = TypeVar("S")
95
96
 
96
97
 
97
98
  class _DataSliceState:
@@ -185,7 +186,7 @@ class DataSlice(Generic[T]):
185
186
 
186
187
  def transform(
187
188
  self, fn_spec: op.FunctionSpec, *args: Any, **kwargs: Any
188
- ) -> DataSlice[T]:
189
+ ) -> DataSlice[Any]:
189
190
  """
190
191
  Apply a function to the data slice.
191
192
  """
@@ -216,7 +217,7 @@ class DataSlice(Generic[T]):
216
217
  ),
217
218
  )
218
219
 
219
- def call(self, func: Callable[[DataSlice[T]], T], *args: Any, **kwargs: Any) -> T:
220
+ def call(self, func: Callable[..., S], *args: Any, **kwargs: Any) -> S:
220
221
  """
221
222
  Call a function with the data slice.
222
223
  """
cocoindex/functions.py CHANGED
@@ -32,6 +32,16 @@ class SplitRecursively(op.FunctionSpec):
32
32
  custom_languages: list[CustomLanguageSpec] = dataclasses.field(default_factory=list)
33
33
 
34
34
 
35
+ class EmbedText(op.FunctionSpec):
36
+ """Embed a text into a vector space."""
37
+
38
+ api_type: llm.LlmApiType
39
+ model: str
40
+ address: str | None = None
41
+ output_dimension: int | None = None
42
+ task_type: str | None = None
43
+
44
+
35
45
  class ExtractByLlm(op.FunctionSpec):
36
46
  """Extract information from a text using a LLM."""
37
47
 
cocoindex/llm.py CHANGED
@@ -9,6 +9,9 @@ class LlmApiType(Enum):
9
9
  OLLAMA = "Ollama"
10
10
  GEMINI = "Gemini"
11
11
  ANTHROPIC = "Anthropic"
12
+ LITE_LLM = "LiteLlm"
13
+ OPEN_ROUTER = "OpenRouter"
14
+ VOYAGE = "Voyage"
12
15
 
13
16
 
14
17
  @dataclass
@@ -1 +0,0 @@
1
-