cocoindex 0.1.49__cp311-cp311-win_amd64.whl → 0.1.51__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  Cocoindex is a framework for building and running indexing pipelines.
3
3
  """
4
4
 
5
- from . import functions, query, sources, storages, cli, utils
5
+ from . import functions, sources, storages, cli, utils
6
6
 
7
7
  from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
8
8
  from .flow import FlowBuilder, DataScope, DataSlice, Flow, transform_flow
@@ -15,3 +15,54 @@ from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
15
15
  from .setting import DatabaseConnectionSpec, Settings, ServerSettings
16
16
  from .setting import get_app_namespace
17
17
  from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
18
+
19
+ __all__ = [
20
+ # Submodules
21
+ "_engine",
22
+ "functions",
23
+ "sources",
24
+ "storages",
25
+ "cli",
26
+ "utils",
27
+ # Auth registry
28
+ "AuthEntryReference",
29
+ "add_auth_entry",
30
+ "ref_auth_entry",
31
+ # Flow
32
+ "FlowBuilder",
33
+ "DataScope",
34
+ "DataSlice",
35
+ "Flow",
36
+ "transform_flow",
37
+ "flow_def",
38
+ "EvaluateAndDumpOptions",
39
+ "GeneratedField",
40
+ "update_all_flows_async",
41
+ "FlowLiveUpdater",
42
+ "FlowLiveUpdaterOptions",
43
+ # Lib
44
+ "init",
45
+ "start_server",
46
+ "stop",
47
+ "main_fn",
48
+ # LLM
49
+ "LlmSpec",
50
+ "LlmApiType",
51
+ # Index
52
+ "VectorSimilarityMetric",
53
+ "VectorIndexDef",
54
+ "IndexOptions",
55
+ # Settings
56
+ "DatabaseConnectionSpec",
57
+ "Settings",
58
+ "ServerSettings",
59
+ "get_app_namespace",
60
+ # Typing
61
+ "Float32",
62
+ "Float64",
63
+ "LocalDateTime",
64
+ "OffsetDateTime",
65
+ "Range",
66
+ "Vector",
67
+ "Json",
68
+ ]
Binary file
cocoindex/cli.py CHANGED
@@ -237,7 +237,15 @@ def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
237
237
 
238
238
  @cli.command()
239
239
  @click.argument("app_target", type=str)
240
- def setup(app_target: str) -> None:
240
+ @click.option(
241
+ "-f",
242
+ "--force",
243
+ is_flag=True,
244
+ show_default=True,
245
+ default=False,
246
+ help="Force setup without confirmation prompts.",
247
+ )
248
+ def setup(app_target: str, force: bool) -> None:
241
249
  """
242
250
  Check and apply backend setup changes for flows, including the internal and target storage
243
251
  (to export).
@@ -252,7 +260,7 @@ def setup(app_target: str) -> None:
252
260
  if setup_status.is_up_to_date():
253
261
  click.echo("No changes need to be pushed.")
254
262
  return
255
- if not click.confirm(
263
+ if not force and not click.confirm(
256
264
  "Changes need to be pushed. Continue? [yes/N]",
257
265
  default=False,
258
266
  show_default=False,
@@ -275,7 +283,17 @@ def setup(app_target: str) -> None:
275
283
  "even if not defined in the current process."
276
284
  "If used, APP_TARGET and any listed flow names are ignored.",
277
285
  )
278
- def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool) -> None:
286
+ @click.option(
287
+ "-f",
288
+ "--force",
289
+ is_flag=True,
290
+ show_default=True,
291
+ default=False,
292
+ help="Force drop without confirmation prompts.",
293
+ )
294
+ def drop(
295
+ app_target: str | None, flow_name: tuple[str, ...], drop_all: bool, force: bool
296
+ ) -> None:
279
297
  """
280
298
  Drop the backend setup for flows.
281
299
 
@@ -328,7 +346,7 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool) ->
328
346
  if setup_status.is_up_to_date():
329
347
  click.echo("No flows need to be dropped.")
330
348
  return
331
- if not click.confirm(
349
+ if not force and not click.confirm(
332
350
  f"\nThis will apply changes to drop setup for: {', '.join(flow_names)}. Continue? [yes/N]",
333
351
  default=False,
334
352
  show_default=False,
cocoindex/convert.py CHANGED
@@ -6,6 +6,7 @@ import dataclasses
6
6
  import datetime
7
7
  import inspect
8
8
  import uuid
9
+ import numpy as np
9
10
 
10
11
  from enum import Enum
11
12
  from typing import Any, Callable, get_origin, Mapping
@@ -15,6 +16,7 @@ from .typing import (
15
16
  is_namedtuple_type,
16
17
  TABLE_TYPES,
17
18
  KEY_FIELD_NAME,
19
+ DtypeRegistry,
18
20
  )
19
21
 
20
22
 
@@ -27,6 +29,8 @@ def encode_engine_value(value: Any) -> Any:
27
29
  ]
28
30
  if is_namedtuple_type(type(value)):
29
31
  return [encode_engine_value(getattr(value, name)) for name in value._fields]
32
+ if isinstance(value, np.ndarray):
33
+ return value
30
34
  if isinstance(value, (list, tuple)):
31
35
  return [encode_engine_value(v) for v in value]
32
36
  if isinstance(value, dict):
@@ -122,6 +126,37 @@ def make_engine_value_decoder(
122
126
  if src_type_kind == "Uuid":
123
127
  return lambda value: uuid.UUID(bytes=value)
124
128
 
129
+ if src_type_kind == "Vector":
130
+ dtype_info = DtypeRegistry.get_by_dtype(dst_type_info.np_number_type)
131
+
132
+ def decode_vector(value: Any) -> Any | None:
133
+ if value is None:
134
+ if dst_type_info.nullable:
135
+ return None
136
+ raise ValueError(
137
+ f"Received null for non-nullable vector `{''.join(field_path)}`"
138
+ )
139
+
140
+ if not isinstance(value, (np.ndarray, list)):
141
+ raise TypeError(
142
+ f"Expected NDArray or list for vector `{''.join(field_path)}`, got {type(value)}"
143
+ )
144
+ expected_dim = (
145
+ dst_type_info.vector_info.dim if dst_type_info.vector_info else None
146
+ )
147
+ if expected_dim is not None and len(value) != expected_dim:
148
+ raise ValueError(
149
+ f"Vector dimension mismatch for `{''.join(field_path)}`: "
150
+ f"expected {expected_dim}, got {len(value)}"
151
+ )
152
+
153
+ # Use NDArray for supported numeric dtypes, else return list
154
+ if dtype_info is not None:
155
+ return np.array(value, dtype=dtype_info.numpy_dtype)
156
+ return value
157
+
158
+ return decode_vector
159
+
125
160
  return lambda value: value
126
161
 
127
162
 
@@ -201,7 +236,12 @@ def dump_engine_object(v: Any) -> Any:
201
236
  nanos = int((total_secs - secs) * 1e9)
202
237
  return {"secs": secs, "nanos": nanos}
203
238
  elif hasattr(v, "__dict__"):
204
- s = {k: dump_engine_object(v) for k, v in v.__dict__.items()}
239
+ s = {}
240
+ for k, val in v.__dict__.items():
241
+ if val is None:
242
+ # Skip None values
243
+ continue
244
+ s[k] = dump_engine_object(val)
205
245
  if hasattr(v, "kind") and "kind" not in s:
206
246
  s["kind"] = v.kind
207
247
  return s
cocoindex/functions.py CHANGED
@@ -1,6 +1,8 @@
1
1
  """All builtin functions."""
2
2
 
3
- from typing import Annotated, Any, TYPE_CHECKING
3
+ from typing import Annotated, Any, TYPE_CHECKING, Literal
4
+ import numpy as np
5
+ from numpy.typing import NDArray
4
6
  import dataclasses
5
7
 
6
8
  from .typing import Float32, Vector, TypeAttr
@@ -66,11 +68,11 @@ class SentenceTransformerEmbedExecutor:
66
68
  self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
67
69
  dim = self._model.get_sentence_embedding_dimension()
68
70
  result: type = Annotated[
69
- Vector[Float32, dim], # type: ignore
71
+ Vector[np.float32, Literal[dim]], # type: ignore
70
72
  TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value),
71
73
  ]
72
74
  return result
73
75
 
74
- def __call__(self, text: str) -> list[Float32]:
75
- result: list[Float32] = self._model.encode(text).tolist()
76
+ def __call__(self, text: str) -> NDArray[np.float32]:
77
+ result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
76
78
  return result
cocoindex/lib.py CHANGED
@@ -6,7 +6,7 @@ import warnings
6
6
  from typing import Callable, Any
7
7
 
8
8
  from . import _engine # type: ignore
9
- from . import flow, query, setting
9
+ from . import flow, setting
10
10
  from .convert import dump_engine_object
11
11
 
12
12
 
@@ -24,7 +24,6 @@ def init(settings: setting.Settings | None = None) -> None:
24
24
  def start_server(settings: setting.ServerSettings) -> None:
25
25
  """Start the cocoindex server."""
26
26
  flow.ensure_all_flows_built()
27
- query.ensure_all_handlers_built()
28
27
  _engine.start_server(settings.__dict__)
29
28
 
30
29
 
cocoindex/setting.py CHANGED
@@ -62,18 +62,22 @@ def _load_field(
62
62
  class Settings:
63
63
  """Settings for the cocoindex library."""
64
64
 
65
- database: DatabaseConnectionSpec
65
+ database: DatabaseConnectionSpec | None = None
66
66
  app_namespace: str = ""
67
67
 
68
68
  @classmethod
69
69
  def from_env(cls) -> Self:
70
70
  """Load settings from environment variables."""
71
71
 
72
- db_kwargs: dict[str, str] = dict()
73
- _load_field(db_kwargs, "url", "COCOINDEX_DATABASE_URL", required=True)
74
- _load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
75
- _load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
76
- database = DatabaseConnectionSpec(**db_kwargs)
72
+ database_url = os.getenv("COCOINDEX_DATABASE_URL")
73
+ if database_url is not None:
74
+ db_kwargs: dict[str, str] = dict()
75
+ _load_field(db_kwargs, "url", "COCOINDEX_DATABASE_URL", required=True)
76
+ _load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
77
+ _load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
78
+ database = DatabaseConnectionSpec(**db_kwargs)
79
+ else:
80
+ database = None
77
81
 
78
82
  app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", "")
79
83