cocoindex 0.3.4__cp311-abi3-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42):
  1. cocoindex/__init__.py +114 -0
  2. cocoindex/_engine.abi3.so +0 -0
  3. cocoindex/auth_registry.py +44 -0
  4. cocoindex/cli.py +830 -0
  5. cocoindex/engine_object.py +214 -0
  6. cocoindex/engine_value.py +550 -0
  7. cocoindex/flow.py +1281 -0
  8. cocoindex/functions/__init__.py +40 -0
  9. cocoindex/functions/_engine_builtin_specs.py +66 -0
  10. cocoindex/functions/colpali.py +247 -0
  11. cocoindex/functions/sbert.py +77 -0
  12. cocoindex/index.py +50 -0
  13. cocoindex/lib.py +75 -0
  14. cocoindex/llm.py +47 -0
  15. cocoindex/op.py +1047 -0
  16. cocoindex/py.typed +0 -0
  17. cocoindex/query_handler.py +57 -0
  18. cocoindex/runtime.py +78 -0
  19. cocoindex/setting.py +171 -0
  20. cocoindex/setup.py +92 -0
  21. cocoindex/sources/__init__.py +5 -0
  22. cocoindex/sources/_engine_builtin_specs.py +120 -0
  23. cocoindex/subprocess_exec.py +277 -0
  24. cocoindex/targets/__init__.py +5 -0
  25. cocoindex/targets/_engine_builtin_specs.py +153 -0
  26. cocoindex/targets/lancedb.py +466 -0
  27. cocoindex/tests/__init__.py +0 -0
  28. cocoindex/tests/test_engine_object.py +331 -0
  29. cocoindex/tests/test_engine_value.py +1724 -0
  30. cocoindex/tests/test_optional_database.py +249 -0
  31. cocoindex/tests/test_transform_flow.py +300 -0
  32. cocoindex/tests/test_typing.py +553 -0
  33. cocoindex/tests/test_validation.py +134 -0
  34. cocoindex/typing.py +834 -0
  35. cocoindex/user_app_loader.py +53 -0
  36. cocoindex/utils.py +20 -0
  37. cocoindex/validation.py +104 -0
  38. cocoindex-0.3.4.dist-info/METADATA +288 -0
  39. cocoindex-0.3.4.dist-info/RECORD +42 -0
  40. cocoindex-0.3.4.dist-info/WHEEL +4 -0
  41. cocoindex-0.3.4.dist-info/entry_points.txt +2 -0
  42. cocoindex-0.3.4.dist-info/licenses/THIRD_PARTY_NOTICES.html +13249 -0
@@ -0,0 +1,214 @@
1
+ """
2
+ Utilities to dump/load objects (for configs, specs).
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import datetime
8
+ from enum import Enum
9
+ from typing import Any, Mapping, TypeVar, overload, get_origin
10
+
11
+ import numpy as np
12
+
13
+ from .typing import (
14
+ AnalyzedAnyType,
15
+ AnalyzedBasicType,
16
+ AnalyzedDictType,
17
+ AnalyzedListType,
18
+ AnalyzedStructType,
19
+ AnalyzedTypeInfo,
20
+ AnalyzedUnionType,
21
+ EnrichedValueType,
22
+ FieldSchema,
23
+ analyze_type_info,
24
+ encode_enriched_type,
25
+ is_namedtuple_type,
26
+ extract_ndarray_elem_dtype,
27
+ )
28
+
29
+
30
# Type variable linking `expected_type` to the return type in the typed overload of `load_engine_object`.
+ T = TypeVar("T")
31
+
32
+
33
def get_auto_default_for_type(
    type_info: AnalyzedTypeInfo,
) -> tuple[Any, bool]:
    """
    Decide whether a value of the given analyzed type can be defaulted automatically.

    Args:
        type_info: Analyzed type information for the annotation in question.

    Returns:
        A ``(default_value, is_supported)`` pair:
        - nullable types        -> ``(None, True)``
        - list-variant types    -> ``([], True)``
        - dict-variant types    -> ``({}, True)``
        - everything else       -> ``(None, False)``; the first element carries
          no meaning when ``is_supported`` is False.
    """
    # Nullable (Optional[T] / T | None) takes precedence over the container checks.
    if type_info.nullable:
        return None, True

    variant = type_info.variant

    # Table types (KTable or LTable) show up here as list or dict variants.
    if isinstance(variant, AnalyzedListType):
        return [], True
    if isinstance(variant, AnalyzedDictType):
        return {}, True

    return None, False
55
+
56
+
57
def dump_engine_object(v: Any) -> Any:
    """Recursively dump an object for engine. Engine side uses `Pythonized` to catch.

    Conversion rules, checked in this order:
      - ``None`` is returned unchanged.
      - ``EnrichedValueType`` / ``FieldSchema`` instances use their own ``encode()``.
      - Classes, and objects for which ``typing.get_origin`` is not ``None``,
        go through ``encode_enriched_type``.
      - ``Enum`` members become their ``.value``.
      - ``datetime.timedelta`` becomes ``{"secs": int, "nanos": int}``. Both
        parts carry the sign of the duration (truncation toward zero).
      - NamedTuple instances become a dict keyed by field name; ``None``
        values are kept.
      - Other objects exposing ``__dict__`` become a dict of their attributes;
        ``None`` values are skipped.
      - For both struct-like forms, a ``"kind"`` entry is added when the object
        has a ``kind`` attribute that is not already among the dumped keys.
      - Lists and tuples become lists of dumped items.
      - ``numpy.ndarray`` becomes a nested list via ``tolist()``.
      - Dicts have their values dumped; keys are left untouched.
      - Anything else is returned as-is.

    Args:
        v: The object to dump.

    Returns:
        The dumped representation described above.
    """
    if v is None:
        return None
    elif isinstance(v, EnrichedValueType):
        return v.encode()
    elif isinstance(v, FieldSchema):
        return v.encode()
    elif isinstance(v, type) or get_origin(v) is not None:
        return encode_enriched_type(v)
    elif isinstance(v, Enum):
        return v.value
    elif isinstance(v, datetime.timedelta):
        # Use exact integer arithmetic. Going through float total_seconds()
        # truncates the sub-second part (e.g. 2.3s yielded 299_999_999 nanos).
        total_micros = v // datetime.timedelta(microseconds=1)
        sign = -1 if total_micros < 0 else 1
        whole_secs, rem_micros = divmod(abs(total_micros), 1_000_000)
        return {"secs": sign * whole_secs, "nanos": sign * rem_micros * 1_000}
    elif is_namedtuple_type(type(v)):
        # Handle NamedTuple objects specifically to use dict format.
        # All values are included, including None.
        field_names = list(getattr(type(v), "_fields", ()))
        result = {name: dump_engine_object(getattr(v, name)) for name in field_names}
        if hasattr(v, "kind") and "kind" not in result:
            result["kind"] = v.kind
        return result
    elif hasattr(v, "__dict__"):  # for dataclass-like objects
        # None-valued attributes are skipped.
        attrs = {
            name: dump_engine_object(val)
            for name, val in v.__dict__.items()
            if val is not None
        }
        if hasattr(v, "kind") and "kind" not in attrs:
            attrs["kind"] = v.kind
        return attrs
    elif isinstance(v, (list, tuple)):
        return [dump_engine_object(item) for item in v]
    elif isinstance(v, np.ndarray):
        return v.tolist()
    elif isinstance(v, dict):
        # Distinct loop names: do not rebind `v` while iterating over it.
        return {key: dump_engine_object(item) for key, item in v.items()}
    return v
101
+
102
+
103
@overload
def load_engine_object(expected_type: type[T], v: Any) -> T: ...
@overload
def load_engine_object(expected_type: Any, v: Any) -> Any: ...
def load_engine_object(expected_type: Any, v: Any) -> Any:
    """Rebuild a Python object from the output of dump_engine_object().

    The expected type is analyzed once and the value is then dispatched on the
    resulting variant. Checks run in a fixed order: engine schema types, Any,
    Enum, TimeDelta, list/ndarray, dict, struct, union, and finally a
    passthrough with light coercion for everything else.

    Args:
        expected_type: The Python type annotation to reconstruct to.
        v: The engine-facing Pythonized object (dict/list/primitive) to convert.

    Returns:
        A Python object matching ``expected_type`` where possible; ``None``
        when ``v`` is ``None``; otherwise ``v`` itself when no conversion applies.
    """
    # None never needs type analysis.
    if v is None:
        return None

    type_info = analyze_type_info(expected_type)
    variant = type_info.variant

    # Engine schema types carry their own decoders.
    if type_info.core_type is EnrichedValueType:
        return EnrichedValueType.decode(v)
    if type_info.core_type is FieldSchema:
        return FieldSchema.decode(v)

    # Any or unknown: nothing to reconstruct.
    if isinstance(variant, AnalyzedAnyType) or type_info.base_type is Any:
        return v

    # Enum classes are rebuilt from their value.
    if isinstance(expected_type, type) and issubclass(expected_type, Enum):
        return expected_type(v)

    # TimeDelta travels as a {secs, nanos} mapping; any other shape is passed through.
    if isinstance(variant, AnalyzedBasicType) and variant.kind == "TimeDelta":
        if not (isinstance(v, Mapping) and "secs" in v and "nanos" in v):
            return v
        whole_secs = int(v["secs"])  # type: ignore[index]
        sub_nanos = int(v["nanos"])  # type: ignore[index]
        return datetime.timedelta(
            seconds=whole_secs, microseconds=sub_nanos / 1_000
        )

    # List-like: either an NDArray or a plain Python list.
    if isinstance(variant, AnalyzedListType):
        item_type = variant.elem_type or Any
        if type_info.base_type is np.ndarray:
            # Use the annotated element dtype when it can be extracted,
            # otherwise let numpy infer it.
            try:
                np_dtype = extract_ndarray_elem_dtype(type_info.core_type)
            except (TypeError, ValueError, AttributeError):
                np_dtype = None
            return np.array(v, dtype=np_dtype)
        return [load_engine_object(item_type, element) for element in v]

    # Dict / Mapping: keys and values are each loaded with their own type.
    if isinstance(variant, AnalyzedDictType):
        key_type = variant.key_type
        value_type = variant.value_type
        return {
            load_engine_object(key_type, raw_key): load_engine_object(
                value_type, raw_value
            )
            for raw_key, raw_value in v.items()
        }

    # Structs (dataclass, NamedTuple, or Pydantic): build constructor kwargs per field.
    if isinstance(variant, AnalyzedStructType):
        kwargs: dict[str, Any] = {}
        for field in variant.fields:
            field_name = field.name
            if field_name in v:
                kwargs[field_name] = load_engine_object(
                    field.type_hint, v[field_name]
                )
                continue
            # Missing field: fill in only when a safe automatic default exists.
            # Otherwise omit it and leave the decision to the constructor.
            field_type_info = analyze_type_info(field.type_hint)
            default_value, has_default = get_auto_default_for_type(field_type_info)
            if has_default:
                kwargs[field_name] = default_value
        return variant.struct_type(**kwargs)

    # Unions: prefer the struct whose `kind` matches the payload's "kind" tag.
    if isinstance(variant, AnalyzedUnionType):
        if isinstance(v, Mapping) and "kind" in v:
            tag = v["kind"]
            for member in variant.variant_types:
                member_variant = analyze_type_info(member).variant
                if not isinstance(member_variant, AnalyzedStructType):
                    continue
                member_struct = member_variant.struct_type
                if getattr(member_struct, "kind", None) == tag:
                    # The tag itself is not passed on to the struct loader.
                    payload = dict(v)
                    payload.pop("kind", None)
                    return load_engine_object(member_struct, payload)
        # No tag match: take the first member that loads without a type/value error.
        for member in variant.variant_types:
            try:
                return load_engine_object(member, v)
            except (TypeError, ValueError):
                continue
        return v

    # Everything else: light coercion, then passthrough.
    if isinstance(v, np.ndarray) and type_info.base_type is list:
        return v.tolist()
    if isinstance(v, (list, tuple)) and type_info.base_type not in (list, tuple):
        # A sequence arrived where a non-sequence type is expected; try a direct cast.
        try:
            return type_info.core_type(v)
        except (TypeError, ValueError):
            pass
    return v