cocoindex 0.1.70__cp311-cp311-win_amd64.whl → 0.1.72__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/__init__.py CHANGED
@@ -38,6 +38,7 @@ __all__ = [
38
38
  "targets",
39
39
  "storages",
40
40
  "cli",
41
+ "op",
41
42
  "utils",
42
43
  # Auth registry
43
44
  "AuthEntryReference",
Binary file
cocoindex/convert.py CHANGED
@@ -92,10 +92,14 @@ def make_engine_value_decoder(
92
92
  if src_type_kind == "Struct":
93
93
  return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"])
94
94
  if src_type_kind in TABLE_TYPES:
95
- raise ValueError(
96
- f"Missing type annotation for `{''.join(field_path)}`."
97
- f"It's required for {src_type_kind} type."
98
- )
95
+ if src_type_kind == "LTable":
96
+ return _make_engine_ltable_to_list_dict_decoder(
97
+ field_path, src_type["row"]["fields"]
98
+ )
99
+ elif src_type_kind == "KTable":
100
+ return _make_engine_ktable_to_dict_dict_decoder(
101
+ field_path, src_type["row"]["fields"]
102
+ )
99
103
  return lambda value: value
100
104
 
101
105
  # Handle struct -> dict binding for explicit dict annotations
@@ -340,6 +344,77 @@ def _make_engine_struct_to_dict_decoder(
340
344
  return decode_to_dict
341
345
 
342
346
 
347
def _make_engine_ltable_to_list_dict_decoder(
    field_path: list[str],
    src_fields: list[dict[str, Any]],
) -> Callable[[list[Any] | None], list[dict[str, Any]] | None]:
    """Build a decoder that turns an engine LTable value into a list of dicts.

    Args:
        field_path: Path of the field being decoded; handed unchanged to the
            struct-to-dict decoder factory.
        src_fields: Field schemas describing one row of the table.

    Returns:
        A callable that maps ``None`` to ``None`` and otherwise decodes every
        row, in order, with the struct-to-dict row decoder.

    The returned callable raises ``ValueError`` when any row decodes to
    ``None``.
    """
    decode_row = _make_engine_struct_to_dict_decoder(field_path, src_fields)

    def _require_row(index: int, raw_row: Any) -> dict[str, Any]:
        # A row that decodes to None has no place in the resulting list.
        row = decode_row(raw_row)
        if row is None:
            raise ValueError(
                f"LTable row at index {index} decoded to None, which is not allowed."
            )
        return row

    def decode_to_list_dict(values: list[Any] | None) -> list[dict[str, Any]] | None:
        if values is None:
            return None
        return [_require_row(index, raw_row) for index, raw_row in enumerate(values)]

    return decode_to_list_dict
370
+
371
+
372
def _make_engine_ktable_to_dict_dict_decoder(
    field_path: list[str],
    src_fields: list[dict[str, Any]],
) -> Callable[[list[Any] | None], dict[Any, dict[str, Any]] | None]:
    """Make a decoder from engine KTable values to a dict of dicts.

    The first entry of ``src_fields`` describes the key; the remaining entries
    describe the value struct of each row.

    Args:
        field_path: Path of the field being decoded. It is extended with the
            key field name only while the key decoder is built and is always
            restored afterwards, even if building the key decoder fails.
        src_fields: Field schemas of one table row, key field first.

    Returns:
        A callable mapping ``None`` to ``None`` and a list of rows to a dict
        keyed by the decoded key of each row.

    Raises:
        ValueError: If ``src_fields`` is empty. The returned callable raises
            ``ValueError`` for an empty row.
    """
    if not src_fields:
        raise ValueError("KTable must have at least one field for the key")

    # First field is the key, remaining fields are the value.
    key_field_schema = src_fields[0]
    value_fields_schema = src_fields[1:]

    # `field_path` is shared with the caller: undo the temporary extension in a
    # `finally` so an error while building the key decoder cannot leave it
    # with a stray trailing segment.
    field_path.append(f".{key_field_schema.get('name', KEY_FIELD_NAME)}")
    try:
        key_decoder = make_engine_value_decoder(
            field_path, key_field_schema["type"], Any
        )
    finally:
        field_path.pop()

    value_decoder = _make_engine_struct_to_dict_decoder(field_path, value_fields_schema)

    def decode_to_dict_dict(
        values: list[Any] | None,
    ) -> dict[Any, dict[str, Any]] | None:
        if values is None:
            return None
        result: dict[Any, dict[str, Any]] = {}
        for row_values in values:
            if not row_values:
                raise ValueError("KTable row must have at least 1 value (the key)")
            key = key_decoder(row_values[0])
            value: dict[str, Any] = {}
            if len(row_values) > 1:
                decoded = value_decoder(row_values[1:])
                if decoded is not None:
                    value = decoded
            if isinstance(key, dict):
                # A dict cannot be used as a dict key; use the tuple of its
                # values instead.
                key = tuple(key.values())
            result[key] = value
        return result

    return decode_to_dict_dict
416
+
417
+
343
418
  def dump_engine_object(v: Any) -> Any:
344
419
  """Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
345
420
  if v is None:
cocoindex/flow.py CHANGED
@@ -10,6 +10,13 @@ import functools
10
10
  import inspect
11
11
  import re
12
12
 
13
+ from .validation import (
14
+ validate_flow_name,
15
+ NamingError,
16
+ validate_full_flow_name,
17
+ validate_target_name,
18
+ )
19
+
13
20
  from dataclasses import dataclass
14
21
  from enum import Enum
15
22
  from threading import Lock
@@ -300,6 +307,9 @@ class DataScope:
300
307
  )
301
308
 
302
309
  def __setitem__(self, field_name: str, value: DataSlice[T]) -> None:
310
+ from .validation import validate_field_name
311
+
312
+ validate_field_name(field_name)
303
313
  value._state.attach_to_scope(self._engine_data_scope, field_name)
304
314
 
305
315
  def __enter__(self) -> DataScope:
@@ -367,7 +377,7 @@ class DataCollector:
367
377
 
368
378
  def export(
369
379
  self,
370
- name: str,
380
+ target_name: str,
371
381
  target_spec: op.TargetSpec,
372
382
  /,
373
383
  *,
@@ -381,6 +391,8 @@ class DataCollector:
381
391
 
382
392
  `vector_index` is for backward compatibility only. Please use `vector_indexes` instead.
383
393
  """
394
+
395
+ validate_target_name(target_name)
384
396
  if not isinstance(target_spec, op.TargetSpec):
385
397
  raise ValueError(
386
398
  "export() can only be called on a CocoIndex target storage"
@@ -398,7 +410,7 @@ class DataCollector:
398
410
  vector_indexes=vector_indexes,
399
411
  )
400
412
  self._flow_builder_state.engine_flow_builder.export(
401
- name,
413
+ target_name,
402
414
  _spec_kind(target_spec),
403
415
  dump_engine_object(target_spec),
404
416
  dump_engine_object(index_options),
@@ -660,6 +672,8 @@ class Flow:
660
672
  def __init__(
661
673
  self, name: str, full_name: str, engine_flow_creator: Callable[[], _engine.Flow]
662
674
  ):
675
+ validate_flow_name(name)
676
+ validate_full_flow_name(full_name)
663
677
  self._name = name
664
678
  self._full_name = full_name
665
679
  engine_flow = None
@@ -831,11 +845,6 @@ def get_flow_full_name(name: str) -> str:
831
845
 
832
846
 
833
847
  def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
834
- """Add a flow definition to the cocoindex library."""
835
- if not all(c.isalnum() or c == "_" for c in name):
836
- raise ValueError(
837
- f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed."
838
- )
839
848
  with _flows_lock:
840
849
  if name in _flows:
841
850
  raise KeyError(f"Flow with name {name} already exists")
cocoindex/functions.py CHANGED
@@ -66,14 +66,19 @@ class SentenceTransformerEmbed(op.FunctionSpec):
66
66
  args: dict[str, Any] | None = None
67
67
 
68
68
 
69
- @op.executor_class(gpu=True, cache=True, behavior_version=1)
69
+ @op.executor_class(
70
+ gpu=True,
71
+ cache=True,
72
+ behavior_version=1,
73
+ arg_relationship=(op.ArgRelationship.EMBEDDING_ORIGIN_TEXT, "text"),
74
+ )
70
75
  class SentenceTransformerEmbedExecutor:
71
76
  """Executor for SentenceTransformerEmbed."""
72
77
 
73
78
  spec: SentenceTransformerEmbed
74
79
  _model: Any | None = None
75
80
 
76
- def analyze(self, text: Any) -> type:
81
+ def analyze(self, _text: Any) -> type:
77
82
  try:
78
83
  # Only import sentence_transformers locally when it's needed, as its import is very slow.
79
84
  import sentence_transformers # pylint: disable=import-outside-toplevel
@@ -88,11 +93,7 @@ class SentenceTransformerEmbedExecutor:
88
93
  args = self.spec.args or {}
89
94
  self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
90
95
  dim = self._model.get_sentence_embedding_dimension()
91
- result: type = Annotated[
92
- Vector[np.float32, Literal[dim]], # type: ignore
93
- TypeAttr("cocoindex.io/vector_origin_text", text.analyzed_value),
94
- ]
95
- return result
96
+ return Vector[np.float32, Literal[dim]] # type: ignore
96
97
 
97
98
  def __call__(self, text: str) -> NDArray[np.float32]:
98
99
  assert self._model is not None
cocoindex/op.py CHANGED
@@ -6,11 +6,11 @@ import asyncio
6
6
  import dataclasses
7
7
  import inspect
8
8
  from enum import Enum
9
- from typing import Any, Awaitable, Callable, Protocol, dataclass_transform
9
+ from typing import Any, Awaitable, Callable, Protocol, dataclass_transform, Annotated
10
10
 
11
11
  from . import _engine # type: ignore
12
12
  from .convert import encode_engine_value, make_engine_value_decoder
13
- from .typing import encode_enriched_type, resolve_forward_ref
13
+ from .typing import TypeAttr, encode_enriched_type, resolve_forward_ref
14
14
 
15
15
 
16
16
  class OpCategory(Enum):
@@ -85,6 +85,17 @@ class _FunctionExecutorFactory:
85
85
  _gpu_dispatch_lock = asyncio.Lock()
86
86
 
87
87
 
88
# Prefix shared by all attribute keys declared in ArgRelationship.
_COCOINDEX_ATTR_PREFIX = "cocoindex.io/"


class ArgRelationship(Enum):
    """Kinds of relationship between one input argument and the output.

    Each member's value is the attribute key string, built from the shared
    prefix followed by a relationship-specific suffix.
    """

    EMBEDDING_ORIGIN_TEXT = f"{_COCOINDEX_ATTR_PREFIX}embedding_origin_text"
    # NOTE(review): the key says "chunk" while the member name says "CHUNKS";
    # kept exactly as is — presumably the key must match what the engine expects.
    CHUNKS_BASE_TEXT = f"{_COCOINDEX_ATTR_PREFIX}chunk_base_text"
    RECTS_BASE_IMAGE = f"{_COCOINDEX_ATTR_PREFIX}rects_base_image"
97
+
98
+
88
99
  @dataclasses.dataclass
89
100
  class OpArgs:
90
101
  """
@@ -92,11 +103,15 @@ class OpArgs:
92
103
  - cache: Whether the executor will be cached.
93
104
  - behavior_version: The behavior version of the executor. Cache will be invalidated if it
94
105
  changes. Must be provided if `cache` is True.
106
+ - arg_relationship: It specifies the relationship between an input argument and the output,
107
+ e.g. `(ArgRelationship.CHUNKS_BASE_TEXT, "content")` means the output is chunks for the
108
+ input argument with name `content`.
95
109
  """
96
110
 
97
111
  gpu: bool = False
98
112
  cache: bool = False
99
113
  behavior_version: int | None = None
114
+ arg_relationship: tuple[ArgRelationship, str] | None = None
100
115
 
101
116
 
102
117
  def _to_async_call(call: Callable[..., Any]) -> Callable[..., Awaitable[Any]]:
@@ -143,6 +158,15 @@ def _register_op_factory(
143
158
  """
144
159
  self._args_decoders = []
145
160
  self._kwargs_decoders = {}
161
+ attributes = []
162
+
163
+ def process_attribute(arg_name: str, arg: _engine.OpArgSchema) -> None:
164
+ if op_args.arg_relationship is not None:
165
+ related_attr, related_arg_name = op_args.arg_relationship
166
+ if related_arg_name == arg_name:
167
+ attributes.append(
168
+ TypeAttr(related_attr.value, arg.analyzed_value)
169
+ )
146
170
 
147
171
  # Match arguments with parameters.
148
172
  next_param_idx = 0
@@ -164,6 +188,7 @@ def _register_op_factory(
164
188
  [arg_name], arg.value_type["type"], arg_param.annotation
165
189
  )
166
190
  )
191
+ process_attribute(arg_name, arg)
167
192
  if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
168
193
  next_param_idx += 1
169
194
 
@@ -194,6 +219,7 @@ def _register_op_factory(
194
219
  self._kwargs_decoders[kwarg_name] = make_engine_value_decoder(
195
220
  [kwarg_name], kwarg.value_type["type"], arg_param.annotation
196
221
  )
222
+ process_attribute(kwarg_name, kwarg)
197
223
 
198
224
  missing_args = [
199
225
  name
@@ -216,9 +242,12 @@ def _register_op_factory(
216
242
 
217
243
  prepare_method = getattr(executor_cls, "analyze", None)
218
244
  if prepare_method is not None:
219
- return prepare_method(self, *args, **kwargs)
245
+ result = prepare_method(self, *args, **kwargs)
220
246
  else:
221
- return expected_return
247
+ result = expected_return
248
+ if len(attributes) > 0:
249
+ result = Annotated[result, *attributes]
250
+ return result
222
251
 
223
252
  async def prepare(self) -> None:
224
253
  """
cocoindex/setting.py CHANGED
@@ -6,6 +6,7 @@ import os
6
6
 
7
7
  from typing import Callable, Self, Any, overload
8
8
  from dataclasses import dataclass
9
+ from .validation import validate_app_namespace_name
9
10
 
10
11
  _app_namespace: str = ""
11
12
 
@@ -27,6 +28,8 @@ def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
27
28
 
28
29
  def set_app_namespace(app_namespace: str) -> None:
29
30
  """Set the application namespace."""
31
+ if app_namespace:
32
+ validate_app_namespace_name(app_namespace)
30
33
  global _app_namespace # pylint: disable=global-statement
31
34
  _app_namespace = app_namespace
32
35
 
@@ -1341,3 +1341,130 @@ def test_roundtrip_namedtuple_to_dict_binding() -> None:
1341
1341
  validate_full_roundtrip(
1342
1342
  instance, Point, (expected_dict, dict), (expected_dict, Any)
1343
1343
  )
1344
+
1345
+
1346
def test_roundtrip_ltable_to_list_dict_binding() -> None:
    """Check a list of dataclass rows against a list of dicts under `Any`."""

    @dataclass
    class User:
        id: str
        name: str
        age: int

    raw_rows = [("u1", "Alice", 25), ("u2", "Bob", 30), ("u3", "Charlie", 35)]
    source_rows = [User(*row) for row in raw_rows]
    decoded_rows = [
        {"id": user_id, "name": name, "age": age} for user_id, name, age in raw_rows
    ]

    # With an `Any` annotation the expected decoded value is a list of dicts.
    validate_full_roundtrip(source_rows, list[User], (decoded_rows, Any))
1364
+
1365
+
1366
def test_roundtrip_ktable_to_dict_dict_binding() -> None:
    """Check a str-keyed dict of dataclasses against a dict of dicts under `Any`."""

    @dataclass
    class Product:
        name: str
        price: float
        active: bool

    catalog = {
        "p1": ("Widget", 29.99, True),
        "p2": ("Gadget", 49.99, False),
        "p3": ("Tool", 19.99, True),
    }
    source_table = {key: Product(*attrs) for key, attrs in catalog.items()}
    decoded_table = {
        key: {"name": name, "price": price, "active": active}
        for key, (name, price, active) in catalog.items()
    }

    # With an `Any` annotation the expected decoded value is a dict of dicts.
    validate_full_roundtrip(source_table, dict[str, Product], (decoded_table, Any))
1388
+
1389
+
1390
def test_roundtrip_ktable_with_complex_key() -> None:
    """Check a dict keyed by a frozen dataclass against a tuple-keyed dict of dicts."""

    @dataclass(frozen=True)
    class OrderKey:
        shop_id: str
        version: int

    @dataclass
    class Order:
        customer: str
        total: float

    entries = [
        (("shop1", 1), ("Alice", 100.0)),
        (("shop2", 2), ("Bob", 200.0)),
    ]
    source_table = {OrderKey(*key): Order(*attrs) for key, attrs in entries}
    # The expected keys are plain tuples of the key's field values.
    decoded_table = {
        key: {"customer": customer, "total": total}
        for key, (customer, total) in entries
    }

    validate_full_roundtrip(
        source_table, dict[OrderKey, Order], (decoded_table, Any)
    )
1414
+
1415
+
1416
def test_roundtrip_ltable_with_nested_structs() -> None:
    """Check rows holding a nested dataclass against a list of nested dicts."""

    @dataclass
    class Address:
        street: str
        city: str

    @dataclass
    class Person:
        name: str
        age: int
        address: Address

    john = Person("John", 30, Address("123 Main St", "Anytown"))
    jane = Person("Jane", 25, Address("456 Oak Ave", "Somewhere"))

    # The nested struct is expected to appear as a nested dict.
    john_decoded = {
        "name": "John",
        "age": 30,
        "address": {"street": "123 Main St", "city": "Anytown"},
    }
    jane_decoded = {
        "name": "Jane",
        "age": 25,
        "address": {"street": "456 Oak Ave", "city": "Somewhere"},
    }

    validate_full_roundtrip(
        [john, jane], list[Person], ([john_decoded, jane_decoded], Any)
    )
1449
+
1450
+
1451
def test_roundtrip_ktable_with_list_fields() -> None:
    """Check a str-keyed dict of dataclasses with a list field against a dict of dicts."""

    @dataclass
    class Team:
        name: str
        members: list[str]
        active: bool

    dev_team = Team("Dev Team", ["Alice", "Bob"], True)
    qa_team = Team("QA Team", ["Charlie", "David"], False)
    source_table = {"team1": dev_team, "team2": qa_team}

    # Expected rows are written out literally so they share no list objects
    # with the source instances.
    decoded_table = {
        "team1": {"name": "Dev Team", "members": ["Alice", "Bob"], "active": True},
        "team2": {"name": "QA Team", "members": ["Charlie", "David"], "active": False},
    }

    validate_full_roundtrip(source_table, dict[str, Team], (decoded_table, Any))
@@ -0,0 +1,134 @@
1
+ """Tests for naming validation functionality."""
2
+
3
+ import pytest
4
+ from cocoindex.validation import (
5
+ validate_field_name,
6
+ validate_flow_name,
7
+ validate_full_flow_name,
8
+ validate_app_namespace_name,
9
+ validate_target_name,
10
+ NamingError,
11
+ validate_identifier_name,
12
+ )
13
+
14
+
15
class TestValidateIdentifierName:
    """Exercise `validate_identifier_name`, which returns an error string or None."""

    def test_valid_names(self) -> None:
        """Well-formed plain identifiers produce no error message."""
        accepted = (
            "field1",
            "field_name",
            "_private",
            "a",
            "field123",
            "FIELD_NAME",
            "MyField",
            "field_123_test",
        )
        for name in accepted:
            result = validate_identifier_name(name)
            assert result is None, f"Valid name '{name}' failed validation: {result}"

    def test_valid_names_with_dots(self) -> None:
        """Dotted names produce no error message when dots are allowed."""
        accepted = ("app.flow", "my_app.my_flow", "namespace.sub.flow", "a.b.c.d")
        for name in accepted:
            result = validate_identifier_name(name, allow_dots=True)
            assert result is None, (
                f"Valid dotted name '{name}' failed validation: {result}"
            )

    def test_invalid_starting_characters(self) -> None:
        """Names starting with a digit, dot, dash or space are reported."""
        rejected = (
            "123field",  # starts with digit
            ".field",  # starts with dot
            "-field",  # starts with dash
            " field",  # starts with space
        )
        for name in rejected:
            result = validate_identifier_name(name)
            assert result is not None, (
                f"Invalid name '{name}' should have failed validation"
            )

    def test_double_underscore_restriction(self) -> None:
        """Names starting with a double underscore are reported as such."""
        for name in ("__reserved", "__internal", "__test"):
            result = validate_identifier_name(name)
            assert result is not None
            assert "double underscores" in result.lower()

    def test_length_restriction(self) -> None:
        """A 65-character name is reported when the limit is 64."""
        too_long = "a" * 65
        result = validate_identifier_name(too_long, max_length=64)
        assert result is not None
        assert "maximum length" in result.lower()
75
+
76
+
77
class TestSpecificValidators:
    """Exercise the per-kind validators, which raise `NamingError` on bad names."""

    def test_valid_field_names(self) -> None:
        """Valid field names are accepted without raising."""
        for name in ("field1", "field_name", "_private", "FIELD"):
            validate_field_name(name)  # Should not raise

    def test_invalid_field_names(self) -> None:
        """Invalid field names raise NamingError."""
        rejected = ("123field", "field-name", "__reserved", "a" * 65)
        for name in rejected:
            with pytest.raises(NamingError):
                validate_field_name(name)

    def test_flow_validation(self) -> None:
        """Flow names: valid ones pass, invalid ones raise NamingError."""
        for accepted in ("MyFlow", "my_flow_123"):
            validate_flow_name(accepted)

        for rejected in ("123flow", "__reserved_flow"):
            with pytest.raises(NamingError):
                validate_flow_name(rejected)

    def test_full_flow_name_allows_dots(self) -> None:
        """Full flow names may contain dots but must still start validly."""
        for accepted in ("app.my_flow", "namespace.subnamespace.flow"):
            validate_full_flow_name(accepted)

        # A leading digit is still rejected even though dots are allowed.
        with pytest.raises(NamingError):
            validate_full_flow_name("123.invalid")

    def test_target_validation(self) -> None:
        """Target names: valid ones pass, a leading digit raises NamingError."""
        for accepted in ("my_target", "output_table"):
            validate_target_name(accepted)

        with pytest.raises(NamingError):
            validate_target_name("123target")

    def test_app_namespace_validation(self) -> None:
        """App namespaces: valid ones pass; dots and leading digits raise."""
        for accepted in ("myapp", "my_app_123"):
            validate_app_namespace_name(accepted)

        # Dots are not allowed in an app namespace; neither is a leading digit.
        for rejected in ("my.app", "123app"):
            with pytest.raises(NamingError):
                validate_app_namespace_name(rejected)
@@ -0,0 +1,104 @@
1
+ """
2
+ Naming validation for CocoIndex identifiers.
3
+
4
+ This module enforces naming conventions for flow names, field names,
5
+ target names, and app namespace names as specified in issue #779.
6
+ """
7
+
8
+ import re
9
+ from typing import Optional
10
+
11
# Patterns for plain and dot-containing identifiers. They are applied with
# `fullmatch` below: `$` on its own also matches just before a trailing
# newline, so `match` would accept a name such as "field\n".
_IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$")
_IDENTIFIER_WITH_DOTS_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*$")


class NamingError(ValueError):
    """Exception raised for naming convention violations."""

    pass


def validate_identifier_name(
    name: str,
    max_length: int = 64,
    allow_dots: bool = False,
    identifier_type: str = "identifier",
) -> Optional[str]:
    """
    Validate identifier names according to CocoIndex naming rules.

    Checks are applied in this order: non-empty, within `max_length`, not
    starting with a double underscore, and made up only of allowed characters
    with a letter or underscore first. The whole name must match, so a
    trailing newline is rejected.

    Args:
        name: The name to validate
        max_length: Maximum allowed length (default 64)
        allow_dots: Whether to allow dots in the name (for full flow names)
        identifier_type: Type of identifier for error messages

    Returns:
        None if valid, error message string if invalid
    """
    if not name:
        return f"{identifier_type} name cannot be empty"

    if len(name) > max_length:
        return f"{identifier_type} name '{name}' exceeds maximum length of {max_length} characters"

    if name.startswith("__"):
        return f"{identifier_type} name '{name}' cannot start with double underscores (reserved for internal usage)"

    # Pick the pattern and the matching wording for the error message.
    if allow_dots:
        pattern = _IDENTIFIER_WITH_DOTS_PATTERN
        allowed_chars = "letters, digits, underscores, and dots"
    else:
        pattern = _IDENTIFIER_PATTERN
        allowed_chars = "letters, digits, and underscores"

    # `fullmatch` (not `match`) so that nothing may follow the last allowed
    # character, including a single trailing newline.
    if not pattern.fullmatch(name):
        return f"{identifier_type} name '{name}' must start with a letter or underscore and contain only {allowed_chars}"

    return None
60
+
61
+
62
def validate_field_name(name: str) -> None:
    """Raise `NamingError` if `name` is not an acceptable field name.

    Field names are limited to 64 characters and may not contain dots.
    """
    if problem := validate_identifier_name(
        name,
        max_length=64,
        allow_dots=False,
        identifier_type="Field",
    ):
        raise NamingError(problem)
69
+
70
+
71
def validate_flow_name(name: str) -> None:
    """Raise `NamingError` if `name` is not an acceptable flow name.

    Flow names are limited to 64 characters and may not contain dots.
    """
    if problem := validate_identifier_name(
        name,
        max_length=64,
        allow_dots=False,
        identifier_type="Flow",
    ):
        raise NamingError(problem)
78
+
79
+
80
def validate_full_flow_name(name: str) -> None:
    """Raise `NamingError` if `name` is not an acceptable full flow name.

    Full flow names are limited to 64 characters and, unlike the other
    identifier kinds, may contain dots (used for namespacing).
    """
    if problem := validate_identifier_name(
        name,
        max_length=64,
        allow_dots=True,
        identifier_type="Full flow",
    ):
        raise NamingError(problem)
87
+
88
+
89
def validate_app_namespace_name(name: str) -> None:
    """Raise `NamingError` if `name` is not an acceptable app namespace name.

    App namespace names are limited to 64 characters and may not contain dots.
    """
    if problem := validate_identifier_name(
        name,
        max_length=64,
        allow_dots=False,
        identifier_type="App namespace",
    ):
        raise NamingError(problem)
96
+
97
+
98
def validate_target_name(name: str) -> None:
    """Raise `NamingError` if `name` is not an acceptable target name.

    Target names are limited to 64 characters and may not contain dots.
    """
    if problem := validate_identifier_name(
        name,
        max_length=64,
        allow_dots=False,
        identifier_type="Target",
    ):
        raise NamingError(problem)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.70
3
+ Version: 0.1.72
4
4
  Requires-Dist: click>=8.1.8
5
5
  Requires-Dist: rich>=14.0.0
6
6
  Requires-Dist: python-dotenv>=1.1.0
@@ -52,18 +52,18 @@ Ultra performant data transformation framework for AI, with core engine written
52
52
  ⭐ Drop a star to help us grow!
53
53
 
54
54
  <div align="center">
55
-
55
+
56
56
  <!-- Keep these links. Translations will automatically update with the README. -->
57
- [Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
58
- [English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
59
- [Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
60
- [français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
61
- [日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
62
- [한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
63
- [Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
64
- [Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
57
+ [Deutsch](https://readme-i18n.com/cocoindex-io/cocoindex?lang=de) |
58
+ [English](https://readme-i18n.com/cocoindex-io/cocoindex?lang=en) |
59
+ [Español](https://readme-i18n.com/cocoindex-io/cocoindex?lang=es) |
60
+ [français](https://readme-i18n.com/cocoindex-io/cocoindex?lang=fr) |
61
+ [日本語](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ja) |
62
+ [한국어](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ko) |
63
+ [Português](https://readme-i18n.com/cocoindex-io/cocoindex?lang=pt) |
64
+ [Русский](https://readme-i18n.com/cocoindex-io/cocoindex?lang=ru) |
65
65
  [中文](https://readme-i18n.com/cocoindex-io/cocoindex?lang=zh)
66
-
66
+
67
67
  </div>
68
68
 
69
69
  </br>
@@ -208,6 +208,7 @@ It defines an index flow like this:
208
208
  | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
209
209
  | [Product Recommendation](examples/product_recommendation) | Build real-time product recommendations with LLM and graph database|
210
210
  | [Image Search with Vision API](examples/image_search) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
211
+ | [Face Recognition](examples/face_recognition) | Recognize faces in images and build embedding index |
211
212
  | [Paper Metadata](examples/paper_metadata) | Index papers in PDF files, and build metadata tables for each paper |
212
213
 
213
214
  More coming and stay tuned 👀!
@@ -1,28 +1,30 @@
1
- cocoindex-0.1.70.dist-info/METADATA,sha256=4mFNelKh4RZNnPXiqDr1KOQoyBpL6ysW0ff7M5-MMIQ,11426
2
- cocoindex-0.1.70.dist-info/WHEEL,sha256=auo2gA2SV-bvS4ssY6DIG7dtu3SpmB5FcBkwIwO6YZk,96
3
- cocoindex-0.1.70.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
- cocoindex-0.1.70.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
5
- cocoindex/__init__.py,sha256=gN0v1r7nktqXGcVld4s1fmdCG9l1J_xRO56Jxu_yXOQ,2193
6
- cocoindex/_engine.cp311-win_amd64.pyd,sha256=UOvbowU48GTZmnbGHlokQgu5cG3-djm7B1Ycykxpkp8,70170624
1
+ cocoindex-0.1.72.dist-info/METADATA,sha256=qD-MFwBDoEkZmGKDcz0wB9FiQuXsXP6nMApfhSoLkks,11515
2
+ cocoindex-0.1.72.dist-info/WHEEL,sha256=auo2gA2SV-bvS4ssY6DIG7dtu3SpmB5FcBkwIwO6YZk,96
3
+ cocoindex-0.1.72.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
4
+ cocoindex-0.1.72.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
5
+ cocoindex/__init__.py,sha256=1u3kotLS9MDAj4jY4jN5rRuu39B3DH_wL2LAS2VFC5Y,2204
6
+ cocoindex/_engine.cp311-win_amd64.pyd,sha256=PHp3WdbsiMxuRgsv5dAtsRCeJ0t2rKnLwlLzULayzcw,70187520
7
7
  cocoindex/auth_registry.py,sha256=Qq1IVZb-7K4luRrQSDlOPbISnGEZ4kIDsrCU8H2ARw0,1529
8
8
  cocoindex/cli.py,sha256=zjZv7EH-ZLoO1-3Ua2U7Yt3n2A_T7pN2NCuv7FB8WA0,23097
9
- cocoindex/convert.py,sha256=iz7lYydzh1ClMdXl2sc8HiidPp186YY4qXVI133g5u0,13487
10
- cocoindex/flow.py,sha256=On0J6-3_3uPg-2TjzU3yans_4t_9vchrkH5BpGkbLMo,37203
11
- cocoindex/functions.py,sha256=pt4Zs_ybz1fBPrwLbgTxHN4yL41SX4IrNGeB5tahzno,3262
9
+ cocoindex/convert.py,sha256=xZXkzZiUQktrGYY0GYGMeK2xi5cTyUN55g2FkA5F80w,16251
10
+ cocoindex/flow.py,sha256=GF7YlgdZfK1F50yXJaSrWmCWP1g6CeUCp7ay1c4okYc,37293
11
+ cocoindex/functions.py,sha256=ERtlJJ_D5xNSL4nE6FiqMseX4aAuqsTG6RvJVBC_LPE,3210
12
12
  cocoindex/index.py,sha256=GrqTm1rLwICQ8hadtNvJAxVg7GWMvtMmFcbiNtNzmP0,569
13
13
  cocoindex/lib.py,sha256=cZosix4nwROvod4QJOwCzrm6U1CVy_wKMMk7sDDG_Z0,849
14
14
  cocoindex/llm.py,sha256=TDUyTxW7ooFmpA_jDNezABhReXksOXN8F8zY9aiuNTw,709
15
- cocoindex/op.py,sha256=h1bp56NEVxCRrOjzyta1h52u6d9Vol_Qau9Pv1sUlVE,12141
15
+ cocoindex/op.py,sha256=6RRlzNf3rq5q1TNxNB8nv9OOee22dHyk9LZ2fD-K20s,13522
16
16
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  cocoindex/runtime.py,sha256=6mE-jR1Kh5c4GETDvBgwjXZq69TK5rh1qNpaseRDZnw,1117
18
- cocoindex/setting.py,sha256=E_5Z_MqDt6vaMcqPNA67QB7h_36OBcNu50EN3WjNDts,4898
18
+ cocoindex/setting.py,sha256=KkWDDJ6-BVtzW41ygnpxFjx3mzVRbA3F8LnhzAkTSDc,5026
19
19
  cocoindex/setup.py,sha256=KbJvmeFu0NbeoH-5iDmHZP86f26HIId8kHmGUNZAePI,3160
20
20
  cocoindex/sources.py,sha256=DEEfJGrz0eG9dFF-sjn7ddbebHfqPpbyN5KXYagbZ50,2152
21
21
  cocoindex/targets.py,sha256=7FfG9kuEf5KTXtLwXMFaPFIut3PsIbpb3XIEjjeF7Bg,2931
22
22
  cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- cocoindex/tests/test_convert.py,sha256=zgWJlhnx8QCnvAD92U2BbFchQgYmFv3hbX532RqVMkk,46793
23
+ cocoindex/tests/test_convert.py,sha256=yVWlR2QMfwdzSXBVIi0eFeQ8k9Xvu8vLGgBhzwS1-ng,50430
24
24
  cocoindex/tests/test_optional_database.py,sha256=dnzmTgaJf37D3q8fQsjP5UDER6FYETaUokDnFBMLtIk,8755
25
25
  cocoindex/tests/test_typing.py,sha256=YHQonf49SPxic8wnZC_5UmltwS5eAa5VNTH0aVwccBQ,15373
26
+ cocoindex/tests/test_validation.py,sha256=I4wr8lAMAjmy5xgG5N_OJKveXt8XIa96MsQTXhw5AnA,4677
26
27
  cocoindex/typing.py,sha256=wVjfugSYNXK9H11pPjXci2X2EyvS5eoILAQ3a0H8VLs,13656
27
28
  cocoindex/utils.py,sha256=U3W39zD2uZpXX8v84tJD7sRmbC5ar3z_ljAP1cJrYXI,618
28
- cocoindex-0.1.70.dist-info/RECORD,,
29
+ cocoindex/validation.py,sha256=4ZjsW-SZT8X_TEEhEE6QG6D-8Oq_TkPAhTqP0mdFYSE,3194
30
+ cocoindex-0.1.72.dist-info/RECORD,,