cocoindex 0.1.71__cp311-cp311-manylinux_2_28_aarch64.whl → 0.1.72__cp311-cp311-manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cpython-311-aarch64-linux-gnu.so +0 -0
- cocoindex/flow.py +16 -7
- cocoindex/setting.py +3 -0
- cocoindex/tests/test_validation.py +134 -0
- cocoindex/validation.py +104 -0
- {cocoindex-0.1.71.dist-info → cocoindex-0.1.72.dist-info}/METADATA +1 -1
- {cocoindex-0.1.71.dist-info → cocoindex-0.1.72.dist-info}/RECORD +10 -8
- {cocoindex-0.1.71.dist-info → cocoindex-0.1.72.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.71.dist-info → cocoindex-0.1.72.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.71.dist-info → cocoindex-0.1.72.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/flow.py
CHANGED
@@ -10,6 +10,13 @@ import functools
|
|
10
10
|
import inspect
|
11
11
|
import re
|
12
12
|
|
13
|
+
from .validation import (
|
14
|
+
validate_flow_name,
|
15
|
+
NamingError,
|
16
|
+
validate_full_flow_name,
|
17
|
+
validate_target_name,
|
18
|
+
)
|
19
|
+
|
13
20
|
from dataclasses import dataclass
|
14
21
|
from enum import Enum
|
15
22
|
from threading import Lock
|
@@ -300,6 +307,9 @@ class DataScope:
|
|
300
307
|
)
|
301
308
|
|
302
309
|
def __setitem__(self, field_name: str, value: DataSlice[T]) -> None:
|
310
|
+
from .validation import validate_field_name
|
311
|
+
|
312
|
+
validate_field_name(field_name)
|
303
313
|
value._state.attach_to_scope(self._engine_data_scope, field_name)
|
304
314
|
|
305
315
|
def __enter__(self) -> DataScope:
|
@@ -367,7 +377,7 @@ class DataCollector:
|
|
367
377
|
|
368
378
|
def export(
|
369
379
|
self,
|
370
|
-
|
380
|
+
target_name: str,
|
371
381
|
target_spec: op.TargetSpec,
|
372
382
|
/,
|
373
383
|
*,
|
@@ -381,6 +391,8 @@ class DataCollector:
|
|
381
391
|
|
382
392
|
`vector_index` is for backward compatibility only. Please use `vector_indexes` instead.
|
383
393
|
"""
|
394
|
+
|
395
|
+
validate_target_name(target_name)
|
384
396
|
if not isinstance(target_spec, op.TargetSpec):
|
385
397
|
raise ValueError(
|
386
398
|
"export() can only be called on a CocoIndex target storage"
|
@@ -398,7 +410,7 @@ class DataCollector:
|
|
398
410
|
vector_indexes=vector_indexes,
|
399
411
|
)
|
400
412
|
self._flow_builder_state.engine_flow_builder.export(
|
401
|
-
|
413
|
+
target_name,
|
402
414
|
_spec_kind(target_spec),
|
403
415
|
dump_engine_object(target_spec),
|
404
416
|
dump_engine_object(index_options),
|
@@ -660,6 +672,8 @@ class Flow:
|
|
660
672
|
def __init__(
|
661
673
|
self, name: str, full_name: str, engine_flow_creator: Callable[[], _engine.Flow]
|
662
674
|
):
|
675
|
+
validate_flow_name(name)
|
676
|
+
validate_full_flow_name(full_name)
|
663
677
|
self._name = name
|
664
678
|
self._full_name = full_name
|
665
679
|
engine_flow = None
|
@@ -831,11 +845,6 @@ def get_flow_full_name(name: str) -> str:
|
|
831
845
|
|
832
846
|
|
833
847
|
def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
|
834
|
-
"""Add a flow definition to the cocoindex library."""
|
835
|
-
if not all(c.isalnum() or c == "_" for c in name):
|
836
|
-
raise ValueError(
|
837
|
-
f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed."
|
838
|
-
)
|
839
848
|
with _flows_lock:
|
840
849
|
if name in _flows:
|
841
850
|
raise KeyError(f"Flow with name {name} already exists")
|
cocoindex/setting.py
CHANGED
@@ -6,6 +6,7 @@ import os
|
|
6
6
|
|
7
7
|
from typing import Callable, Self, Any, overload
|
8
8
|
from dataclasses import dataclass
|
9
|
+
from .validation import validate_app_namespace_name
|
9
10
|
|
10
11
|
_app_namespace: str = ""
|
11
12
|
|
@@ -27,6 +28,8 @@ def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
|
|
27
28
|
|
28
29
|
def set_app_namespace(app_namespace: str) -> None:
|
29
30
|
"""Set the application namespace."""
|
31
|
+
if app_namespace:
|
32
|
+
validate_app_namespace_name(app_namespace)
|
30
33
|
global _app_namespace # pylint: disable=global-statement
|
31
34
|
_app_namespace = app_namespace
|
32
35
|
|
@@ -0,0 +1,134 @@
|
|
1
|
+
"""Tests for naming validation functionality."""
|
2
|
+
|
3
|
+
import pytest
from cocoindex.validation import (
    validate_field_name,
    validate_flow_name,
    validate_full_flow_name,
    validate_app_namespace_name,
    validate_target_name,
    NamingError,
    validate_identifier_name,
)


class TestValidateIdentifierName:
    """Test the core validation function.

    ``validate_identifier_name`` reports problems by returning an error
    message string and returns ``None`` when the name is acceptable, so these
    tests assert on the return value rather than on raised exceptions.

    Each name is its own parametrized case so that one failing name does not
    hide the result for the names that follow it.
    """

    @pytest.mark.parametrize(
        "name",
        [
            "field1",
            "field_name",
            "_private",
            "a",
            "field123",
            "FIELD_NAME",
            "MyField",
            "field_123_test",
        ],
    )
    def test_valid_names(self, name: str) -> None:
        """Test that valid names pass validation."""
        result = validate_identifier_name(name)
        assert result is None, f"Valid name '{name}' failed validation: {result}"

    @pytest.mark.parametrize(
        "name", ["app.flow", "my_app.my_flow", "namespace.sub.flow", "a.b.c.d"]
    )
    def test_valid_names_with_dots(self, name: str) -> None:
        """Test that valid names with dots pass validation when allowed."""
        result = validate_identifier_name(name, allow_dots=True)
        assert result is None, (
            f"Valid dotted name '{name}' failed validation: {result}"
        )

    @pytest.mark.parametrize(
        "name",
        [
            "123field",  # starts with digit
            ".field",  # starts with dot
            "-field",  # starts with dash
            " field",  # starts with space
        ],
    )
    def test_invalid_starting_characters(self, name: str) -> None:
        """Test names with invalid starting characters."""
        result = validate_identifier_name(name)
        assert result is not None, (
            f"Invalid name '{name}' should have failed validation"
        )

    def test_leading_dot_rejected_even_when_dots_allowed(self) -> None:
        """Test that ``allow_dots`` does not relax the first-character rule."""
        assert validate_identifier_name(".field", allow_dots=True) is not None

    def test_empty_name(self) -> None:
        """Test that an empty name is reported as an error."""
        result = validate_identifier_name("")
        assert result is not None
        assert "cannot be empty" in result.lower()

    @pytest.mark.parametrize("name", ["__reserved", "__internal", "__test"])
    def test_double_underscore_restriction(self, name: str) -> None:
        """Test double underscore restriction."""
        result = validate_identifier_name(name)
        assert result is not None
        assert "double underscores" in result.lower()

    def test_length_restriction(self) -> None:
        """Test maximum length restriction."""
        long_name = "a" * 65
        result = validate_identifier_name(long_name, max_length=64)
        assert result is not None
        assert "maximum length" in result.lower()


class TestSpecificValidators:
    """Test the specific validation functions.

    Unlike the core function, the specific validators return nothing on
    success and raise ``NamingError`` on failure.
    """

    def test_naming_error_is_value_error(self) -> None:
        """Test that ``NamingError`` can be caught as ``ValueError``."""
        assert issubclass(NamingError, ValueError)

    @pytest.mark.parametrize("name", ["field1", "field_name", "_private", "FIELD"])
    def test_valid_field_names(self, name: str) -> None:
        """Test valid field names."""
        validate_field_name(name)  # Should not raise

    @pytest.mark.parametrize(
        "name", ["123field", "field-name", "__reserved", "a" * 65]
    )
    def test_invalid_field_names(self, name: str) -> None:
        """Test invalid field names raise NamingError."""
        with pytest.raises(NamingError):
            validate_field_name(name)

    def test_flow_validation(self) -> None:
        """Test flow name validation."""
        # Valid flow names
        validate_flow_name("MyFlow")
        validate_flow_name("my_flow_123")

        # Invalid flow names
        with pytest.raises(NamingError):
            validate_flow_name("123flow")

        with pytest.raises(NamingError):
            validate_flow_name("__reserved_flow")

    def test_full_flow_name_allows_dots(self) -> None:
        """Test that full flow names allow dots."""
        validate_full_flow_name("app.my_flow")
        validate_full_flow_name("namespace.subnamespace.flow")

        # But still reject invalid patterns
        with pytest.raises(NamingError):
            validate_full_flow_name("123.invalid")

    def test_target_validation(self) -> None:
        """Test target name validation."""
        validate_target_name("my_target")
        validate_target_name("output_table")

        with pytest.raises(NamingError):
            validate_target_name("123target")

    def test_app_namespace_validation(self) -> None:
        """Test app namespace validation."""
        validate_app_namespace_name("myapp")
        validate_app_namespace_name("my_app_123")

        # Should not allow dots in app namespace
        with pytest.raises(NamingError):
            validate_app_namespace_name("my.app")

        with pytest.raises(NamingError):
            validate_app_namespace_name("123app")
|
cocoindex/validation.py
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
"""
|
2
|
+
Naming validation for CocoIndex identifiers.
|
3
|
+
|
4
|
+
This module enforces naming conventions for flow names, field names,
|
5
|
+
target names, and app namespace names as specified in issue #779.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import re
from typing import Optional

# Length limit applied by every specific validator in this module.
_MAX_NAME_LENGTH = 64

# A plain identifier: a letter or underscore followed by letters, digits or
# underscores.  The pattern ends with ``\Z`` rather than ``$`` because ``$``
# also matches just before a trailing newline, which would let "name\n" pass.
_IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*\Z")

# A dotted name: an identifier optionally followed by ".part" groups.  Every
# part must be non-empty, so "a..b" and "a." are rejected.
_IDENTIFIER_WITH_DOTS_PATTERN = re.compile(
    r"^[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z0-9_]+)*\Z"
)


class NamingError(ValueError):
    """Exception raised for naming convention violations."""


def validate_identifier_name(
    name: str,
    max_length: int = 64,
    allow_dots: bool = False,
    identifier_type: str = "identifier",
) -> Optional[str]:
    """
    Validate identifier names according to CocoIndex naming rules.

    The checks run in a fixed order and the first failing check determines
    the returned message: empty name, length, reserved ``__`` prefix, then
    the allowed-character pattern.

    Args:
        name: The name to validate
        max_length: Maximum allowed length (default 64)
        allow_dots: Whether to allow dots in the name (for full flow names).
            Dots may only separate non-empty parts.
        identifier_type: Type of identifier for error messages

    Returns:
        None if valid, error message string if invalid
    """
    if not name:
        return f"{identifier_type} name cannot be empty"

    if len(name) > max_length:
        return f"{identifier_type} name '{name}' exceeds maximum length of {max_length} characters"

    if name.startswith("__"):
        return f"{identifier_type} name '{name}' cannot start with double underscores (reserved for internal usage)"

    if allow_dots:
        pattern = _IDENTIFIER_WITH_DOTS_PATTERN
        allowed_chars = (
            "letters, digits, underscores, and dots separating non-empty parts"
        )
    else:
        pattern = _IDENTIFIER_PATTERN
        allowed_chars = "letters, digits, and underscores"

    # fullmatch: the whole name must conform, including its final character.
    if pattern.fullmatch(name) is None:
        return f"{identifier_type} name '{name}' must start with a letter or underscore and contain only {allowed_chars}"

    return None


def _raise_if_invalid(name: str, *, allow_dots: bool, identifier_type: str) -> None:
    """Run the core check and turn a returned message into a NamingError."""
    error = validate_identifier_name(
        name,
        max_length=_MAX_NAME_LENGTH,
        allow_dots=allow_dots,
        identifier_type=identifier_type,
    )
    if error is not None:
        raise NamingError(error)


def validate_field_name(name: str) -> None:
    """Validate field names.

    Raises:
        NamingError: If the name violates the identifier rules.
    """
    _raise_if_invalid(name, allow_dots=False, identifier_type="Field")


def validate_flow_name(name: str) -> None:
    """Validate flow names.

    Raises:
        NamingError: If the name violates the identifier rules.
    """
    _raise_if_invalid(name, allow_dots=False, identifier_type="Flow")


def validate_full_flow_name(name: str) -> None:
    """Validate full flow names (can contain dots for namespacing).

    Raises:
        NamingError: If the name violates the dotted-identifier rules.
    """
    _raise_if_invalid(name, allow_dots=True, identifier_type="Full flow")


def validate_app_namespace_name(name: str) -> None:
    """Validate app namespace names.

    Raises:
        NamingError: If the name violates the identifier rules.
    """
    _raise_if_invalid(name, allow_dots=False, identifier_type="App namespace")


def validate_target_name(name: str) -> None:
    """Validate target names.

    Raises:
        NamingError: If the name violates the identifier rules.
    """
    _raise_if_invalid(name, allow_dots=False, identifier_type="Target")
|
@@ -1,13 +1,13 @@
|
|
1
|
-
cocoindex-0.1.
|
2
|
-
cocoindex-0.1.
|
3
|
-
cocoindex-0.1.
|
4
|
-
cocoindex-0.1.
|
1
|
+
cocoindex-0.1.72.dist-info/METADATA,sha256=ztXY--5U2oGAPvJlh-fGpYTFcrhoAg3AhaOX78sZACI,11304
|
2
|
+
cocoindex-0.1.72.dist-info/WHEEL,sha256=tYU0YwShGV5a1IBa9K6b40tOXRGPW5TH008p3c7dncU,109
|
3
|
+
cocoindex-0.1.72.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.1.72.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
5
5
|
cocoindex/__init__.py,sha256=kfTgbh2haepo7kIbzJqfxU6Kx7wPol5_t1SYF2x6cBM,2114
|
6
|
-
cocoindex/_engine.cpython-311-aarch64-linux-gnu.so,sha256=
|
6
|
+
cocoindex/_engine.cpython-311-aarch64-linux-gnu.so,sha256=pqI5mLYD9u8j567vTY2GkyYkWSpIEzzLa7TI5hcWxp8,68480760
|
7
7
|
cocoindex/auth_registry.py,sha256=PE1-kVkcyC1G2C_V7b1kvYzeq73OFQehWKQP7ln7fJ8,1478
|
8
8
|
cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
|
9
9
|
cocoindex/convert.py,sha256=fOzfbMlQ8WQ_nAv8WpX-EEHdZdBV8QXV3qIe1_Ird_U,15806
|
10
|
-
cocoindex/flow.py,sha256=
|
10
|
+
cocoindex/flow.py,sha256=HN24rsihO3BkSYGnTtxgovgka2IobxhFuLmDlqw3fAk,36127
|
11
11
|
cocoindex/functions.py,sha256=LLu_ausirvqnsx_k3euZpv8sLCpBZ4DF77h2HOzbinE,3109
|
12
12
|
cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
|
13
13
|
cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
|
@@ -15,7 +15,7 @@ cocoindex/llm.py,sha256=WxmWUbNcf9HOCM5xkbDeFs9lF67M3mr810B7deDDc-8,673
|
|
15
15
|
cocoindex/op.py,sha256=Afi5CfgU3wPQoPPKFb2WUYCVLmCPhBuK-2NT1AzC2zU,13161
|
16
16
|
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
cocoindex/runtime.py,sha256=povilB3HH3y1JF-yxKwU-pD8n2WnAqyQxIgvXXHNc60,1080
|
18
|
-
cocoindex/setting.py,sha256=
|
18
|
+
cocoindex/setting.py,sha256=TwhQ6pEeZmvc8ZXlnT9d8Wn8Vz_u7Z5LJUkGsKmKSno,4859
|
19
19
|
cocoindex/setup.py,sha256=7uIHKN4FOCuoidPXcKyGTrkqpkl9luL49-6UcnMxYzw,3068
|
20
20
|
cocoindex/sources.py,sha256=69COA4qbZDipzGYfXv-WJSmicFkA509xIShRGDh6A0A,2083
|
21
21
|
cocoindex/targets.py,sha256=Nfh_tpFd1goTnS_cxBjIs4j9zl3Z4Z1JomAQ1dl3Sic,2796
|
@@ -23,6 +23,8 @@ cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
23
|
cocoindex/tests/test_convert.py,sha256=CnPDAK8QdzWTS9II-prbwIHeiq5htvRFhkfR8YdUE10,48960
|
24
24
|
cocoindex/tests/test_optional_database.py,sha256=snAmkNa6wtOSaxoZE1HgjvL5v_ylitt3Jt_9df4Cgdc,8506
|
25
25
|
cocoindex/tests/test_typing.py,sha256=NB4nUzoumOF_wGFa4D2Xf6d0bUVtOiSXyb78M1pYSG4,14827
|
26
|
+
cocoindex/tests/test_validation.py,sha256=X6AQzVs-hVKIXcrHMEMQnhfUE8at7iXQnPq8nHNhZ2Q,4543
|
26
27
|
cocoindex/typing.py,sha256=MO9HkrNpargvMPvpkd7jgSu2R-21KE_NaB9-WI4YOZA,13241
|
27
28
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
28
|
-
cocoindex
|
29
|
+
cocoindex/validation.py,sha256=PZnJoby4sLbsmPv9fOjOQXuefjfZ7gmtsiTGU8SH-tc,3090
|
30
|
+
cocoindex-0.1.72.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|