cocoindex 0.1.65__cp311-cp311-macosx_11_0_arm64.whl → 0.1.67__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +1 -0
- cocoindex/_engine.cpython-311-darwin.so +0 -0
- cocoindex/convert.py +47 -1
- cocoindex/functions.py +8 -13
- cocoindex/llm.py +12 -0
- cocoindex/sources.py +15 -0
- cocoindex/tests/test_convert.py +112 -0
- cocoindex/tests/test_typing.py +4 -1
- cocoindex/typing.py +13 -4
- {cocoindex-0.1.65.dist-info → cocoindex-0.1.67.dist-info}/METADATA +44 -18
- {cocoindex-0.1.65.dist-info → cocoindex-0.1.67.dist-info}/RECORD +14 -14
- {cocoindex-0.1.65.dist-info → cocoindex-0.1.67.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.65.dist-info → cocoindex-0.1.67.dist-info}/entry_points.txt +0 -0
- {cocoindex-0.1.65.dist-info → cocoindex-0.1.67.dist-info}/licenses/LICENSE +0 -0
cocoindex/__init__.py
CHANGED
Binary file
|
cocoindex/convert.py
CHANGED
@@ -89,13 +89,26 @@ def make_engine_value_decoder(
|
|
89
89
|
if dst_is_any:
|
90
90
|
if src_type_kind == "Union":
|
91
91
|
return lambda value: value[1]
|
92
|
-
if src_type_kind == "Struct"
|
92
|
+
if src_type_kind == "Struct":
|
93
|
+
return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"])
|
94
|
+
if src_type_kind in TABLE_TYPES:
|
93
95
|
raise ValueError(
|
94
96
|
f"Missing type annotation for `{''.join(field_path)}`."
|
95
97
|
f"It's required for {src_type_kind} type."
|
96
98
|
)
|
97
99
|
return lambda value: value
|
98
100
|
|
101
|
+
# Handle struct -> dict binding for explicit dict annotations
|
102
|
+
is_dict_annotation = False
|
103
|
+
if dst_annotation is dict:
|
104
|
+
is_dict_annotation = True
|
105
|
+
elif getattr(dst_annotation, "__origin__", None) is dict:
|
106
|
+
args = getattr(dst_annotation, "__args__", ())
|
107
|
+
if args == (str, Any):
|
108
|
+
is_dict_annotation = True
|
109
|
+
if is_dict_annotation and src_type_kind == "Struct":
|
110
|
+
return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"])
|
111
|
+
|
99
112
|
dst_type_info = analyze_type_info(dst_annotation)
|
100
113
|
|
101
114
|
if src_type_kind == "Union":
|
@@ -294,6 +307,39 @@ def _make_engine_struct_value_decoder(
|
|
294
307
|
)
|
295
308
|
|
296
309
|
|
310
|
+
def _make_engine_struct_to_dict_decoder(
|
311
|
+
field_path: list[str],
|
312
|
+
src_fields: list[dict[str, Any]],
|
313
|
+
) -> Callable[[list[Any] | None], dict[str, Any] | None]:
|
314
|
+
"""Make a decoder from engine field values to a Python dict."""
|
315
|
+
|
316
|
+
field_decoders = []
|
317
|
+
for i, field_schema in enumerate(src_fields):
|
318
|
+
field_name = field_schema["name"]
|
319
|
+
field_path.append(f".{field_name}")
|
320
|
+
field_decoder = make_engine_value_decoder(
|
321
|
+
field_path,
|
322
|
+
field_schema["type"],
|
323
|
+
Any, # Use Any for recursive decoding
|
324
|
+
)
|
325
|
+
field_path.pop()
|
326
|
+
field_decoders.append((field_name, field_decoder))
|
327
|
+
|
328
|
+
def decode_to_dict(values: list[Any] | None) -> dict[str, Any] | None:
|
329
|
+
if values is None:
|
330
|
+
return None
|
331
|
+
if len(field_decoders) != len(values):
|
332
|
+
raise ValueError(
|
333
|
+
f"Field count mismatch: expected {len(field_decoders)}, got {len(values)}"
|
334
|
+
)
|
335
|
+
return {
|
336
|
+
field_name: field_decoder(value)
|
337
|
+
for value, (field_name, field_decoder) in zip(values, field_decoders)
|
338
|
+
}
|
339
|
+
|
340
|
+
return decode_to_dict
|
341
|
+
|
342
|
+
|
297
343
|
def dump_engine_object(v: Any) -> Any:
|
298
344
|
"""Recursively dump an object for engine. Engine side uses `Pythonized` to catch."""
|
299
345
|
if v is None:
|
cocoindex/functions.py
CHANGED
@@ -9,14 +9,6 @@ from numpy.typing import NDArray
|
|
9
9
|
from . import llm, op
|
10
10
|
from .typing import TypeAttr, Vector
|
11
11
|
|
12
|
-
# Check if sentence_transformers is available
|
13
|
-
try:
|
14
|
-
import sentence_transformers # type: ignore
|
15
|
-
|
16
|
-
_SENTENCE_TRANSFORMERS_AVAILABLE = True
|
17
|
-
except ImportError:
|
18
|
-
_SENTENCE_TRANSFORMERS_AVAILABLE = False
|
19
|
-
|
20
12
|
|
21
13
|
class ParseJson(op.FunctionSpec):
|
22
14
|
"""Parse a text into a JSON object."""
|
@@ -45,6 +37,7 @@ class EmbedText(op.FunctionSpec):
|
|
45
37
|
address: str | None = None
|
46
38
|
output_dimension: int | None = None
|
47
39
|
task_type: str | None = None
|
40
|
+
api_config: llm.VertexAiConfig | None = None
|
48
41
|
|
49
42
|
|
50
43
|
class ExtractByLlm(op.FunctionSpec):
|
@@ -78,18 +71,19 @@ class SentenceTransformerEmbedExecutor:
|
|
78
71
|
"""Executor for SentenceTransformerEmbed."""
|
79
72
|
|
80
73
|
spec: SentenceTransformerEmbed
|
81
|
-
_model:
|
74
|
+
_model: Any | None = None
|
82
75
|
|
83
76
|
def analyze(self, text: Any) -> type:
|
84
|
-
|
77
|
+
try:
|
78
|
+
# Only import sentence_transformers locally when it's needed, as its import is very slow.
|
79
|
+
import sentence_transformers # pylint: disable=import-outside-toplevel
|
80
|
+
except ImportError as e:
|
85
81
|
raise ImportError(
|
86
82
|
"sentence_transformers is required for SentenceTransformerEmbed function. "
|
87
83
|
"Install it with one of these commands:\n"
|
88
84
|
" pip install 'cocoindex[embeddings]'\n"
|
89
85
|
" pip install sentence-transformers"
|
90
|
-
)
|
91
|
-
|
92
|
-
import sentence_transformers # pylint: disable=import-outside-toplevel
|
86
|
+
) from e
|
93
87
|
|
94
88
|
args = self.spec.args or {}
|
95
89
|
self._model = sentence_transformers.SentenceTransformer(self.spec.model, **args)
|
@@ -101,5 +95,6 @@ class SentenceTransformerEmbedExecutor:
|
|
101
95
|
return result
|
102
96
|
|
103
97
|
def __call__(self, text: str) -> NDArray[np.float32]:
|
98
|
+
assert self._model is not None
|
104
99
|
result: NDArray[np.float32] = self._model.encode(text, convert_to_numpy=True)
|
105
100
|
return result
|
cocoindex/llm.py
CHANGED
@@ -8,6 +8,7 @@ class LlmApiType(Enum):
|
|
8
8
|
OPENAI = "OpenAi"
|
9
9
|
OLLAMA = "Ollama"
|
10
10
|
GEMINI = "Gemini"
|
11
|
+
VERTEX_AI = "VertexAi"
|
11
12
|
ANTHROPIC = "Anthropic"
|
12
13
|
LITE_LLM = "LiteLlm"
|
13
14
|
OPEN_ROUTER = "OpenRouter"
|
@@ -15,6 +16,16 @@ class LlmApiType(Enum):
|
|
15
16
|
VLLM = "Vllm"
|
16
17
|
|
17
18
|
|
19
|
+
@dataclass
|
20
|
+
class VertexAiConfig:
|
21
|
+
"""A specification for a Vertex AI LLM."""
|
22
|
+
|
23
|
+
kind = "VertexAi"
|
24
|
+
|
25
|
+
project: str
|
26
|
+
region: str | None = None
|
27
|
+
|
28
|
+
|
18
29
|
@dataclass
|
19
30
|
class LlmSpec:
|
20
31
|
"""A specification for a LLM."""
|
@@ -22,3 +33,4 @@ class LlmSpec:
|
|
22
33
|
api_type: LlmApiType
|
23
34
|
model: str
|
24
35
|
address: str | None = None
|
36
|
+
api_config: VertexAiConfig | None = None
|
cocoindex/sources.py
CHANGED
@@ -43,3 +43,18 @@ class AmazonS3(op.SourceSpec):
|
|
43
43
|
included_patterns: list[str] | None = None
|
44
44
|
excluded_patterns: list[str] | None = None
|
45
45
|
sqs_queue_url: str | None = None
|
46
|
+
|
47
|
+
|
48
|
+
class AzureBlob(op.SourceSpec):
|
49
|
+
"""
|
50
|
+
Import data from an Azure Blob Storage container. Supports optional prefix and file filtering by glob patterns.
|
51
|
+
"""
|
52
|
+
|
53
|
+
_op_category = op.OpCategory.SOURCE
|
54
|
+
|
55
|
+
account_name: str
|
56
|
+
container_name: str
|
57
|
+
prefix: str | None = None
|
58
|
+
binary: bool = False
|
59
|
+
included_patterns: list[str] | None = None
|
60
|
+
excluded_patterns: list[str] | None = None
|
cocoindex/tests/test_convert.py
CHANGED
@@ -1229,3 +1229,115 @@ def test_full_roundtrip_scalar_with_python_types() -> None:
|
|
1229
1229
|
annotated_float=2.0,
|
1230
1230
|
)
|
1231
1231
|
validate_full_roundtrip(instance, MixedStruct)
|
1232
|
+
|
1233
|
+
|
1234
|
+
def test_roundtrip_struct_to_dict_binding() -> None:
|
1235
|
+
"""Test struct -> dict binding with Any annotation."""
|
1236
|
+
|
1237
|
+
@dataclass
|
1238
|
+
class SimpleStruct:
|
1239
|
+
name: str
|
1240
|
+
value: int
|
1241
|
+
price: float
|
1242
|
+
|
1243
|
+
instance = SimpleStruct("test", 42, 3.14)
|
1244
|
+
expected_dict = {"name": "test", "value": 42, "price": 3.14}
|
1245
|
+
|
1246
|
+
# Test Any annotation
|
1247
|
+
validate_full_roundtrip(instance, SimpleStruct, (expected_dict, Any))
|
1248
|
+
|
1249
|
+
|
1250
|
+
def test_roundtrip_struct_to_dict_explicit() -> None:
|
1251
|
+
"""Test struct -> dict binding with explicit dict annotations."""
|
1252
|
+
|
1253
|
+
@dataclass
|
1254
|
+
class Product:
|
1255
|
+
id: str
|
1256
|
+
name: str
|
1257
|
+
price: float
|
1258
|
+
active: bool
|
1259
|
+
|
1260
|
+
instance = Product("P1", "Widget", 29.99, True)
|
1261
|
+
expected_dict = {"id": "P1", "name": "Widget", "price": 29.99, "active": True}
|
1262
|
+
|
1263
|
+
# Test explicit dict annotations
|
1264
|
+
validate_full_roundtrip(
|
1265
|
+
instance, Product, (expected_dict, dict), (expected_dict, dict[str, Any])
|
1266
|
+
)
|
1267
|
+
|
1268
|
+
|
1269
|
+
def test_roundtrip_struct_to_dict_with_none_annotation() -> None:
|
1270
|
+
"""Test struct -> dict binding with None annotation."""
|
1271
|
+
|
1272
|
+
@dataclass
|
1273
|
+
class Config:
|
1274
|
+
host: str
|
1275
|
+
port: int
|
1276
|
+
debug: bool
|
1277
|
+
|
1278
|
+
instance = Config("localhost", 8080, True)
|
1279
|
+
expected_dict = {"host": "localhost", "port": 8080, "debug": True}
|
1280
|
+
|
1281
|
+
# Test None annotation (should be treated as Any)
|
1282
|
+
validate_full_roundtrip(instance, Config, (expected_dict, None))
|
1283
|
+
|
1284
|
+
|
1285
|
+
def test_roundtrip_struct_to_dict_nested() -> None:
|
1286
|
+
"""Test struct -> dict binding with nested structs."""
|
1287
|
+
|
1288
|
+
@dataclass
|
1289
|
+
class Address:
|
1290
|
+
street: str
|
1291
|
+
city: str
|
1292
|
+
|
1293
|
+
@dataclass
|
1294
|
+
class Person:
|
1295
|
+
name: str
|
1296
|
+
age: int
|
1297
|
+
address: Address
|
1298
|
+
|
1299
|
+
address = Address("123 Main St", "Anytown")
|
1300
|
+
person = Person("John", 30, address)
|
1301
|
+
expected_dict = {
|
1302
|
+
"name": "John",
|
1303
|
+
"age": 30,
|
1304
|
+
"address": {"street": "123 Main St", "city": "Anytown"},
|
1305
|
+
}
|
1306
|
+
|
1307
|
+
# Test nested struct conversion
|
1308
|
+
validate_full_roundtrip(person, Person, (expected_dict, dict[str, Any]))
|
1309
|
+
|
1310
|
+
|
1311
|
+
def test_roundtrip_struct_to_dict_with_list() -> None:
|
1312
|
+
"""Test struct -> dict binding with list fields."""
|
1313
|
+
|
1314
|
+
@dataclass
|
1315
|
+
class Team:
|
1316
|
+
name: str
|
1317
|
+
members: list[str]
|
1318
|
+
active: bool
|
1319
|
+
|
1320
|
+
instance = Team("Dev Team", ["Alice", "Bob", "Charlie"], True)
|
1321
|
+
expected_dict = {
|
1322
|
+
"name": "Dev Team",
|
1323
|
+
"members": ["Alice", "Bob", "Charlie"],
|
1324
|
+
"active": True,
|
1325
|
+
}
|
1326
|
+
|
1327
|
+
validate_full_roundtrip(instance, Team, (expected_dict, dict))
|
1328
|
+
|
1329
|
+
|
1330
|
+
def test_roundtrip_namedtuple_to_dict_binding() -> None:
|
1331
|
+
"""Test NamedTuple -> dict binding."""
|
1332
|
+
|
1333
|
+
class Point(NamedTuple):
|
1334
|
+
x: float
|
1335
|
+
y: float
|
1336
|
+
z: float
|
1337
|
+
|
1338
|
+
instance = Point(1.0, 2.0, 3.0)
|
1339
|
+
expected_dict = {"x": 1.0, "y": 2.0, "z": 3.0}
|
1340
|
+
|
1341
|
+
validate_full_roundtrip(
|
1342
|
+
instance, Point, (expected_dict, dict), (expected_dict, Any)
|
1343
|
+
)
|
cocoindex/tests/test_typing.py
CHANGED
@@ -539,5 +539,8 @@ def test_invalid_list_kind() -> None:
|
|
539
539
|
|
540
540
|
def test_unsupported_type() -> None:
|
541
541
|
typ = set
|
542
|
-
with pytest.raises(
|
542
|
+
with pytest.raises(
|
543
|
+
ValueError,
|
544
|
+
match="Unsupported as a specific type annotation for CocoIndex data type.*: <class 'set'>",
|
545
|
+
):
|
543
546
|
analyze_type_info(typ)
|
cocoindex/typing.py
CHANGED
@@ -168,7 +168,8 @@ class AnalyzedTypeInfo:
|
|
168
168
|
|
169
169
|
def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
170
170
|
"""
|
171
|
-
Analyze a Python type and
|
171
|
+
Analyze a Python type annotation and extract CocoIndex-specific type information.
|
172
|
+
Type annotations for specific CocoIndex types are expected. Raises ValueError for Any, empty, or untyped dict types.
|
172
173
|
"""
|
173
174
|
if isinstance(t, tuple) and len(t) == 2:
|
174
175
|
kt, vt = t
|
@@ -239,9 +240,15 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
|
239
240
|
_ = DtypeRegistry.validate_dtype_and_get_kind(elem_type)
|
240
241
|
vector_info = VectorInfo(dim=None) if vector_info is None else vector_info
|
241
242
|
|
242
|
-
elif base_type is collections.abc.Mapping or base_type is dict:
|
243
|
+
elif base_type is collections.abc.Mapping or base_type is dict or t is dict:
|
243
244
|
args = typing.get_args(t)
|
244
|
-
|
245
|
+
if len(args) == 0: # Handle untyped dict
|
246
|
+
raise ValueError(
|
247
|
+
"Untyped dict is not accepted as a specific type annotation; please provide a concrete type, "
|
248
|
+
"e.g. a dataclass or namedtuple for Struct types, a dict[str, T] for KTable types."
|
249
|
+
)
|
250
|
+
else:
|
251
|
+
elem_type = (args[0], args[1])
|
245
252
|
kind = "KTable"
|
246
253
|
elif base_type in (types.UnionType, typing.Union):
|
247
254
|
possible_types = typing.get_args(t)
|
@@ -283,7 +290,9 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo:
|
|
283
290
|
elif t is datetime.timedelta:
|
284
291
|
kind = "TimeDelta"
|
285
292
|
else:
|
286
|
-
raise ValueError(
|
293
|
+
raise ValueError(
|
294
|
+
f"Unsupported as a specific type annotation for CocoIndex data type (https://cocoindex.io/docs/core/data_types): {t}"
|
295
|
+
)
|
287
296
|
|
288
297
|
return AnalyzedTypeInfo(
|
289
298
|
kind=kind,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.67
|
4
4
|
Requires-Dist: click>=8.1.8
|
5
5
|
Requires-Dist: rich>=14.0.0
|
6
6
|
Requires-Dist: python-dotenv>=1.1.0
|
@@ -10,9 +10,11 @@ Requires-Dist: pytest ; extra == 'test'
|
|
10
10
|
Requires-Dist: ruff ; extra == 'dev'
|
11
11
|
Requires-Dist: pre-commit ; extra == 'dev'
|
12
12
|
Requires-Dist: sentence-transformers>=3.3.1 ; extra == 'embeddings'
|
13
|
+
Requires-Dist: cocoindex[embeddings] ; extra == 'all'
|
13
14
|
Provides-Extra: test
|
14
15
|
Provides-Extra: dev
|
15
16
|
Provides-Extra: embeddings
|
17
|
+
Provides-Extra: all
|
16
18
|
License-File: LICENSE
|
17
19
|
Summary: With CocoIndex, users declare the transformation, CocoIndex creates & maintains an index, and keeps the derived index up to date based on source update, with minimal computation and changes.
|
18
20
|
Author-email: CocoIndex <cocoindex.io@gmail.com>
|
@@ -25,7 +27,7 @@ Project-URL: Homepage, https://cocoindex.io/
|
|
25
27
|
<img src="https://cocoindex.io/images/github.svg" alt="CocoIndex">
|
26
28
|
</p>
|
27
29
|
|
28
|
-
<
|
30
|
+
<h1 align="center">Data transformation for AI</h1>
|
29
31
|
|
30
32
|
<div align="center">
|
31
33
|
|
@@ -40,18 +42,32 @@ Project-URL: Homepage, https://cocoindex.io/
|
|
40
42
|
[](https://discord.com/invite/zpA9S2DR7s)
|
41
43
|
</div>
|
42
44
|
|
43
|
-
|
45
|
+
Ultra-performant data transformation framework for AI, with the core engine written in Rust. Supports incremental processing and data lineage out of the box. Exceptional developer velocity. Production-ready from day 0.
|
46
|
+
|
47
|
+
⭐ Drop a star to help us grow!
|
48
|
+
|
49
|
+
</br>
|
50
|
+
|
51
|
+
<p align="center">
|
52
|
+
<img src="https://cocoindex.io/images/transformation.svg" alt="CocoIndex Transformation">
|
53
|
+
</p>
|
54
|
+
|
55
|
+
</br>
|
56
|
+
|
57
|
+
CocoIndex makes it super easy to transform data with AI workloads, and keep source data and target in sync effortlessly.
|
58
|
+
|
59
|
+
</br>
|
44
60
|
|
45
61
|
<p align="center">
|
46
|
-
<img src="https://cocoindex.io/images/
|
62
|
+
<img src="https://cocoindex.io/images/venn-features.png" alt="CocoIndex Features" width='480'>
|
47
63
|
</p>
|
48
64
|
|
49
|
-
|
65
|
+
</br>
|
50
66
|
|
51
|
-
|
52
|
-
Unlike a workflow orchestration framework where data is usually opaque, in CocoIndex, data and data operations are first class citizens. CocoIndex follows the idea of [Dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) programming model. Each transformation creates a new field solely based on input fields, without hidden states and value mutation. All data before/after each transformation is observable, with lineage out of the box.
|
67
|
+
Create embeddings, build knowledge graphs, or perform any other data transformation - beyond traditional SQL.
|
53
68
|
|
54
|
-
|
69
|
+
## Exceptional velocity
|
70
|
+
Just declare the transformations in a dataflow with ~100 lines of Python.
|
55
71
|
|
56
72
|
```python
|
57
73
|
# import
|
@@ -69,19 +85,27 @@ collector.collect(...)
|
|
69
85
|
collector.export(...)
|
70
86
|
```
|
71
87
|
|
72
|
-
|
73
|
-
|
88
|
+
CocoIndex follows the idea of [Dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) programming model. Each transformation creates a new field solely based on input fields, without hidden states and value mutation. All data before/after each transformation is observable, with lineage out of the box.
|
89
|
+
|
90
|
+
**In particular**, developers don't explicitly mutate data by creating, updating, and deleting. They just need to define the transformation/formula for a set of source data.
|
91
|
+
|
92
|
+
## Build like LEGO
|
93
|
+
Native built-ins for different sources, targets, and transformations. A standardized interface makes switching between components a one-line code change.
|
74
94
|
|
75
95
|
<p align="center">
|
76
|
-
<img src="https://
|
96
|
+
<img src="https://cocoindex.io/images/components.svg" alt="CocoIndex Features">
|
77
97
|
</p>
|
78
98
|
|
79
|
-
|
80
|
-
|
81
|
-
- Figure out what exactly needs to be updated, and only updating that without having to recompute everything.
|
99
|
+
## Data Freshness
|
100
|
+
CocoIndex keeps source data and target in sync effortlessly.
|
82
101
|
|
83
|
-
|
102
|
+
<p align="center">
|
103
|
+
<img src="https://github.com/user-attachments/assets/f4eb29b3-84ee-4fa0-a1e2-80eedeeabde6" alt="Incremental Processing" width="700">
|
104
|
+
</p>
|
84
105
|
|
106
|
+
It has out-of-the-box support for incremental indexing:
|
107
|
+
- minimal recomputation on source or logic change.
|
108
|
+
- (re-)processing of only the necessary portions, reusing the cache when possible.
|
85
109
|
|
86
110
|
## Quick Start:
|
87
111
|
If you're new to CocoIndex, we recommend checking out
|
@@ -100,7 +124,7 @@ pip install -U cocoindex
|
|
100
124
|
2. [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. CocoIndex uses it for incremental processing.
|
101
125
|
|
102
126
|
|
103
|
-
|
127
|
+
## Define data flow
|
104
128
|
|
105
129
|
Follow [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart) to define your first indexing flow. An example flow looks like:
|
106
130
|
|
@@ -144,8 +168,9 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
|
|
144
168
|
|
145
169
|
It defines an index flow like this:
|
146
170
|
|
147
|
-
<
|
148
|
-
|
171
|
+
<p align="center">
|
172
|
+
<img width="400" alt="Data Flow" src="https://github.com/user-attachments/assets/2ea7be6d-3d94-42b1-b2bd-22515577e463" />
|
173
|
+
</p>
|
149
174
|
|
150
175
|
## 🚀 Examples and demo
|
151
176
|
|
@@ -156,6 +181,7 @@ It defines an index flow like this:
|
|
156
181
|
| [PDF Embedding](examples/pdf_embedding) | Parse PDF and index text embeddings for semantic search |
|
157
182
|
| [Manuals LLM Extraction](examples/manuals_llm_extraction) | Extract structured information from a manual using LLM |
|
158
183
|
| [Amazon S3 Embedding](examples/amazon_s3_embedding) | Index text documents from Amazon S3 |
|
184
|
+
| [Azure Blob Storage Embedding](examples/azure_blob_embedding) | Index text documents from Azure Blob Storage |
|
159
185
|
| [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
|
160
186
|
| [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
|
161
187
|
| [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
|
@@ -1,28 +1,28 @@
|
|
1
|
-
cocoindex-0.1.
|
2
|
-
cocoindex-0.1.
|
3
|
-
cocoindex-0.1.
|
4
|
-
cocoindex-0.1.
|
5
|
-
cocoindex/__init__.py,sha256=
|
6
|
-
cocoindex/_engine.cpython-311-darwin.so,sha256=
|
1
|
+
cocoindex-0.1.67.dist-info/METADATA,sha256=SJIeiMwnbmOKiJSenH-DSoPatDM3bh6SD7B08GdI5Xs,10172
|
2
|
+
cocoindex-0.1.67.dist-info/WHEEL,sha256=4POUqOUvk-fNEqEa1NBlmMsgWQGl6FnEg9vsbsvEmNM,104
|
3
|
+
cocoindex-0.1.67.dist-info/entry_points.txt,sha256=_NretjYVzBdNTn7dK-zgwr7YfG2afz1u1uSE-5bZXF8,46
|
4
|
+
cocoindex-0.1.67.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
5
|
+
cocoindex/__init__.py,sha256=nr-W-LX0e9EX6ZVMbf0MRF0EhtntqADOdwc9F2nwfWw,2004
|
6
|
+
cocoindex/_engine.cpython-311-darwin.so,sha256=mf_fCuZmCZHt4cNr2XLmnjOL9KpZgIoPDa2qJ9XpfFc,61493152
|
7
7
|
cocoindex/auth_registry.py,sha256=1XqO7ibjmBBd8i11XSJTvTgdz8p1ptW-ZpuSgo_5zzk,716
|
8
8
|
cocoindex/cli.py,sha256=-gp639JSyQN6YjnhGqCakIzYoSSqXxQMbxbkcYGP0QY,22359
|
9
|
-
cocoindex/convert.py,sha256=
|
9
|
+
cocoindex/convert.py,sha256=RYfRUungabr-dHakG4k2kDvYambxHFljAmTuPQeQths,13117
|
10
10
|
cocoindex/flow.py,sha256=MFPtfJBVTjQ56d7vUn2LvtY30Vg4q2rY6nqvjjJL1kQ,35085
|
11
|
-
cocoindex/functions.py,sha256=
|
11
|
+
cocoindex/functions.py,sha256=Ih1rtaTvZzQ2wucCOSpzSUu2-eu0mgtBVi1mh9M-Buw,3162
|
12
12
|
cocoindex/index.py,sha256=j93B9jEvvLXHtpzKWL88SY6wCGEoPgpsQhEGHlyYGFg,540
|
13
13
|
cocoindex/lib.py,sha256=f--9dAYd84CZosbDZqNW0oGbBLsY3dXiUTR1VrfQ_QY,817
|
14
|
-
cocoindex/llm.py,sha256=
|
14
|
+
cocoindex/llm.py,sha256=WxmWUbNcf9HOCM5xkbDeFs9lF67M3mr810B7deDDc-8,673
|
15
15
|
cocoindex/op.py,sha256=r_Usx7Jqh49Cck3tsYLx2vLRNUZArkQP_g7bIID6LPU,11809
|
16
16
|
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
cocoindex/runtime.py,sha256=bAdHYaXFWiiUWyAgzmKTeaAaRR0D_AmaqVCIdPO-v00,1056
|
18
18
|
cocoindex/setting.py,sha256=ADuv7RaWd9k-m3V0Cfy2jmaCt6DupJCviWdOm0CTiVw,4734
|
19
19
|
cocoindex/setup.py,sha256=7uIHKN4FOCuoidPXcKyGTrkqpkl9luL49-6UcnMxYzw,3068
|
20
|
-
cocoindex/sources.py,sha256=
|
20
|
+
cocoindex/sources.py,sha256=8MR_oyr7t0m-gUFq7FO6HHM-tDLmQSBAjheFXJzRd8g,1733
|
21
21
|
cocoindex/targets.py,sha256=Nfh_tpFd1goTnS_cxBjIs4j9zl3Z4Z1JomAQ1dl3Sic,2796
|
22
22
|
cocoindex/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
-
cocoindex/tests/test_convert.py,sha256=
|
23
|
+
cocoindex/tests/test_convert.py,sha256=48-fnWKv02gmFETV3b-8IC89SKMUZfJLEH-ucRtXGuI,45450
|
24
24
|
cocoindex/tests/test_optional_database.py,sha256=snAmkNa6wtOSaxoZE1HgjvL5v_ylitt3Jt_9df4Cgdc,8506
|
25
|
-
cocoindex/tests/test_typing.py,sha256=
|
26
|
-
cocoindex/typing.py,sha256=
|
25
|
+
cocoindex/tests/test_typing.py,sha256=NB4nUzoumOF_wGFa4D2Xf6d0bUVtOiSXyb78M1pYSG4,14827
|
26
|
+
cocoindex/typing.py,sha256=MO9HkrNpargvMPvpkd7jgSu2R-21KE_NaB9-WI4YOZA,13241
|
27
27
|
cocoindex/utils.py,sha256=hUhX-XV6XGCtJSEIpBOuDv6VvqImwPlgBxztBTw7u0U,598
|
28
|
-
cocoindex-0.1.
|
28
|
+
cocoindex-0.1.67.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|