cocoindex 0.1.37__cp311-cp311-win_amd64.whl → 0.1.39__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +3 -2
- cocoindex/_engine.cp311-win_amd64.pyd +0 -0
- cocoindex/cli.py +4 -4
- cocoindex/convert.py +1 -1
- cocoindex/flow.py +131 -34
- cocoindex/lib.py +1 -0
- cocoindex/op.py +12 -12
- cocoindex/query.py +1 -1
- cocoindex/setting.py +24 -1
- cocoindex/setup.py +8 -2
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.39.dist-info}/METADATA +2 -1
- cocoindex-0.1.39.dist-info/RECORD +25 -0
- cocoindex-0.1.37.dist-info/RECORD +0 -25
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.39.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.39.dist-info}/licenses/LICENSE +0 -0
cocoindex/__init__.py
CHANGED
@@ -2,13 +2,14 @@
|
|
2
2
|
Cocoindex is a framework for building and running indexing pipelines.
|
3
3
|
"""
|
4
4
|
from . import functions, query, sources, storages, cli
|
5
|
-
from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def
|
5
|
+
from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def, transform_flow
|
6
6
|
from .flow import EvaluateAndDumpOptions, GeneratedField
|
7
7
|
from .flow import update_all_flows_async, FlowLiveUpdater, FlowLiveUpdaterOptions
|
8
8
|
from .llm import LlmSpec, LlmApiType
|
9
9
|
from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
|
10
10
|
from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
|
11
11
|
from .lib import *
|
12
|
-
from .setting import
|
12
|
+
from .setting import DatabaseConnectionSpec, Settings, ServerSettings
|
13
|
+
from .setting import get_app_namespace
|
13
14
|
from ._engine import OpArgSchema
|
14
15
|
from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
|
Binary file
|
cocoindex/cli.py
CHANGED
@@ -16,7 +16,7 @@ def cli():
|
|
16
16
|
@cli.command()
|
17
17
|
@click.option(
|
18
18
|
"-a", "--all", "show_all", is_flag=True, show_default=True, default=False,
|
19
|
-
help="Also show all flows with persisted setup, even if not defined in the current process.")
|
19
|
+
help="Also show all flows with persisted setup under the current app namespace, even if not defined in the current process.")
|
20
20
|
def ls(show_all: bool):
|
21
21
|
"""
|
22
22
|
List all flows.
|
@@ -65,7 +65,7 @@ def show(flow_name: str | None, color: bool, verbose: bool):
|
|
65
65
|
|
66
66
|
console.print()
|
67
67
|
table = Table(
|
68
|
-
title=f"Schema for Flow: {flow.
|
68
|
+
title=f"Schema for Flow: {flow.full_name}",
|
69
69
|
show_header=True,
|
70
70
|
header_style="bold magenta"
|
71
71
|
)
|
@@ -108,7 +108,7 @@ def drop(flow_name: tuple[str, ...], drop_all: bool):
|
|
108
108
|
if drop_all:
|
109
109
|
flow_names = flow_names_with_setup()
|
110
110
|
elif len(flow_name) == 0:
|
111
|
-
flow_names =
|
111
|
+
flow_names = flow.flow_names()
|
112
112
|
else:
|
113
113
|
flow_names = list(flow_name)
|
114
114
|
setup_status = drop_setup(flow_names)
|
@@ -160,7 +160,7 @@ def evaluate(flow_name: str | None, output_dir: str | None, cache: bool = True):
|
|
160
160
|
"""
|
161
161
|
fl = _flow_by_name(flow_name)
|
162
162
|
if output_dir is None:
|
163
|
-
output_dir = f"eval_{
|
163
|
+
output_dir = f"eval_{setting.get_app_namespace(trailing_delimiter='_')}{flow_name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
|
164
164
|
options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
|
165
165
|
fl.evaluate_and_dump(options)
|
166
166
|
|
cocoindex/convert.py
CHANGED
@@ -44,7 +44,7 @@ def make_engine_value_decoder(
|
|
44
44
|
|
45
45
|
src_type_kind = src_type['kind']
|
46
46
|
|
47
|
-
if dst_annotation is inspect.Parameter.empty:
|
47
|
+
if dst_annotation is None or dst_annotation is inspect.Parameter.empty or dst_annotation is Any:
|
48
48
|
if src_type_kind == 'Struct' or src_type_kind in TABLE_TYPES:
|
49
49
|
raise ValueError(f"Missing type annotation for `{''.join(field_path)}`."
|
50
50
|
f"It's required for {src_type_kind} type.")
|
cocoindex/flow.py
CHANGED
@@ -8,8 +8,9 @@ import asyncio
|
|
8
8
|
import re
|
9
9
|
import inspect
|
10
10
|
import datetime
|
11
|
+
import functools
|
11
12
|
|
12
|
-
from typing import Any, Callable, Sequence, TypeVar
|
13
|
+
from typing import Any, Callable, Sequence, TypeVar, Generic, get_args, get_origin, Type, NamedTuple
|
13
14
|
from threading import Lock
|
14
15
|
from enum import Enum
|
15
16
|
from dataclasses import dataclass
|
@@ -19,7 +20,8 @@ from rich.tree import Tree
|
|
19
20
|
from . import _engine
|
20
21
|
from . import index
|
21
22
|
from . import op
|
22
|
-
from .
|
23
|
+
from . import setting
|
24
|
+
from .convert import dump_engine_object, encode_engine_value, make_engine_value_decoder
|
23
25
|
from .typing import encode_enriched_type
|
24
26
|
from .runtime import execution_context
|
25
27
|
|
@@ -122,7 +124,7 @@ class _DataSliceState:
|
|
122
124
|
# TODO: We'll support this by an identity transformer or "aliasing" in the future.
|
123
125
|
raise ValueError("DataSlice is already attached to a field")
|
124
126
|
|
125
|
-
class DataSlice:
|
127
|
+
class DataSlice(Generic[T]):
|
126
128
|
"""A data slice represents a slice of data in a flow. It's readonly."""
|
127
129
|
|
128
130
|
_state: _DataSliceState
|
@@ -182,11 +184,11 @@ class DataSlice:
|
|
182
184
|
name, prefix=_to_snake_case(_spec_kind(fn_spec))+'_'),
|
183
185
|
))
|
184
186
|
|
185
|
-
def call(self, func: Callable[[DataSlice], T]) -> T:
|
187
|
+
def call(self, func: Callable[[DataSlice], T], *args, **kwargs) -> T:
|
186
188
|
"""
|
187
189
|
Call a function with the data slice.
|
188
190
|
"""
|
189
|
-
return func(self)
|
191
|
+
return func(self, *args, **kwargs)
|
190
192
|
|
191
193
|
def _data_slice_state(data_slice: DataSlice) -> _DataSliceState:
|
192
194
|
return data_slice._state # pylint: disable=protected-access
|
@@ -310,7 +312,7 @@ class _FlowBuilderState:
|
|
310
312
|
|
311
313
|
def __init__(self, /, name: str | None = None):
|
312
314
|
flow_name = _flow_name_builder.build_name(name, prefix="_flow_")
|
313
|
-
self.engine_flow_builder = _engine.FlowBuilder(flow_name)
|
315
|
+
self.engine_flow_builder = _engine.FlowBuilder(get_full_flow_name(flow_name))
|
314
316
|
self.field_name_builder = _NameBuilder()
|
315
317
|
|
316
318
|
def get_data_slice(self, v: Any) -> _engine.DataSlice:
|
@@ -481,7 +483,7 @@ class Flow:
|
|
481
483
|
Render the flow spec as a styled rich Tree with hierarchical structure.
|
482
484
|
"""
|
483
485
|
spec = self._get_spec(verbose=verbose)
|
484
|
-
tree = Tree(f"Flow: {self.
|
486
|
+
tree = Tree(f"Flow: {self.full_name}", style="cyan")
|
485
487
|
|
486
488
|
def build_tree(label: str, lines: list):
|
487
489
|
node = Tree(label, style="bold magenta" if lines else "cyan")
|
@@ -508,9 +510,9 @@ class Flow:
|
|
508
510
|
return repr(self._lazy_engine_flow())
|
509
511
|
|
510
512
|
@property
|
511
|
-
def
|
513
|
+
def full_name(self) -> str:
|
512
514
|
"""
|
513
|
-
Get the name of the flow.
|
515
|
+
Get the full name of the flow.
|
514
516
|
"""
|
515
517
|
return self._lazy_engine_flow().name()
|
516
518
|
|
@@ -566,8 +568,16 @@ def _create_lazy_flow(name: str | None, fl_def: Callable[[FlowBuilder, DataScope
|
|
566
568
|
_flows_lock = Lock()
|
567
569
|
_flows: dict[str, Flow] = {}
|
568
570
|
|
571
|
+
def get_full_flow_name(name: str) -> str:
|
572
|
+
"""
|
573
|
+
Get the full name of a flow.
|
574
|
+
"""
|
575
|
+
return f"{setting.get_app_namespace(trailing_delimiter='.')}{name}"
|
576
|
+
|
569
577
|
def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
|
570
578
|
"""Add a flow definition to the cocoindex library."""
|
579
|
+
if not all(c.isalnum() or c == '_' for c in name):
|
580
|
+
raise ValueError(f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed.")
|
571
581
|
with _flows_lock:
|
572
582
|
if name in _flows:
|
573
583
|
raise KeyError(f"Flow with name {name} already exists")
|
@@ -587,12 +597,12 @@ def flow_names() -> list[str]:
|
|
587
597
|
with _flows_lock:
|
588
598
|
return list(_flows.keys())
|
589
599
|
|
590
|
-
def flows() ->
|
600
|
+
def flows() -> dict[str, Flow]:
|
591
601
|
"""
|
592
602
|
Get all flows.
|
593
603
|
"""
|
594
604
|
with _flows_lock:
|
595
|
-
return
|
605
|
+
return dict(_flows)
|
596
606
|
|
597
607
|
def flow_by_name(name: str) -> Flow:
|
598
608
|
"""
|
@@ -605,14 +615,13 @@ def ensure_all_flows_built() -> None:
|
|
605
615
|
"""
|
606
616
|
Ensure all flows are built.
|
607
617
|
"""
|
608
|
-
|
609
|
-
fl.internal_flow()
|
618
|
+
execution_context.run(ensure_all_flows_built_async())
|
610
619
|
|
611
620
|
async def ensure_all_flows_built_async() -> None:
|
612
621
|
"""
|
613
622
|
Ensure all flows are built.
|
614
623
|
"""
|
615
|
-
for fl in flows():
|
624
|
+
for fl in flows().values():
|
616
625
|
await fl.internal_flow_async()
|
617
626
|
|
618
627
|
def update_all_flows(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
|
@@ -626,35 +635,75 @@ async def update_all_flows_async(options: FlowLiveUpdaterOptions) -> dict[str, _
|
|
626
635
|
Update all flows.
|
627
636
|
"""
|
628
637
|
await ensure_all_flows_built_async()
|
629
|
-
async def _update_flow(fl: Flow) -> _engine.IndexUpdateInfo:
|
638
|
+
async def _update_flow(name: str, fl: Flow) -> tuple[str, _engine.IndexUpdateInfo]:
|
630
639
|
async with FlowLiveUpdater(fl, options) as updater:
|
631
640
|
await updater.wait_async()
|
632
|
-
return updater.update_stats()
|
641
|
+
return (name, updater.update_stats())
|
633
642
|
fls = flows()
|
634
|
-
all_stats = await asyncio.gather(*(_update_flow(fl) for fl in fls))
|
635
|
-
return
|
643
|
+
all_stats = await asyncio.gather(*(_update_flow(name, fl) for (name, fl) in fls.items()))
|
644
|
+
return dict(all_stats)
|
645
|
+
|
646
|
+
def _get_data_slice_annotation_type(data_slice_type: Type[DataSlice[T]]) -> Type[T] | None:
|
647
|
+
type_args = get_args(data_slice_type)
|
648
|
+
if data_slice_type is DataSlice:
|
649
|
+
return None
|
650
|
+
if get_origin(data_slice_type) != DataSlice or len(type_args) != 1:
|
651
|
+
raise ValueError(f"Expect a DataSlice[T] type, but got {data_slice_type}")
|
652
|
+
return type_args[0]
|
636
653
|
|
637
|
-
|
638
|
-
|
654
|
+
_transform_flow_name_builder = _NameBuilder()
|
655
|
+
|
656
|
+
class TransformFlowInfo(NamedTuple):
|
657
|
+
engine_flow: _engine.TransientFlow
|
658
|
+
result_decoder: Callable[[Any], T]
|
659
|
+
|
660
|
+
class TransformFlow(Generic[T]):
|
639
661
|
"""
|
640
662
|
A transient transformation flow that transforms in-memory data.
|
641
663
|
"""
|
642
|
-
|
664
|
+
_flow_fn: Callable[..., DataSlice[T]]
|
665
|
+
_flow_name: str
|
666
|
+
_flow_arg_types: list[Any]
|
667
|
+
_param_names: list[str]
|
668
|
+
|
669
|
+
_lazy_lock: asyncio.Lock
|
670
|
+
_lazy_flow_info: TransformFlowInfo | None = None
|
643
671
|
|
644
672
|
def __init__(
|
645
|
-
self, flow_fn: Callable[..., DataSlice],
|
673
|
+
self, flow_fn: Callable[..., DataSlice[T]],
|
646
674
|
flow_arg_types: Sequence[Any], /, name: str | None = None):
|
675
|
+
self._flow_fn = flow_fn
|
676
|
+
self._flow_name = _transform_flow_name_builder.build_name(name, prefix="_transform_flow_")
|
677
|
+
self._flow_arg_types = list(flow_arg_types)
|
678
|
+
self._lazy_lock = asyncio.Lock()
|
647
679
|
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
680
|
+
def __call__(self, *args, **kwargs) -> DataSlice[T]:
|
681
|
+
return self._flow_fn(*args, **kwargs)
|
682
|
+
|
683
|
+
@property
|
684
|
+
def _flow_info(self) -> TransformFlowInfo:
|
685
|
+
if self._lazy_flow_info is not None:
|
686
|
+
return self._lazy_flow_info
|
687
|
+
return execution_context.run(self._flow_info_async())
|
688
|
+
|
689
|
+
async def _flow_info_async(self) -> TransformFlowInfo:
|
690
|
+
if self._lazy_flow_info is not None:
|
691
|
+
return self._lazy_flow_info
|
692
|
+
async with self._lazy_lock:
|
693
|
+
if self._lazy_flow_info is None:
|
694
|
+
self._lazy_flow_info = await self._build_flow_info_async()
|
695
|
+
return self._lazy_flow_info
|
696
|
+
|
697
|
+
async def _build_flow_info_async(self) -> TransformFlowInfo:
|
698
|
+
flow_builder_state = _FlowBuilderState(name=self._flow_name)
|
699
|
+
sig = inspect.signature(self._flow_fn)
|
700
|
+
if len(sig.parameters) != len(self._flow_arg_types):
|
652
701
|
raise ValueError(
|
653
702
|
f"Number of parameters in the flow function ({len(sig.parameters)}) "
|
654
|
-
"does not match the number of argument types ({len(
|
703
|
+
f"does not match the number of argument types ({len(self._flow_arg_types)})")
|
655
704
|
|
656
705
|
kwargs: dict[str, DataSlice] = {}
|
657
|
-
for (param_name, param), param_type in zip(sig.parameters.items(),
|
706
|
+
for (param_name, param), param_type in zip(sig.parameters.items(), self._flow_arg_types):
|
658
707
|
if param.kind not in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
659
708
|
inspect.Parameter.KEYWORD_ONLY):
|
660
709
|
raise ValueError(f"Parameter {param_name} is not a parameter can be passed by name")
|
@@ -662,20 +711,68 @@ class TransientFlow:
|
|
662
711
|
param_name, encode_enriched_type(param_type))
|
663
712
|
kwargs[param_name] = DataSlice(_DataSliceState(flow_builder_state, engine_ds))
|
664
713
|
|
665
|
-
output =
|
714
|
+
output = self._flow_fn(**kwargs)
|
666
715
|
flow_builder_state.engine_flow_builder.set_direct_output(
|
667
716
|
_data_slice_state(output).engine_data_slice)
|
668
|
-
|
669
|
-
|
717
|
+
engine_flow = await flow_builder_state.engine_flow_builder.build_transient_flow_async(execution_context.event_loop)
|
718
|
+
self._param_names = list(sig.parameters.keys())
|
719
|
+
|
720
|
+
engine_return_type = _data_slice_state(output).engine_data_slice.data_type().schema()
|
721
|
+
python_return_type = _get_data_slice_annotation_type(sig.return_annotation)
|
722
|
+
result_decoder = make_engine_value_decoder([], engine_return_type['type'], python_return_type)
|
723
|
+
|
724
|
+
return TransformFlowInfo(engine_flow, result_decoder)
|
670
725
|
|
671
726
|
def __str__(self):
|
672
|
-
return str(self.
|
727
|
+
return str(self._flow_info.engine_flow)
|
673
728
|
|
674
729
|
def __repr__(self):
|
675
|
-
return repr(self.
|
730
|
+
return repr(self._flow_info.engine_flow)
|
676
731
|
|
677
732
|
def internal_flow(self) -> _engine.TransientFlow:
|
678
733
|
"""
|
679
734
|
Get the internal flow.
|
680
735
|
"""
|
681
|
-
return self.
|
736
|
+
return self._flow_info.engine_flow
|
737
|
+
|
738
|
+
def eval(self, *args, **kwargs) -> T:
|
739
|
+
"""
|
740
|
+
Evaluate the transform flow.
|
741
|
+
"""
|
742
|
+
return execution_context.run(self.eval_async(*args, **kwargs))
|
743
|
+
|
744
|
+
async def eval_async(self, *args, **kwargs) -> T:
|
745
|
+
"""
|
746
|
+
Evaluate the transform flow.
|
747
|
+
"""
|
748
|
+
flow_info = await self._flow_info_async()
|
749
|
+
params = []
|
750
|
+
for i, arg in enumerate(self._param_names):
|
751
|
+
if i < len(args):
|
752
|
+
params.append(encode_engine_value(args[i]))
|
753
|
+
elif arg in kwargs:
|
754
|
+
params.append(encode_engine_value(kwargs[arg]))
|
755
|
+
else:
|
756
|
+
raise ValueError(f"Parameter {arg} is not provided")
|
757
|
+
engine_result = await flow_info.engine_flow.evaluate_async(params)
|
758
|
+
return flow_info.result_decoder(engine_result)
|
759
|
+
|
760
|
+
|
761
|
+
def transform_flow() -> Callable[[Callable[..., DataSlice[T]]], TransformFlow[T]]:
|
762
|
+
"""
|
763
|
+
A decorator to wrap the transform function.
|
764
|
+
"""
|
765
|
+
def _transform_flow_wrapper(fn: Callable[..., DataSlice[T]]):
|
766
|
+
sig = inspect.signature(fn)
|
767
|
+
arg_types = []
|
768
|
+
for (param_name, param) in sig.parameters.items():
|
769
|
+
if param.kind not in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
|
770
|
+
inspect.Parameter.KEYWORD_ONLY):
|
771
|
+
raise ValueError(f"Parameter {param_name} is not a parameter can be passed by name")
|
772
|
+
arg_types.append(_get_data_slice_annotation_type(param.annotation))
|
773
|
+
|
774
|
+
_transform_flow = TransformFlow(fn, arg_types)
|
775
|
+
functools.update_wrapper(_transform_flow, fn)
|
776
|
+
return _transform_flow
|
777
|
+
|
778
|
+
return _transform_flow_wrapper
|
cocoindex/lib.py
CHANGED
@@ -15,6 +15,7 @@ from .convert import dump_engine_object
|
|
15
15
|
def init(settings: setting.Settings):
|
16
16
|
"""Initialize the cocoindex library."""
|
17
17
|
_engine.init(dump_engine_object(settings))
|
18
|
+
setting.set_app_namespace(settings.app_namespace)
|
18
19
|
|
19
20
|
|
20
21
|
def start_server(settings: setting.ServerSettings):
|
cocoindex/op.py
CHANGED
@@ -100,8 +100,8 @@ def _register_op_factory(
|
|
100
100
|
return op_args.behavior_version
|
101
101
|
|
102
102
|
class _WrappedClass(executor_cls, _Fallback):
|
103
|
-
|
104
|
-
|
103
|
+
_args_decoders: list[Callable[[Any], Any]]
|
104
|
+
_kwargs_decoders: dict[str, Callable[[str, Any], Any]]
|
105
105
|
_acall: Callable
|
106
106
|
|
107
107
|
def __init__(self, spec):
|
@@ -109,17 +109,17 @@ def _register_op_factory(
|
|
109
109
|
self.spec = spec
|
110
110
|
self._acall = _to_async_call(super().__call__)
|
111
111
|
|
112
|
-
def analyze(self, *args, **kwargs):
|
112
|
+
def analyze(self, *args: _engine.OpArgSchema, **kwargs: _engine.OpArgSchema):
|
113
113
|
"""
|
114
114
|
Analyze the spec and arguments. In this phase, argument types should be validated.
|
115
115
|
It should return the expected result type for the current op.
|
116
116
|
"""
|
117
|
-
self.
|
118
|
-
self.
|
117
|
+
self._args_decoders = []
|
118
|
+
self._kwargs_decoders = {}
|
119
119
|
|
120
120
|
# Match arguments with parameters.
|
121
121
|
next_param_idx = 0
|
122
|
-
for arg in
|
122
|
+
for arg in args:
|
123
123
|
if next_param_idx >= len(expected_args):
|
124
124
|
raise ValueError(
|
125
125
|
f"Too many arguments passed in: {len(args)} > {len(expected_args)}")
|
@@ -128,7 +128,7 @@ def _register_op_factory(
|
|
128
128
|
inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
|
129
129
|
raise ValueError(
|
130
130
|
f"Too many positional arguments passed in: {len(args)} > {next_param_idx}")
|
131
|
-
self.
|
131
|
+
self._args_decoders.append(
|
132
132
|
make_engine_value_decoder(
|
133
133
|
[arg_name], arg.value_type['type'], arg_param.annotation))
|
134
134
|
if arg_param.kind != inspect.Parameter.VAR_POSITIONAL:
|
@@ -146,7 +146,7 @@ def _register_op_factory(
|
|
146
146
|
if expected_arg is None:
|
147
147
|
raise ValueError(f"Unexpected keyword argument passed in: {kwarg_name}")
|
148
148
|
arg_param = expected_arg[1]
|
149
|
-
self.
|
149
|
+
self._kwargs_decoders[kwarg_name] = make_engine_value_decoder(
|
150
150
|
[kwarg_name], kwarg.value_type['type'], arg_param.annotation)
|
151
151
|
|
152
152
|
missing_args = [name for (name, arg) in expected_kwargs
|
@@ -174,8 +174,8 @@ def _register_op_factory(
|
|
174
174
|
await _to_async_call(setup_method)()
|
175
175
|
|
176
176
|
async def __call__(self, *args, **kwargs):
|
177
|
-
|
178
|
-
|
177
|
+
decoded_args = (decoder(arg) for decoder, arg in zip(self._args_decoders, args))
|
178
|
+
decoded_kwargs = {arg_name: self._kwargs_decoders[arg_name](arg)
|
179
179
|
for arg_name, arg in kwargs.items()}
|
180
180
|
|
181
181
|
if op_args.gpu:
|
@@ -185,9 +185,9 @@ def _register_op_factory(
|
|
185
185
|
# For now, we use a lock to ensure only one task is executed at a time.
|
186
186
|
# TODO: Implement multi-processing dispatching.
|
187
187
|
async with _gpu_dispatch_lock:
|
188
|
-
output = await self._acall(*
|
188
|
+
output = await self._acall(*decoded_args, **decoded_kwargs)
|
189
189
|
else:
|
190
|
-
output = await self._acall(*
|
190
|
+
output = await self._acall(*decoded_args, **decoded_kwargs)
|
191
191
|
return encode_engine_value(output)
|
192
192
|
|
193
193
|
_WrappedClass.__name__ = executor_cls.__name__
|
cocoindex/query.py
CHANGED
@@ -50,7 +50,7 @@ class SimpleSemanticsQueryHandler:
|
|
50
50
|
if engine_handler is None:
|
51
51
|
engine_handler = _engine.SimpleSemanticsQueryHandler(
|
52
52
|
flow.internal_flow(), target_name,
|
53
|
-
fl.
|
53
|
+
fl.TransformFlow(query_transform_flow, [str]).internal_flow(),
|
54
54
|
default_similarity_metric.value)
|
55
55
|
engine_handler.register_query_handler(name)
|
56
56
|
return engine_handler
|
cocoindex/setting.py
CHANGED
@@ -6,6 +6,25 @@ import os
|
|
6
6
|
from typing import Callable, Self, Any, overload
|
7
7
|
from dataclasses import dataclass
|
8
8
|
|
9
|
+
_app_namespace: str = ''
|
10
|
+
|
11
|
+
def get_app_namespace(*, trailing_delimiter: str | None = None) -> str:
|
12
|
+
"""Get the application namespace. Append the `trailing_delimiter` if not empty."""
|
13
|
+
if _app_namespace == '' or trailing_delimiter is None:
|
14
|
+
return _app_namespace
|
15
|
+
return f'{_app_namespace}{trailing_delimiter}'
|
16
|
+
|
17
|
+
def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
|
18
|
+
"""Split the full name into the application namespace and the rest."""
|
19
|
+
parts = full_name.split(delimiter, 1)
|
20
|
+
if len(parts) == 1:
|
21
|
+
return '', parts[0]
|
22
|
+
return (parts[0], parts[1])
|
23
|
+
|
24
|
+
def set_app_namespace(app_namespace: str):
|
25
|
+
"""Set the application namespace."""
|
26
|
+
global _app_namespace # pylint: disable=global-statement
|
27
|
+
_app_namespace = app_namespace
|
9
28
|
|
10
29
|
@dataclass
|
11
30
|
class DatabaseConnectionSpec:
|
@@ -30,6 +49,7 @@ def _load_field(target: dict[str, Any], name: str, env_name: str, required: bool
|
|
30
49
|
class Settings:
|
31
50
|
"""Settings for the cocoindex library."""
|
32
51
|
database: DatabaseConnectionSpec
|
52
|
+
app_namespace: str = ""
|
33
53
|
|
34
54
|
@classmethod
|
35
55
|
def from_env(cls) -> Self:
|
@@ -40,7 +60,10 @@ class Settings:
|
|
40
60
|
_load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
|
41
61
|
_load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
|
42
62
|
database = DatabaseConnectionSpec(**db_kwargs)
|
43
|
-
|
63
|
+
|
64
|
+
app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", '')
|
65
|
+
|
66
|
+
return cls(database=database, app_namespace=app_namespace)
|
44
67
|
|
45
68
|
@dataclass
|
46
69
|
class ServerSettings:
|
cocoindex/setup.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
from . import flow
|
2
|
+
from . import setting
|
2
3
|
from . import _engine
|
3
4
|
|
4
5
|
def sync_setup() -> _engine.SetupStatus:
|
@@ -7,10 +8,15 @@ def sync_setup() -> _engine.SetupStatus:
|
|
7
8
|
|
8
9
|
def drop_setup(flow_names: list[str]) -> _engine.SetupStatus:
|
9
10
|
flow.ensure_all_flows_built()
|
10
|
-
return _engine.drop_setup(flow_names)
|
11
|
+
return _engine.drop_setup([flow.get_full_flow_name(name) for name in flow_names])
|
11
12
|
|
12
13
|
def flow_names_with_setup() -> list[str]:
|
13
|
-
|
14
|
+
result = []
|
15
|
+
for name in _engine.flow_names_with_setup():
|
16
|
+
app_namespace, name = setting.split_app_namespace(name, '.')
|
17
|
+
if app_namespace == setting.get_app_namespace():
|
18
|
+
result.append(name)
|
19
|
+
return result
|
14
20
|
|
15
21
|
def apply_setup_changes(setup_status: _engine.SetupStatus):
|
16
22
|
_engine.apply_setup_changes(setup_status)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.39
|
4
4
|
Requires-Dist: sentence-transformers>=3.3.1
|
5
5
|
Requires-Dist: click>=8.1.8
|
6
6
|
Requires-Dist: rich>=14.0.0
|
@@ -148,6 +148,7 @@ It defines an index flow like this:
|
|
148
148
|
| [Code Embedding](examples/code_embedding) | Index code embeddings for semantic search |
|
149
149
|
| [PDF Embedding](examples/pdf_embedding) | Parse PDF and index text embeddings for semantic search |
|
150
150
|
| [Manuals LLM Extraction](examples/manuals_llm_extraction) | Extract structured information from a manual using LLM |
|
151
|
+
| [Amazon S3 Embedding](examples/amazon_s3_embedding) | Index text documents from Amazon S3 |
|
151
152
|
| [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
|
152
153
|
| [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
|
153
154
|
| [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
|
@@ -0,0 +1,25 @@
|
|
1
|
+
cocoindex-0.1.39.dist-info/METADATA,sha256=pnwutYA2ul-s26Xw4BkP6nqpTCoye74FvOqRPw1AOEk,9958
|
2
|
+
cocoindex-0.1.39.dist-info/WHEEL,sha256=Kw8y023UaufEXFM028WbBCk7rJUUAhAX1Zw-54pv0m4,96
|
3
|
+
cocoindex-0.1.39.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
4
|
+
cocoindex/__init__.py,sha256=pHcVzNeWewxtx9gKiPnTB8jLjuFlX9Bw9hX1cEGOWvI,806
|
5
|
+
cocoindex/_engine.cp311-win_amd64.pyd,sha256=DM4eY3Dq3MlYvk6doOpMFTvqMzsVlsWCiK9G9XCwSco,60931584
|
6
|
+
cocoindex/auth_registry.py,sha256=-EhwmyIwJLPZthPOvEK21TfMbxQtndRbogk8q7y4XuU,716
|
7
|
+
cocoindex/cli.py,sha256=iJUQ2GSI8iHW3wfKRxHH5nixug92imPVmTalOjUVa3w,9237
|
8
|
+
cocoindex/convert.py,sha256=Co8MhwbnGWONu2mkrQYzFM1gw3stOIdFHueT1mszdV4,7085
|
9
|
+
cocoindex/flow.py,sha256=xMJM0vzTrx4PHgQ5OxktCaBW1q54B_E11MpP-a_SD-s,28396
|
10
|
+
cocoindex/functions.py,sha256=m7R8gNVK5RcavPY37d2lkRXwLBdMblZexCJ0JYcruss,1881
|
11
|
+
cocoindex/index.py,sha256=32iiQI60VKhRRHle17rpoEVa_tsAHnyXXXnrLaX68uQ,557
|
12
|
+
cocoindex/lib.py,sha256=Yv3_qKESgrjxJRYQn8a89aZcEoLZ5-xewgfEN0l9K1w,2461
|
13
|
+
cocoindex/llm.py,sha256=POMdB-huMvPkRDpvcaBeOgXfbm0YZa0swNOdD2s4TRc,364
|
14
|
+
cocoindex/op.py,sha256=B1P7vCmrcuBuxnX8tHqGvfFgUMRBu4Om2VFnzk00MpI,11007
|
15
|
+
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
+
cocoindex/query.py,sha256=ljsylpRRxEpBXQakE2_4a89wWLYXTCE9HjacyjhxsQw,3297
|
17
|
+
cocoindex/runtime.py,sha256=WwyDSpJhvaHjnxH4q6r2MVftJB-QkNVH_TxZLxuDtFw,1009
|
18
|
+
cocoindex/setting.py,sha256=dTtUD_L2b4Jf2w1-Dt8M1eFMpYQV9sMS95fJ4wB_9-I,3364
|
19
|
+
cocoindex/setup.py,sha256=YAH9ssZB27MXYvsiqtBA1IDjAfeUci--d3cjFE76i0Y,767
|
20
|
+
cocoindex/sources.py,sha256=W5wgiNwhTjPyNxt3DLKBbOnGwrHHfi36b9Gfr1GYhe0,1369
|
21
|
+
cocoindex/storages.py,sha256=oJcY_zZx1wXjoJ_gitSvaFTUqhL6lUJihZQivvMMZ5U,2292
|
22
|
+
cocoindex/tests/__init__.py,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
|
23
|
+
cocoindex/tests/test_convert.py,sha256=XbjzUoKazBJyBRCPaNElbRGoDD4NJZ9J-i4el39O7w8,15863
|
24
|
+
cocoindex/typing.py,sha256=ZZ-QqkfNNSRkzqWahx-8w3X4upjy4kyIFr7OAPWYz5Q,9232
|
25
|
+
cocoindex-0.1.39.dist-info/RECORD,,
|
@@ -1,25 +0,0 @@
|
|
1
|
-
cocoindex-0.1.37.dist-info/METADATA,sha256=WufgZVFlEeR6MRUikNwS-VxuoDH8ragHIArfgZuwR9I,9863
|
2
|
-
cocoindex-0.1.37.dist-info/WHEEL,sha256=Kw8y023UaufEXFM028WbBCk7rJUUAhAX1Zw-54pv0m4,96
|
3
|
-
cocoindex-0.1.37.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
4
|
-
cocoindex/__init__.py,sha256=euvHrtzQEXtAEut0EFlgcc4VtogA6zltqWRdt70Zgrg,703
|
5
|
-
cocoindex/_engine.cp311-win_amd64.pyd,sha256=g4nyxhLAmkxIh_2iB7xn7Ap6ZXWLu-bTYp4DeYzjVF0,60771840
|
6
|
-
cocoindex/auth_registry.py,sha256=-EhwmyIwJLPZthPOvEK21TfMbxQtndRbogk8q7y4XuU,716
|
7
|
-
cocoindex/cli.py,sha256=3Q8EKnUlnFP3dE2-IxsdZBdG9rA4DVc0vNCcTz-XHQ8,9162
|
8
|
-
cocoindex/convert.py,sha256=XT747PZcerRGlsaJ69mZoG-7Z4ruwDNX1pO0VlGKlwY,7034
|
9
|
-
cocoindex/flow.py,sha256=aFY1J0M-djr4gNa2giYY2yLYFEkePnBFHrGSvPXgFhw,24056
|
10
|
-
cocoindex/functions.py,sha256=m7R8gNVK5RcavPY37d2lkRXwLBdMblZexCJ0JYcruss,1881
|
11
|
-
cocoindex/index.py,sha256=32iiQI60VKhRRHle17rpoEVa_tsAHnyXXXnrLaX68uQ,557
|
12
|
-
cocoindex/lib.py,sha256=w0SumLvn2xfuQOgzLZDnQLur_GZ9DMAKOFo9NKF8q1k,2406
|
13
|
-
cocoindex/llm.py,sha256=POMdB-huMvPkRDpvcaBeOgXfbm0YZa0swNOdD2s4TRc,364
|
14
|
-
cocoindex/op.py,sha256=PeoBddukU5vOBvUrFPC6fYH35L59THy9ZXVm_Vc2ng0,10998
|
15
|
-
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
cocoindex/query.py,sha256=GU3V8AoSK--xOU2yX0Ao-Rz1t3mlrsJ4JIhzsLVwFH8,3297
|
17
|
-
cocoindex/runtime.py,sha256=WwyDSpJhvaHjnxH4q6r2MVftJB-QkNVH_TxZLxuDtFw,1009
|
18
|
-
cocoindex/setting.py,sha256=UiTAWLvVEMsM-p31c20ciWVetmPcZsLhHQ74P5jGChA,2432
|
19
|
-
cocoindex/setup.py,sha256=UyU2vvSyFwTDL-TnYMB0cNlyW6t2enjcufEhURMWIEA,497
|
20
|
-
cocoindex/sources.py,sha256=W5wgiNwhTjPyNxt3DLKBbOnGwrHHfi36b9Gfr1GYhe0,1369
|
21
|
-
cocoindex/storages.py,sha256=oJcY_zZx1wXjoJ_gitSvaFTUqhL6lUJihZQivvMMZ5U,2292
|
22
|
-
cocoindex/tests/__init__.py,sha256=frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN_XKdLCPjaYaY,2
|
23
|
-
cocoindex/tests/test_convert.py,sha256=XbjzUoKazBJyBRCPaNElbRGoDD4NJZ9J-i4el39O7w8,15863
|
24
|
-
cocoindex/typing.py,sha256=ZZ-QqkfNNSRkzqWahx-8w3X4upjy4kyIFr7OAPWYz5Q,9232
|
25
|
-
cocoindex-0.1.37.dist-info/RECORD,,
|
File without changes
|
File without changes
|