cocoindex 0.1.37__cp312-cp312-macosx_11_0_arm64.whl → 0.1.38__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/_engine.cpython-312-darwin.so +0 -0
- cocoindex/cli.py +4 -4
- cocoindex/flow.py +21 -13
- cocoindex/lib.py +1 -0
- cocoindex/setting.py +24 -1
- cocoindex/setup.py +8 -2
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.38.dist-info}/METADATA +2 -1
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.38.dist-info}/RECORD +10 -10
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.38.dist-info}/WHEEL +0 -0
- {cocoindex-0.1.37.dist-info → cocoindex-0.1.38.dist-info}/licenses/LICENSE +0 -0
Binary file
|
cocoindex/cli.py
CHANGED
@@ -16,7 +16,7 @@ def cli():
|
|
16
16
|
@cli.command()
|
17
17
|
@click.option(
|
18
18
|
"-a", "--all", "show_all", is_flag=True, show_default=True, default=False,
|
19
|
-
help="Also show all flows with persisted setup, even if not defined in the current process.")
|
19
|
+
help="Also show all flows with persisted setup under the current app namespace, even if not defined in the current process.")
|
20
20
|
def ls(show_all: bool):
|
21
21
|
"""
|
22
22
|
List all flows.
|
@@ -65,7 +65,7 @@ def show(flow_name: str | None, color: bool, verbose: bool):
|
|
65
65
|
|
66
66
|
console.print()
|
67
67
|
table = Table(
|
68
|
-
title=f"Schema for Flow: {flow.name}",
|
68
|
+
title=f"Schema for Flow: {flow.full_name}",
|
69
69
|
show_header=True,
|
70
70
|
header_style="bold magenta"
|
71
71
|
)
|
@@ -108,7 +108,7 @@ def drop(flow_name: tuple[str, ...], drop_all: bool):
|
|
108
108
|
if drop_all:
|
109
109
|
flow_names = flow_names_with_setup()
|
110
110
|
elif len(flow_name) == 0:
|
111
|
-
flow_names =
|
111
|
+
flow_names = flow.flow_names()
|
112
112
|
else:
|
113
113
|
flow_names = list(flow_name)
|
114
114
|
setup_status = drop_setup(flow_names)
|
@@ -160,7 +160,7 @@ def evaluate(flow_name: str | None, output_dir: str | None, cache: bool = True):
|
|
160
160
|
"""
|
161
161
|
fl = _flow_by_name(flow_name)
|
162
162
|
if output_dir is None:
|
163
|
-
output_dir = f"eval_{
|
163
|
+
output_dir = f"eval_{setting.get_app_namespace(trailing_delimiter='_')}{flow_name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
|
164
164
|
options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
|
165
165
|
fl.evaluate_and_dump(options)
|
166
166
|
|
cocoindex/flow.py
CHANGED
@@ -19,6 +19,7 @@ from rich.tree import Tree
|
|
19
19
|
from . import _engine
|
20
20
|
from . import index
|
21
21
|
from . import op
|
22
|
+
from . import setting
|
22
23
|
from .convert import dump_engine_object
|
23
24
|
from .typing import encode_enriched_type
|
24
25
|
from .runtime import execution_context
|
@@ -310,7 +311,7 @@ class _FlowBuilderState:
|
|
310
311
|
|
311
312
|
def __init__(self, /, name: str | None = None):
|
312
313
|
flow_name = _flow_name_builder.build_name(name, prefix="_flow_")
|
313
|
-
self.engine_flow_builder = _engine.FlowBuilder(flow_name)
|
314
|
+
self.engine_flow_builder = _engine.FlowBuilder(get_full_flow_name(flow_name))
|
314
315
|
self.field_name_builder = _NameBuilder()
|
315
316
|
|
316
317
|
def get_data_slice(self, v: Any) -> _engine.DataSlice:
|
@@ -481,7 +482,7 @@ class Flow:
|
|
481
482
|
Render the flow spec as a styled rich Tree with hierarchical structure.
|
482
483
|
"""
|
483
484
|
spec = self._get_spec(verbose=verbose)
|
484
|
-
tree = Tree(f"Flow: {self.name}", style="cyan")
|
485
|
+
tree = Tree(f"Flow: {self.full_name}", style="cyan")
|
485
486
|
|
486
487
|
def build_tree(label: str, lines: list):
|
487
488
|
node = Tree(label, style="bold magenta" if lines else "cyan")
|
@@ -508,9 +509,9 @@ class Flow:
|
|
508
509
|
return repr(self._lazy_engine_flow())
|
509
510
|
|
510
511
|
@property
|
511
|
-
def name(self) -> str:
|
512
|
+
def full_name(self) -> str:
|
512
513
|
"""
|
513
|
-
Get the name of the flow.
|
514
|
+
Get the full name of the flow.
|
514
515
|
"""
|
515
516
|
return self._lazy_engine_flow().name()
|
516
517
|
|
@@ -566,8 +567,16 @@ def _create_lazy_flow(name: str | None, fl_def: Callable[[FlowBuilder, DataScope
|
|
566
567
|
_flows_lock = Lock()
|
567
568
|
_flows: dict[str, Flow] = {}
|
568
569
|
|
570
|
+
def get_full_flow_name(name: str) -> str:
|
571
|
+
"""
|
572
|
+
Get the full name of a flow.
|
573
|
+
"""
|
574
|
+
return f"{setting.get_app_namespace(trailing_delimiter='.')}{name}"
|
575
|
+
|
569
576
|
def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
|
570
577
|
"""Add a flow definition to the cocoindex library."""
|
578
|
+
if not all(c.isalnum() or c == '_' for c in name):
|
579
|
+
raise ValueError(f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed.")
|
571
580
|
with _flows_lock:
|
572
581
|
if name in _flows:
|
573
582
|
raise KeyError(f"Flow with name {name} already exists")
|
@@ -587,12 +596,12 @@ def flow_names() -> list[str]:
|
|
587
596
|
with _flows_lock:
|
588
597
|
return list(_flows.keys())
|
589
598
|
|
590
|
-
def flows() ->
|
599
|
+
def flows() -> dict[str, Flow]:
|
591
600
|
"""
|
592
601
|
Get all flows.
|
593
602
|
"""
|
594
603
|
with _flows_lock:
|
595
|
-
return
|
604
|
+
return dict(_flows)
|
596
605
|
|
597
606
|
def flow_by_name(name: str) -> Flow:
|
598
607
|
"""
|
@@ -605,14 +614,13 @@ def ensure_all_flows_built() -> None:
|
|
605
614
|
"""
|
606
615
|
Ensure all flows are built.
|
607
616
|
"""
|
608
|
-
|
609
|
-
fl.internal_flow()
|
617
|
+
execution_context.run(ensure_all_flows_built_async())
|
610
618
|
|
611
619
|
async def ensure_all_flows_built_async() -> None:
|
612
620
|
"""
|
613
621
|
Ensure all flows are built.
|
614
622
|
"""
|
615
|
-
for fl in flows():
|
623
|
+
for fl in flows().values():
|
616
624
|
await fl.internal_flow_async()
|
617
625
|
|
618
626
|
def update_all_flows(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
|
@@ -626,13 +634,13 @@ async def update_all_flows_async(options: FlowLiveUpdaterOptions) -> dict[str, _
|
|
626
634
|
Update all flows.
|
627
635
|
"""
|
628
636
|
await ensure_all_flows_built_async()
|
629
|
-
async def _update_flow(fl: Flow) -> _engine.IndexUpdateInfo:
|
637
|
+
async def _update_flow(name: str, fl: Flow) -> tuple[str, _engine.IndexUpdateInfo]:
|
630
638
|
async with FlowLiveUpdater(fl, options) as updater:
|
631
639
|
await updater.wait_async()
|
632
|
-
return updater.update_stats()
|
640
|
+
return (name, updater.update_stats())
|
633
641
|
fls = flows()
|
634
|
-
all_stats = await asyncio.gather(*(_update_flow(fl) for fl in fls))
|
635
|
-
return
|
642
|
+
all_stats = await asyncio.gather(*(_update_flow(name, fl) for (name, fl) in fls.items()))
|
643
|
+
return dict(all_stats)
|
636
644
|
|
637
645
|
_transient_flow_name_builder = _NameBuilder()
|
638
646
|
class TransientFlow:
|
cocoindex/lib.py
CHANGED
@@ -15,6 +15,7 @@ from .convert import dump_engine_object
|
|
15
15
|
def init(settings: setting.Settings):
|
16
16
|
"""Initialize the cocoindex library."""
|
17
17
|
_engine.init(dump_engine_object(settings))
|
18
|
+
setting.set_app_namespace(settings.app_namespace)
|
18
19
|
|
19
20
|
|
20
21
|
def start_server(settings: setting.ServerSettings):
|
cocoindex/setting.py
CHANGED
@@ -6,6 +6,25 @@ import os
|
|
6
6
|
from typing import Callable, Self, Any, overload
|
7
7
|
from dataclasses import dataclass
|
8
8
|
|
9
|
+
_app_namespace: str = ''
|
10
|
+
|
11
|
+
def get_app_namespace(*, trailing_delimiter: str | None = None) -> str:
|
12
|
+
"""Get the application namespace. Append the `trailing_delimiter` if not empty."""
|
13
|
+
if _app_namespace == '' or trailing_delimiter is None:
|
14
|
+
return _app_namespace
|
15
|
+
return f'{_app_namespace}{trailing_delimiter}'
|
16
|
+
|
17
|
+
def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
|
18
|
+
"""Split the full name into the application namespace and the rest."""
|
19
|
+
parts = full_name.split(delimiter, 1)
|
20
|
+
if len(parts) == 1:
|
21
|
+
return '', parts[0]
|
22
|
+
return (parts[0], parts[1])
|
23
|
+
|
24
|
+
def set_app_namespace(app_namespace: str):
|
25
|
+
"""Set the application namespace."""
|
26
|
+
global _app_namespace # pylint: disable=global-statement
|
27
|
+
_app_namespace = app_namespace
|
9
28
|
|
10
29
|
@dataclass
|
11
30
|
class DatabaseConnectionSpec:
|
@@ -30,6 +49,7 @@ def _load_field(target: dict[str, Any], name: str, env_name: str, required: bool
|
|
30
49
|
class Settings:
|
31
50
|
"""Settings for the cocoindex library."""
|
32
51
|
database: DatabaseConnectionSpec
|
52
|
+
app_namespace: str
|
33
53
|
|
34
54
|
@classmethod
|
35
55
|
def from_env(cls) -> Self:
|
@@ -40,7 +60,10 @@ class Settings:
|
|
40
60
|
_load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
|
41
61
|
_load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
|
42
62
|
database = DatabaseConnectionSpec(**db_kwargs)
|
43
|
-
|
63
|
+
|
64
|
+
app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", '')
|
65
|
+
|
66
|
+
return cls(database=database, app_namespace=app_namespace)
|
44
67
|
|
45
68
|
@dataclass
|
46
69
|
class ServerSettings:
|
cocoindex/setup.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
from . import flow
|
2
|
+
from . import setting
|
2
3
|
from . import _engine
|
3
4
|
|
4
5
|
def sync_setup() -> _engine.SetupStatus:
|
@@ -7,10 +8,15 @@ def sync_setup() -> _engine.SetupStatus:
|
|
7
8
|
|
8
9
|
def drop_setup(flow_names: list[str]) -> _engine.SetupStatus:
|
9
10
|
flow.ensure_all_flows_built()
|
10
|
-
return _engine.drop_setup(flow_names)
|
11
|
+
return _engine.drop_setup([flow.get_full_flow_name(name) for name in flow_names])
|
11
12
|
|
12
13
|
def flow_names_with_setup() -> list[str]:
|
13
|
-
|
14
|
+
result = []
|
15
|
+
for name in _engine.flow_names_with_setup():
|
16
|
+
app_namespace, name = setting.split_app_namespace(name, '.')
|
17
|
+
if app_namespace == setting.get_app_namespace():
|
18
|
+
result.append(name)
|
19
|
+
return result
|
14
20
|
|
15
21
|
def apply_setup_changes(setup_status: _engine.SetupStatus):
|
16
22
|
_engine.apply_setup_changes(setup_status)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: cocoindex
|
3
|
-
Version: 0.1.37
|
3
|
+
Version: 0.1.38
|
4
4
|
Requires-Dist: sentence-transformers>=3.3.1
|
5
5
|
Requires-Dist: click>=8.1.8
|
6
6
|
Requires-Dist: rich>=14.0.0
|
@@ -148,6 +148,7 @@ It defines an index flow like this:
|
|
148
148
|
| [Code Embedding](examples/code_embedding) | Index code embeddings for semantic search |
|
149
149
|
| [PDF Embedding](examples/pdf_embedding) | Parse PDF and index text embeddings for semantic search |
|
150
150
|
| [Manuals LLM Extraction](examples/manuals_llm_extraction) | Extract structured information from a manual using LLM |
|
151
|
+
| [Amazon S3 Embedding](examples/amazon_s3_embedding) | Index text documents from Amazon S3 |
|
151
152
|
| [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
|
152
153
|
| [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
|
153
154
|
| [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
|
@@ -1,25 +1,25 @@
|
|
1
|
-
cocoindex-0.1.
|
2
|
-
cocoindex-0.1.
|
3
|
-
cocoindex-0.1.
|
1
|
+
cocoindex-0.1.38.dist-info/METADATA,sha256=d1qjWo7V_MjeUiBe4zIOyqCyg8gM7-DaUDbKPSIf3_Q,9793
|
2
|
+
cocoindex-0.1.38.dist-info/WHEEL,sha256=ryp1uXCltaq3TPfjxhSWoam4EHMC40-Yabw-yvEW9gU,104
|
3
|
+
cocoindex-0.1.38.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
4
4
|
cocoindex/__init__.py,sha256=LpB0VjGvkD1beio8R9RCT6PI3eU0keV-3sBL45fHTQE,690
|
5
|
-
cocoindex/_engine.cpython-312-darwin.so,sha256=
|
5
|
+
cocoindex/_engine.cpython-312-darwin.so,sha256=yNba_1vfgJyswvHd7QM_s7XOfkYJBUP4o4h97J3WTEU,56739728
|
6
6
|
cocoindex/auth_registry.py,sha256=NsALZ3SKsDG9cPdrlTlalIqUvgbgFOaFGAbWJNedtJE,692
|
7
|
-
cocoindex/cli.py,sha256=
|
7
|
+
cocoindex/cli.py,sha256=Ac3ybnQW-HGVGJeUwIOHd1qhjs0KC5wCsemWuyouEfU,8999
|
8
8
|
cocoindex/convert.py,sha256=tRY-QBeeFMFwCYiRk7a0_tuDqopw8iqBpg_Aswcq9JQ,6864
|
9
|
-
cocoindex/flow.py,sha256=
|
9
|
+
cocoindex/flow.py,sha256=r1GnRIthmkniJEsAxGsYlDXbcf7ydMwZy1qJEKzwtqc,23814
|
10
10
|
cocoindex/functions.py,sha256=F79dNmGE127LaU67kF5Oqtf_tIzebFQH7MkyceMX4-s,1830
|
11
11
|
cocoindex/index.py,sha256=LssEOuZi6AqhwKtZM3QFeQpa9T-0ELi8G5DsrYKECvc,534
|
12
|
-
cocoindex/lib.py,sha256=
|
12
|
+
cocoindex/lib.py,sha256=OqTMuOHicdyX9PRA7fmTzznK8HZMrzxpUDbqxAEF--Q,2383
|
13
13
|
cocoindex/llm.py,sha256=_3rtahuKcqcEHPkFSwhXOSrekZyGxVApPoYtlU_chcA,348
|
14
14
|
cocoindex/op.py,sha256=OGYRYl7gPa7X7iSU30iTrCzvqRBu7jQqfvN4vjG__dA,10730
|
15
15
|
cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
cocoindex/query.py,sha256=8_3Lb_EVjZtl2ZyJNZGX16LoKXEd-PL8OjY-zs9GQeA,3205
|
17
17
|
cocoindex/runtime.py,sha256=jqRnWkkIlAhE04gi4y0Y5bzuq9FX4j0aVNU-nengLJk,980
|
18
|
-
cocoindex/setting.py,sha256=
|
19
|
-
cocoindex/setup.py,sha256=
|
18
|
+
cocoindex/setting.py,sha256=AaIMclEktbBgK7Cks2D8LfS1cskf8UUcbSb6UBLdoSs,3260
|
19
|
+
cocoindex/setup.py,sha256=ErNtX08NfFOFKehp5qGUvCx8Wiz9f3gmzvfBhAqrQyI,745
|
20
20
|
cocoindex/sources.py,sha256=7lpwYLsFCRfbURKf79Vu0JZZoXjAYY0DxNHzUb-VHBY,1327
|
21
21
|
cocoindex/storages.py,sha256=MFMsfyOCYMggTWeWrOi82miqOXQmiUuqq828x5htBr0,2207
|
22
22
|
cocoindex/tests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
23
23
|
cocoindex/tests/test_convert.py,sha256=7jc--I3frrg7DB5MPr4JFzE7DSCznJuWyHdlDLQJ_fM,15516
|
24
24
|
cocoindex/typing.py,sha256=369ABRtnpbaVSQVIBc2ZDutXW8jUmncvNJd9CHEWT3Q,8962
|
25
|
-
cocoindex-0.1.37.dist-info/RECORD,,
|
25
|
+
cocoindex-0.1.38.dist-info/RECORD,,
|
File without changes
|
File without changes
|