cocoindex 0.1.37__cp312-cp312-macosx_11_0_arm64.whl → 0.1.38__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
cocoindex/cli.py CHANGED
@@ -16,7 +16,7 @@ def cli():
16
16
  @cli.command()
17
17
  @click.option(
18
18
  "-a", "--all", "show_all", is_flag=True, show_default=True, default=False,
19
- help="Also show all flows with persisted setup, even if not defined in the current process.")
19
+ help="Also show all flows with persisted setup under the current app namespace, even if not defined in the current process.")
20
20
  def ls(show_all: bool):
21
21
  """
22
22
  List all flows.
@@ -65,7 +65,7 @@ def show(flow_name: str | None, color: bool, verbose: bool):
65
65
 
66
66
  console.print()
67
67
  table = Table(
68
- title=f"Schema for Flow: {flow.name}",
68
+ title=f"Schema for Flow: {flow.full_name}",
69
69
  show_header=True,
70
70
  header_style="bold magenta"
71
71
  )
@@ -108,7 +108,7 @@ def drop(flow_name: tuple[str, ...], drop_all: bool):
108
108
  if drop_all:
109
109
  flow_names = flow_names_with_setup()
110
110
  elif len(flow_name) == 0:
111
- flow_names = [fl.name for fl in flow.flows()]
111
+ flow_names = flow.flow_names()
112
112
  else:
113
113
  flow_names = list(flow_name)
114
114
  setup_status = drop_setup(flow_names)
@@ -160,7 +160,7 @@ def evaluate(flow_name: str | None, output_dir: str | None, cache: bool = True):
160
160
  """
161
161
  fl = _flow_by_name(flow_name)
162
162
  if output_dir is None:
163
- output_dir = f"eval_{fl.name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
163
+ output_dir = f"eval_{setting.get_app_namespace(trailing_delimiter='_')}{flow_name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
164
164
  options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
165
165
  fl.evaluate_and_dump(options)
166
166
 
cocoindex/flow.py CHANGED
@@ -19,6 +19,7 @@ from rich.tree import Tree
19
19
  from . import _engine
20
20
  from . import index
21
21
  from . import op
22
+ from . import setting
22
23
  from .convert import dump_engine_object
23
24
  from .typing import encode_enriched_type
24
25
  from .runtime import execution_context
@@ -310,7 +311,7 @@ class _FlowBuilderState:
310
311
 
311
312
  def __init__(self, /, name: str | None = None):
312
313
  flow_name = _flow_name_builder.build_name(name, prefix="_flow_")
313
- self.engine_flow_builder = _engine.FlowBuilder(flow_name)
314
+ self.engine_flow_builder = _engine.FlowBuilder(get_full_flow_name(flow_name))
314
315
  self.field_name_builder = _NameBuilder()
315
316
 
316
317
  def get_data_slice(self, v: Any) -> _engine.DataSlice:
@@ -481,7 +482,7 @@ class Flow:
481
482
  Render the flow spec as a styled rich Tree with hierarchical structure.
482
483
  """
483
484
  spec = self._get_spec(verbose=verbose)
484
- tree = Tree(f"Flow: {self.name}", style="cyan")
485
+ tree = Tree(f"Flow: {self.full_name}", style="cyan")
485
486
 
486
487
  def build_tree(label: str, lines: list):
487
488
  node = Tree(label, style="bold magenta" if lines else "cyan")
@@ -508,9 +509,9 @@ class Flow:
508
509
  return repr(self._lazy_engine_flow())
509
510
 
510
511
  @property
511
- def name(self) -> str:
512
+ def full_name(self) -> str:
512
513
  """
513
- Get the name of the flow.
514
+ Get the full name of the flow.
514
515
  """
515
516
  return self._lazy_engine_flow().name()
516
517
 
@@ -566,8 +567,16 @@ def _create_lazy_flow(name: str | None, fl_def: Callable[[FlowBuilder, DataScope
566
567
  _flows_lock = Lock()
567
568
  _flows: dict[str, Flow] = {}
568
569
 
570
+ def get_full_flow_name(name: str) -> str:
571
+ """
572
+ Get the full name of a flow.
573
+ """
574
+ return f"{setting.get_app_namespace(trailing_delimiter='.')}{name}"
575
+
569
576
  def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
570
577
  """Add a flow definition to the cocoindex library."""
578
+ if not all(c.isalnum() or c == '_' for c in name):
579
+ raise ValueError(f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed.")
571
580
  with _flows_lock:
572
581
  if name in _flows:
573
582
  raise KeyError(f"Flow with name {name} already exists")
@@ -587,12 +596,12 @@ def flow_names() -> list[str]:
587
596
  with _flows_lock:
588
597
  return list(_flows.keys())
589
598
 
590
- def flows() -> list[Flow]:
599
+ def flows() -> dict[str, Flow]:
591
600
  """
592
601
  Get all flows.
593
602
  """
594
603
  with _flows_lock:
595
- return list(_flows.values())
604
+ return dict(_flows)
596
605
 
597
606
  def flow_by_name(name: str) -> Flow:
598
607
  """
@@ -605,14 +614,13 @@ def ensure_all_flows_built() -> None:
605
614
  """
606
615
  Ensure all flows are built.
607
616
  """
608
- for fl in flows():
609
- fl.internal_flow()
617
+ execution_context.run(ensure_all_flows_built_async())
610
618
 
611
619
  async def ensure_all_flows_built_async() -> None:
612
620
  """
613
621
  Ensure all flows are built.
614
622
  """
615
- for fl in flows():
623
+ for fl in flows().values():
616
624
  await fl.internal_flow_async()
617
625
 
618
626
  def update_all_flows(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
@@ -626,13 +634,13 @@ async def update_all_flows_async(options: FlowLiveUpdaterOptions) -> dict[str, _
626
634
  Update all flows.
627
635
  """
628
636
  await ensure_all_flows_built_async()
629
- async def _update_flow(fl: Flow) -> _engine.IndexUpdateInfo:
637
+ async def _update_flow(name: str, fl: Flow) -> tuple[str, _engine.IndexUpdateInfo]:
630
638
  async with FlowLiveUpdater(fl, options) as updater:
631
639
  await updater.wait_async()
632
- return updater.update_stats()
640
+ return (name, updater.update_stats())
633
641
  fls = flows()
634
- all_stats = await asyncio.gather(*(_update_flow(fl) for fl in fls))
635
- return {fl.name: stats for fl, stats in zip(fls, all_stats)}
642
+ all_stats = await asyncio.gather(*(_update_flow(name, fl) for (name, fl) in fls.items()))
643
+ return dict(all_stats)
636
644
 
637
645
  _transient_flow_name_builder = _NameBuilder()
638
646
  class TransientFlow:
cocoindex/lib.py CHANGED
@@ -15,6 +15,7 @@ from .convert import dump_engine_object
15
15
  def init(settings: setting.Settings):
16
16
  """Initialize the cocoindex library."""
17
17
  _engine.init(dump_engine_object(settings))
18
+ setting.set_app_namespace(settings.app_namespace)
18
19
 
19
20
 
20
21
  def start_server(settings: setting.ServerSettings):
cocoindex/setting.py CHANGED
@@ -6,6 +6,25 @@ import os
6
6
  from typing import Callable, Self, Any, overload
7
7
  from dataclasses import dataclass
8
8
 
9
+ _app_namespace: str = ''
10
+
11
+ def get_app_namespace(*, trailing_delimiter: str | None = None) -> str:
12
+ """Get the application namespace. Append the `trailing_delimiter` if not empty."""
13
+ if _app_namespace == '' or trailing_delimiter is None:
14
+ return _app_namespace
15
+ return f'{_app_namespace}{trailing_delimiter}'
16
+
17
+ def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
18
+ """Split the full name into the application namespace and the rest."""
19
+ parts = full_name.split(delimiter, 1)
20
+ if len(parts) == 1:
21
+ return '', parts[0]
22
+ return (parts[0], parts[1])
23
+
24
+ def set_app_namespace(app_namespace: str):
25
+ """Set the application namespace."""
26
+ global _app_namespace # pylint: disable=global-statement
27
+ _app_namespace = app_namespace
9
28
 
10
29
  @dataclass
11
30
  class DatabaseConnectionSpec:
@@ -30,6 +49,7 @@ def _load_field(target: dict[str, Any], name: str, env_name: str, required: bool
30
49
  class Settings:
31
50
  """Settings for the cocoindex library."""
32
51
  database: DatabaseConnectionSpec
52
+ app_namespace: str
33
53
 
34
54
  @classmethod
35
55
  def from_env(cls) -> Self:
@@ -40,7 +60,10 @@ class Settings:
40
60
  _load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
41
61
  _load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
42
62
  database = DatabaseConnectionSpec(**db_kwargs)
43
- return cls(database=database)
63
+
64
+ app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", '')
65
+
66
+ return cls(database=database, app_namespace=app_namespace)
44
67
 
45
68
  @dataclass
46
69
  class ServerSettings:
cocoindex/setup.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from . import flow
2
+ from . import setting
2
3
  from . import _engine
3
4
 
4
5
  def sync_setup() -> _engine.SetupStatus:
@@ -7,10 +8,15 @@ def sync_setup() -> _engine.SetupStatus:
7
8
 
8
9
  def drop_setup(flow_names: list[str]) -> _engine.SetupStatus:
9
10
  flow.ensure_all_flows_built()
10
- return _engine.drop_setup(flow_names)
11
+ return _engine.drop_setup([flow.get_full_flow_name(name) for name in flow_names])
11
12
 
12
13
  def flow_names_with_setup() -> list[str]:
13
- return _engine.flow_names_with_setup()
14
+ result = []
15
+ for name in _engine.flow_names_with_setup():
16
+ app_namespace, name = setting.split_app_namespace(name, '.')
17
+ if app_namespace == setting.get_app_namespace():
18
+ result.append(name)
19
+ return result
14
20
 
15
21
  def apply_setup_changes(setup_status: _engine.SetupStatus):
16
22
  _engine.apply_setup_changes(setup_status)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.37
3
+ Version: 0.1.38
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
@@ -148,6 +148,7 @@ It defines an index flow like this:
148
148
  | [Code Embedding](examples/code_embedding) | Index code embeddings for semantic search |
149
149
  | [PDF Embedding](examples/pdf_embedding) | Parse PDF and index text embeddings for semantic search |
150
150
  | [Manuals LLM Extraction](examples/manuals_llm_extraction) | Extract structured information from a manual using LLM |
151
+ | [Amazon S3 Embedding](examples/amazon_s3_embedding) | Index text documents from Amazon S3 |
151
152
  | [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
152
153
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
153
154
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
@@ -1,25 +1,25 @@
1
- cocoindex-0.1.37.dist-info/METADATA,sha256=q_5xT2W7gjD_IiCl_pBr__gMfn3ldcoFppGNTmROkMY,9699
2
- cocoindex-0.1.37.dist-info/WHEEL,sha256=ryp1uXCltaq3TPfjxhSWoam4EHMC40-Yabw-yvEW9gU,104
3
- cocoindex-0.1.37.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
1
+ cocoindex-0.1.38.dist-info/METADATA,sha256=d1qjWo7V_MjeUiBe4zIOyqCyg8gM7-DaUDbKPSIf3_Q,9793
2
+ cocoindex-0.1.38.dist-info/WHEEL,sha256=ryp1uXCltaq3TPfjxhSWoam4EHMC40-Yabw-yvEW9gU,104
3
+ cocoindex-0.1.38.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
4
4
  cocoindex/__init__.py,sha256=LpB0VjGvkD1beio8R9RCT6PI3eU0keV-3sBL45fHTQE,690
5
- cocoindex/_engine.cpython-312-darwin.so,sha256=ixvMKTQ1mhgoOpeiCWhmtTuxFhpjTKD0D1mrpP6FeoI,56739584
5
+ cocoindex/_engine.cpython-312-darwin.so,sha256=yNba_1vfgJyswvHd7QM_s7XOfkYJBUP4o4h97J3WTEU,56739728
6
6
  cocoindex/auth_registry.py,sha256=NsALZ3SKsDG9cPdrlTlalIqUvgbgFOaFGAbWJNedtJE,692
7
- cocoindex/cli.py,sha256=1PJTdwX-X9r6Obw_W7VZDUgd5E7Xw6tLyvSDT8dFnY8,8924
7
+ cocoindex/cli.py,sha256=Ac3ybnQW-HGVGJeUwIOHd1qhjs0KC5wCsemWuyouEfU,8999
8
8
  cocoindex/convert.py,sha256=tRY-QBeeFMFwCYiRk7a0_tuDqopw8iqBpg_Aswcq9JQ,6864
9
- cocoindex/flow.py,sha256=MZZ0Uf0ObAzR1yIjUecRgA-U0t__95eoLBK_DxwwLnk,23375
9
+ cocoindex/flow.py,sha256=r1GnRIthmkniJEsAxGsYlDXbcf7ydMwZy1qJEKzwtqc,23814
10
10
  cocoindex/functions.py,sha256=F79dNmGE127LaU67kF5Oqtf_tIzebFQH7MkyceMX4-s,1830
11
11
  cocoindex/index.py,sha256=LssEOuZi6AqhwKtZM3QFeQpa9T-0ELi8G5DsrYKECvc,534
12
- cocoindex/lib.py,sha256=812GB8Z-2PyjG73Odvw5jtNBLnoeU9aOh9s2ZnETKa8,2329
12
+ cocoindex/lib.py,sha256=OqTMuOHicdyX9PRA7fmTzznK8HZMrzxpUDbqxAEF--Q,2383
13
13
  cocoindex/llm.py,sha256=_3rtahuKcqcEHPkFSwhXOSrekZyGxVApPoYtlU_chcA,348
14
14
  cocoindex/op.py,sha256=OGYRYl7gPa7X7iSU30iTrCzvqRBu7jQqfvN4vjG__dA,10730
15
15
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  cocoindex/query.py,sha256=8_3Lb_EVjZtl2ZyJNZGX16LoKXEd-PL8OjY-zs9GQeA,3205
17
17
  cocoindex/runtime.py,sha256=jqRnWkkIlAhE04gi4y0Y5bzuq9FX4j0aVNU-nengLJk,980
18
- cocoindex/setting.py,sha256=pms1blwlXIOqZIpye-rfiwzqYUCAC8oEL7mQM5A160g,2356
19
- cocoindex/setup.py,sha256=AQLbtBLuJX066IANS7BGp20246mAGQ_4Z0W6MVJcQzY,481
18
+ cocoindex/setting.py,sha256=AaIMclEktbBgK7Cks2D8LfS1cskf8UUcbSb6UBLdoSs,3260
19
+ cocoindex/setup.py,sha256=ErNtX08NfFOFKehp5qGUvCx8Wiz9f3gmzvfBhAqrQyI,745
20
20
  cocoindex/sources.py,sha256=7lpwYLsFCRfbURKf79Vu0JZZoXjAYY0DxNHzUb-VHBY,1327
21
21
  cocoindex/storages.py,sha256=MFMsfyOCYMggTWeWrOi82miqOXQmiUuqq828x5htBr0,2207
22
22
  cocoindex/tests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
23
  cocoindex/tests/test_convert.py,sha256=7jc--I3frrg7DB5MPr4JFzE7DSCznJuWyHdlDLQJ_fM,15516
24
24
  cocoindex/typing.py,sha256=369ABRtnpbaVSQVIBc2ZDutXW8jUmncvNJd9CHEWT3Q,8962
25
- cocoindex-0.1.37.dist-info/RECORD,,
25
+ cocoindex-0.1.38.dist-info/RECORD,,