cocoindex 0.1.36__cp311-cp311-macosx_11_0_arm64.whl → 0.1.38__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
cocoindex/cli.py CHANGED
@@ -16,7 +16,7 @@ def cli():
16
16
  @cli.command()
17
17
  @click.option(
18
18
  "-a", "--all", "show_all", is_flag=True, show_default=True, default=False,
19
- help="Also show all flows with persisted setup, even if not defined in the current process.")
19
+ help="Also show all flows with persisted setup under the current app namespace, even if not defined in the current process.")
20
20
  def ls(show_all: bool):
21
21
  """
22
22
  List all flows.
@@ -65,7 +65,7 @@ def show(flow_name: str | None, color: bool, verbose: bool):
65
65
 
66
66
  console.print()
67
67
  table = Table(
68
- title=f"Schema for Flow: {flow.name}",
68
+ title=f"Schema for Flow: {flow.full_name}",
69
69
  show_header=True,
70
70
  header_style="bold magenta"
71
71
  )
@@ -108,7 +108,7 @@ def drop(flow_name: tuple[str, ...], drop_all: bool):
108
108
  if drop_all:
109
109
  flow_names = flow_names_with_setup()
110
110
  elif len(flow_name) == 0:
111
- flow_names = [fl.name for fl in flow.flows()]
111
+ flow_names = flow.flow_names()
112
112
  else:
113
113
  flow_names = list(flow_name)
114
114
  setup_status = drop_setup(flow_names)
@@ -160,7 +160,7 @@ def evaluate(flow_name: str | None, output_dir: str | None, cache: bool = True):
160
160
  """
161
161
  fl = _flow_by_name(flow_name)
162
162
  if output_dir is None:
163
- output_dir = f"eval_{fl.name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
163
+ output_dir = f"eval_{setting.get_app_namespace(trailing_delimiter='_')}{flow_name}_{datetime.datetime.now().strftime('%y%m%d_%H%M%S')}"
164
164
  options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
165
165
  fl.evaluate_and_dump(options)
166
166
 
@@ -212,11 +212,12 @@ def server(address: str | None, live_update: bool, quiet: bool, cors_origin: str
212
212
 
213
213
  lib.start_server(server_settings)
214
214
 
215
+ if COCOINDEX_HOST in cors_origins:
216
+ click.echo(f"Open CocoInsight at: {COCOINDEX_HOST}/cocoinsight")
217
+
215
218
  if live_update:
216
219
  options = flow.FlowLiveUpdaterOptions(live_mode=True, print_stats=not quiet)
217
220
  flow.update_all_flows(options)
218
- if COCOINDEX_HOST in cors_origins:
219
- click.echo(f"Open CocoInsight at: {COCOINDEX_HOST}/cocoinsight")
220
221
  input("Press Enter to stop...")
221
222
 
222
223
 
cocoindex/convert.py CHANGED
@@ -8,13 +8,15 @@ import uuid
8
8
 
9
9
  from enum import Enum
10
10
  from typing import Any, Callable, get_origin
11
- from .typing import analyze_type_info, encode_enriched_type, TABLE_TYPES, KEY_FIELD_NAME
11
+ from .typing import analyze_type_info, encode_enriched_type, is_namedtuple_type, TABLE_TYPES, KEY_FIELD_NAME
12
12
 
13
13
 
14
14
  def encode_engine_value(value: Any) -> Any:
15
15
  """Encode a Python value to an engine value."""
16
16
  if dataclasses.is_dataclass(value):
17
17
  return [encode_engine_value(getattr(value, f.name)) for f in dataclasses.fields(value)]
18
+ if is_namedtuple_type(type(value)):
19
+ return [encode_engine_value(getattr(value, name)) for name in value._fields]
18
20
  if isinstance(value, (list, tuple)):
19
21
  return [encode_engine_value(v) for v in value]
20
22
  if isinstance(value, dict):
@@ -55,16 +57,16 @@ def make_engine_value_decoder(
55
57
  f"Type mismatch for `{''.join(field_path)}`: "
56
58
  f"passed in {src_type_kind}, declared {dst_annotation} ({dst_type_info.kind})")
57
59
 
58
- if dst_type_info.dataclass_type is not None:
60
+ if dst_type_info.struct_type is not None:
59
61
  return _make_engine_struct_value_decoder(
60
- field_path, src_type['fields'], dst_type_info.dataclass_type)
62
+ field_path, src_type['fields'], dst_type_info.struct_type)
61
63
 
62
64
  if src_type_kind in TABLE_TYPES:
63
65
  field_path.append('[*]')
64
66
  elem_type_info = analyze_type_info(dst_type_info.elem_type)
65
- if elem_type_info.dataclass_type is None:
67
+ if elem_type_info.struct_type is None:
66
68
  raise ValueError(f"Type mismatch for `{''.join(field_path)}`: "
67
- f"declared `{dst_type_info.kind}`, a dataclass type expected")
69
+ f"declared `{dst_type_info.kind}`, a dataclass or NamedTuple type expected")
68
70
  engine_fields_schema = src_type['row']['fields']
69
71
  if elem_type_info.key_type is not None:
70
72
  key_field_schema = engine_fields_schema[0]
@@ -73,14 +75,14 @@ def make_engine_value_decoder(
73
75
  field_path, key_field_schema['type'], elem_type_info.key_type)
74
76
  field_path.pop()
75
77
  value_decoder = _make_engine_struct_value_decoder(
76
- field_path, engine_fields_schema[1:], elem_type_info.dataclass_type)
78
+ field_path, engine_fields_schema[1:], elem_type_info.struct_type)
77
79
  def decode(value):
78
80
  if value is None:
79
81
  return None
80
82
  return {key_decoder(v[0]): value_decoder(v[1:]) for v in value}
81
83
  else:
82
84
  elem_decoder = _make_engine_struct_value_decoder(
83
- field_path, engine_fields_schema, elem_type_info.dataclass_type)
85
+ field_path, engine_fields_schema, elem_type_info.struct_type)
84
86
  def decode(value):
85
87
  if value is None:
86
88
  return None
@@ -96,11 +98,31 @@ def make_engine_value_decoder(
96
98
  def _make_engine_struct_value_decoder(
97
99
  field_path: list[str],
98
100
  src_fields: list[dict[str, Any]],
99
- dst_dataclass_type: type,
101
+ dst_struct_type: type,
100
102
  ) -> Callable[[list], Any]:
101
103
  """Make a decoder from an engine field values to a Python value."""
102
104
 
103
105
  src_name_to_idx = {f['name']: i for i, f in enumerate(src_fields)}
106
+
107
+ is_dataclass = dataclasses.is_dataclass(dst_struct_type)
108
+ is_namedtuple = is_namedtuple_type(dst_struct_type)
109
+
110
+ if is_dataclass:
111
+ parameters = inspect.signature(dst_struct_type).parameters
112
+ elif is_namedtuple:
113
+ defaults = getattr(dst_struct_type, '_field_defaults', {})
114
+ parameters = {
115
+ name: inspect.Parameter(
116
+ name=name,
117
+ kind=inspect.Parameter.POSITIONAL_OR_KEYWORD,
118
+ default=defaults.get(name, inspect.Parameter.empty),
119
+ annotation=dst_struct_type.__annotations__.get(name, inspect.Parameter.empty)
120
+ )
121
+ for name in dst_struct_type._fields
122
+ }
123
+ else:
124
+ raise ValueError(f"Unsupported struct type: {dst_struct_type}")
125
+
104
126
  def make_closure_for_value(name: str, param: inspect.Parameter) -> Callable[[list], Any]:
105
127
  src_idx = src_name_to_idx.get(name)
106
128
  if src_idx is not None:
@@ -108,7 +130,7 @@ def _make_engine_struct_value_decoder(
108
130
  field_decoder = make_engine_value_decoder(
109
131
  field_path, src_fields[src_idx]['type'], param.annotation)
110
132
  field_path.pop()
111
- return lambda values: field_decoder(values[src_idx])
133
+ return lambda values: field_decoder(values[src_idx]) if len(values) > src_idx else param.default
112
134
 
113
135
  default_value = param.default
114
136
  if default_value is inspect.Parameter.empty:
@@ -119,9 +141,9 @@ def _make_engine_struct_value_decoder(
119
141
 
120
142
  field_value_decoder = [
121
143
  make_closure_for_value(name, param)
122
- for (name, param) in inspect.signature(dst_dataclass_type).parameters.items()]
144
+ for (name, param) in parameters.items()]
123
145
 
124
- return lambda values: dst_dataclass_type(
146
+ return lambda values: dst_struct_type(
125
147
  *(decoder(values) for decoder in field_value_decoder))
126
148
 
127
149
  def dump_engine_object(v: Any) -> Any:
cocoindex/flow.py CHANGED
@@ -19,6 +19,7 @@ from rich.tree import Tree
19
19
  from . import _engine
20
20
  from . import index
21
21
  from . import op
22
+ from . import setting
22
23
  from .convert import dump_engine_object
23
24
  from .typing import encode_enriched_type
24
25
  from .runtime import execution_context
@@ -310,7 +311,7 @@ class _FlowBuilderState:
310
311
 
311
312
  def __init__(self, /, name: str | None = None):
312
313
  flow_name = _flow_name_builder.build_name(name, prefix="_flow_")
313
- self.engine_flow_builder = _engine.FlowBuilder(flow_name)
314
+ self.engine_flow_builder = _engine.FlowBuilder(get_full_flow_name(flow_name))
314
315
  self.field_name_builder = _NameBuilder()
315
316
 
316
317
  def get_data_slice(self, v: Any) -> _engine.DataSlice:
@@ -481,7 +482,7 @@ class Flow:
481
482
  Render the flow spec as a styled rich Tree with hierarchical structure.
482
483
  """
483
484
  spec = self._get_spec(verbose=verbose)
484
- tree = Tree(f"Flow: {self.name}", style="cyan")
485
+ tree = Tree(f"Flow: {self.full_name}", style="cyan")
485
486
 
486
487
  def build_tree(label: str, lines: list):
487
488
  node = Tree(label, style="bold magenta" if lines else "cyan")
@@ -508,9 +509,9 @@ class Flow:
508
509
  return repr(self._lazy_engine_flow())
509
510
 
510
511
  @property
511
- def name(self) -> str:
512
+ def full_name(self) -> str:
512
513
  """
513
- Get the name of the flow.
514
+ Get the full name of the flow.
514
515
  """
515
516
  return self._lazy_engine_flow().name()
516
517
 
@@ -566,8 +567,16 @@ def _create_lazy_flow(name: str | None, fl_def: Callable[[FlowBuilder, DataScope
566
567
  _flows_lock = Lock()
567
568
  _flows: dict[str, Flow] = {}
568
569
 
570
+ def get_full_flow_name(name: str) -> str:
571
+ """
572
+ Get the full name of a flow.
573
+ """
574
+ return f"{setting.get_app_namespace(trailing_delimiter='.')}{name}"
575
+
569
576
  def add_flow_def(name: str, fl_def: Callable[[FlowBuilder, DataScope], None]) -> Flow:
570
577
  """Add a flow definition to the cocoindex library."""
578
+ if not all(c.isalnum() or c == '_' for c in name):
579
+ raise ValueError(f"Flow name '{name}' contains invalid characters. Only alphanumeric characters and underscores are allowed.")
571
580
  with _flows_lock:
572
581
  if name in _flows:
573
582
  raise KeyError(f"Flow with name {name} already exists")
@@ -587,12 +596,12 @@ def flow_names() -> list[str]:
587
596
  with _flows_lock:
588
597
  return list(_flows.keys())
589
598
 
590
- def flows() -> list[Flow]:
599
+ def flows() -> dict[str, Flow]:
591
600
  """
592
601
  Get all flows.
593
602
  """
594
603
  with _flows_lock:
595
- return list(_flows.values())
604
+ return dict(_flows)
596
605
 
597
606
  def flow_by_name(name: str) -> Flow:
598
607
  """
@@ -605,14 +614,13 @@ def ensure_all_flows_built() -> None:
605
614
  """
606
615
  Ensure all flows are built.
607
616
  """
608
- for fl in flows():
609
- fl.internal_flow()
617
+ execution_context.run(ensure_all_flows_built_async())
610
618
 
611
619
  async def ensure_all_flows_built_async() -> None:
612
620
  """
613
621
  Ensure all flows are built.
614
622
  """
615
- for fl in flows():
623
+ for fl in flows().values():
616
624
  await fl.internal_flow_async()
617
625
 
618
626
  def update_all_flows(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
@@ -626,13 +634,13 @@ async def update_all_flows_async(options: FlowLiveUpdaterOptions) -> dict[str, _
626
634
  Update all flows.
627
635
  """
628
636
  await ensure_all_flows_built_async()
629
- async def _update_flow(fl: Flow) -> _engine.IndexUpdateInfo:
637
+ async def _update_flow(name: str, fl: Flow) -> tuple[str, _engine.IndexUpdateInfo]:
630
638
  async with FlowLiveUpdater(fl, options) as updater:
631
639
  await updater.wait_async()
632
- return updater.update_stats()
640
+ return (name, updater.update_stats())
633
641
  fls = flows()
634
- all_stats = await asyncio.gather(*(_update_flow(fl) for fl in fls))
635
- return {fl.name: stats for fl, stats in zip(fls, all_stats)}
642
+ all_stats = await asyncio.gather(*(_update_flow(name, fl) for (name, fl) in fls.items()))
643
+ return dict(all_stats)
636
644
 
637
645
  _transient_flow_name_builder = _NameBuilder()
638
646
  class TransientFlow:
cocoindex/lib.py CHANGED
@@ -15,6 +15,7 @@ from .convert import dump_engine_object
15
15
  def init(settings: setting.Settings):
16
16
  """Initialize the cocoindex library."""
17
17
  _engine.init(dump_engine_object(settings))
18
+ setting.set_app_namespace(settings.app_namespace)
18
19
 
19
20
 
20
21
  def start_server(settings: setting.ServerSettings):
cocoindex/setting.py CHANGED
@@ -6,6 +6,25 @@ import os
6
6
  from typing import Callable, Self, Any, overload
7
7
  from dataclasses import dataclass
8
8
 
9
+ _app_namespace: str = ''
10
+
11
+ def get_app_namespace(*, trailing_delimiter: str | None = None) -> str:
12
+ """Get the application namespace. Append the `trailing_delimiter` if not empty."""
13
+ if _app_namespace == '' or trailing_delimiter is None:
14
+ return _app_namespace
15
+ return f'{_app_namespace}{trailing_delimiter}'
16
+
17
+ def split_app_namespace(full_name: str, delimiter: str) -> tuple[str, str]:
18
+ """Split the full name into the application namespace and the rest."""
19
+ parts = full_name.split(delimiter, 1)
20
+ if len(parts) == 1:
21
+ return '', parts[0]
22
+ return (parts[0], parts[1])
23
+
24
+ def set_app_namespace(app_namespace: str):
25
+ """Set the application namespace."""
26
+ global _app_namespace # pylint: disable=global-statement
27
+ _app_namespace = app_namespace
9
28
 
10
29
  @dataclass
11
30
  class DatabaseConnectionSpec:
@@ -30,6 +49,7 @@ def _load_field(target: dict[str, Any], name: str, env_name: str, required: bool
30
49
  class Settings:
31
50
  """Settings for the cocoindex library."""
32
51
  database: DatabaseConnectionSpec
52
+ app_namespace: str
33
53
 
34
54
  @classmethod
35
55
  def from_env(cls) -> Self:
@@ -40,7 +60,10 @@ class Settings:
40
60
  _load_field(db_kwargs, "user", "COCOINDEX_DATABASE_USER")
41
61
  _load_field(db_kwargs, "password", "COCOINDEX_DATABASE_PASSWORD")
42
62
  database = DatabaseConnectionSpec(**db_kwargs)
43
- return cls(database=database)
63
+
64
+ app_namespace = os.getenv("COCOINDEX_APP_NAMESPACE", '')
65
+
66
+ return cls(database=database, app_namespace=app_namespace)
44
67
 
45
68
  @dataclass
46
69
  class ServerSettings:
cocoindex/setup.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from . import flow
2
+ from . import setting
2
3
  from . import _engine
3
4
 
4
5
  def sync_setup() -> _engine.SetupStatus:
@@ -7,10 +8,15 @@ def sync_setup() -> _engine.SetupStatus:
7
8
 
8
9
  def drop_setup(flow_names: list[str]) -> _engine.SetupStatus:
9
10
  flow.ensure_all_flows_built()
10
- return _engine.drop_setup(flow_names)
11
+ return _engine.drop_setup([flow.get_full_flow_name(name) for name in flow_names])
11
12
 
12
13
  def flow_names_with_setup() -> list[str]:
13
- return _engine.flow_names_with_setup()
14
+ result = []
15
+ for name in _engine.flow_names_with_setup():
16
+ app_namespace, name = setting.split_app_namespace(name, '.')
17
+ if app_namespace == setting.get_app_namespace():
18
+ result.append(name)
19
+ return result
14
20
 
15
21
  def apply_setup_changes(setup_status: _engine.SetupStatus):
16
22
  _engine.apply_setup_changes(setup_status)
cocoindex/sources.py CHANGED
@@ -28,3 +28,16 @@ class GoogleDrive(op.SourceSpec):
28
28
  root_folder_ids: list[str]
29
29
  binary: bool = False
30
30
  recent_changes_poll_interval: datetime.timedelta | None = None
31
+
32
+
33
+ class AmazonS3(op.SourceSpec):
34
+ """Import data from an Amazon S3 bucket. Supports optional prefix and file filtering by glob patterns."""
35
+
36
+ _op_category = op.OpCategory.SOURCE
37
+
38
+ bucket_name: str
39
+ prefix: str | None = None
40
+ binary: bool = False
41
+ included_patterns: list[str] | None = None
42
+ excluded_patterns: list[str] | None = None
43
+ sqs_queue_url: str | None = None
@@ -1,11 +1,12 @@
1
1
  import uuid
2
2
  import datetime
3
3
  from dataclasses import dataclass, make_dataclass
4
+ from typing import NamedTuple, Literal
4
5
  import pytest
5
6
  import cocoindex
6
7
  from cocoindex.typing import encode_enriched_type
7
8
  from cocoindex.convert import encode_engine_value, make_engine_value_decoder
8
- from typing import Literal
9
+
9
10
  @dataclass
10
11
  class Order:
11
12
  order_id: str
@@ -33,6 +34,17 @@ class NestedStruct:
33
34
  orders: list[Order]
34
35
  count: int = 0
35
36
 
37
+ class OrderNamedTuple(NamedTuple):
38
+ order_id: str
39
+ name: str
40
+ price: float
41
+ extra_field: str = "default_extra"
42
+
43
+ class CustomerNamedTuple(NamedTuple):
44
+ name: str
45
+ order: OrderNamedTuple
46
+ tags: list[Tag] | None = None
47
+
36
48
  def build_engine_value_decoder(engine_type_in_py, python_type=None):
37
49
  """
38
50
  Helper to build a converter for the given engine-side type (as represented in Python).
@@ -62,10 +74,16 @@ def test_encode_engine_value_date_time_types():
62
74
  def test_encode_engine_value_struct():
63
75
  order = Order(order_id="O123", name="mixed nuts", price=25.0)
64
76
  assert encode_engine_value(order) == ["O123", "mixed nuts", 25.0, "default_extra"]
77
+
78
+ order_nt = OrderNamedTuple(order_id="O123", name="mixed nuts", price=25.0)
79
+ assert encode_engine_value(order_nt) == ["O123", "mixed nuts", 25.0, "default_extra"]
65
80
 
66
81
  def test_encode_engine_value_list_of_structs():
67
82
  orders = [Order("O1", "item1", 10.0), Order("O2", "item2", 20.0)]
68
83
  assert encode_engine_value(orders) == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]]
84
+
85
+ orders_nt = [OrderNamedTuple("O1", "item1", 10.0), OrderNamedTuple("O2", "item2", 20.0)]
86
+ assert encode_engine_value(orders_nt) == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]]
69
87
 
70
88
  def test_encode_engine_value_struct_with_list():
71
89
  basket = Basket(items=["apple", "banana"])
@@ -74,6 +92,9 @@ def test_encode_engine_value_struct_with_list():
74
92
  def test_encode_engine_value_nested_struct():
75
93
  customer = Customer(name="Alice", order=Order("O1", "item1", 10.0))
76
94
  assert encode_engine_value(customer) == ["Alice", ["O1", "item1", 10.0, "default_extra"], None]
95
+
96
+ customer_nt = CustomerNamedTuple(name="Alice", order=OrderNamedTuple("O1", "item1", 10.0))
97
+ assert encode_engine_value(customer_nt) == ["Alice", ["O1", "item1", 10.0, "default_extra"], None]
77
98
 
78
99
  def test_encode_engine_value_empty_list():
79
100
  assert encode_engine_value([]) == []
@@ -103,20 +124,34 @@ def test_make_engine_value_decoder_basic_types():
103
124
  @pytest.mark.parametrize(
104
125
  "data_type, engine_val, expected",
105
126
  [
106
- # All fields match
127
+ # All fields match (dataclass)
107
128
  (Order, ["O123", "mixed nuts", 25.0, "default_extra"], Order("O123", "mixed nuts", 25.0, "default_extra")),
129
+ # All fields match (NamedTuple)
130
+ (OrderNamedTuple, ["O123", "mixed nuts", 25.0, "default_extra"], OrderNamedTuple("O123", "mixed nuts", 25.0, "default_extra")),
108
131
  # Extra field in engine value (should ignore extra)
109
132
  (Order, ["O123", "mixed nuts", 25.0, "default_extra", "unexpected"], Order("O123", "mixed nuts", 25.0, "default_extra")),
133
+ (OrderNamedTuple, ["O123", "mixed nuts", 25.0, "default_extra", "unexpected"], OrderNamedTuple("O123", "mixed nuts", 25.0, "default_extra")),
110
134
  # Fewer fields in engine value (should fill with default)
111
135
  (Order, ["O123", "mixed nuts", 0.0, "default_extra"], Order("O123", "mixed nuts", 0.0, "default_extra")),
136
+ (OrderNamedTuple, ["O123", "mixed nuts", 0.0, "default_extra"], OrderNamedTuple("O123", "mixed nuts", 0.0, "default_extra")),
112
137
  # More fields in engine value (should ignore extra)
113
138
  (Order, ["O123", "mixed nuts", 25.0, "unexpected"], Order("O123", "mixed nuts", 25.0, "unexpected")),
139
+ (OrderNamedTuple, ["O123", "mixed nuts", 25.0, "unexpected"], OrderNamedTuple("O123", "mixed nuts", 25.0, "unexpected")),
114
140
  # Truly extra field (should ignore the fifth field)
115
141
  (Order, ["O123", "mixed nuts", 25.0, "default_extra", "ignored"], Order("O123", "mixed nuts", 25.0, "default_extra")),
142
+ (OrderNamedTuple, ["O123", "mixed nuts", 25.0, "default_extra", "ignored"], OrderNamedTuple("O123", "mixed nuts", 25.0, "default_extra")),
116
143
  # Missing optional field in engine value (tags=None)
117
144
  (Customer, ["Alice", ["O1", "item1", 10.0, "default_extra"], None], Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), None)),
145
+ (CustomerNamedTuple, ["Alice", ["O1", "item1", 10.0, "default_extra"], None], CustomerNamedTuple("Alice", OrderNamedTuple("O1", "item1", 10.0, "default_extra"), None)),
118
146
  # Extra field in engine value for Customer (should ignore)
119
147
  (Customer, ["Alice", ["O1", "item1", 10.0, "default_extra"], [["vip"]], "extra"], Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), [Tag("vip")])),
148
+ (CustomerNamedTuple, ["Alice", ["O1", "item1", 10.0, "default_extra"], [["vip"]], "extra"], CustomerNamedTuple("Alice", OrderNamedTuple("O1", "item1", 10.0, "default_extra"), [Tag("vip")])),
149
+ # Missing optional field with default
150
+ (Order, ["O123", "mixed nuts", 25.0], Order("O123", "mixed nuts", 25.0, "default_extra")),
151
+ (OrderNamedTuple, ["O123", "mixed nuts", 25.0], OrderNamedTuple("O123", "mixed nuts", 25.0, "default_extra")),
152
+ # Partial optional fields
153
+ (Customer, ["Alice", ["O1", "item1", 10.0]], Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), None)),
154
+ (CustomerNamedTuple, ["Alice", ["O1", "item1", 10.0]], CustomerNamedTuple("Alice", OrderNamedTuple("O1", "item1", 10.0, "default_extra"), None)),
120
155
  ]
121
156
  )
122
157
  def test_struct_decoder_cases(data_type, engine_val, expected):
@@ -124,17 +159,27 @@ def test_struct_decoder_cases(data_type, engine_val, expected):
124
159
  assert decoder(engine_val) == expected
125
160
 
126
161
  def test_make_engine_value_decoder_collections():
127
- # List of structs
162
+ # List of structs (dataclass)
128
163
  decoder = build_engine_value_decoder(list[Order])
129
164
  engine_val = [
130
165
  ["O1", "item1", 10.0, "default_extra"],
131
166
  ["O2", "item2", 20.0, "default_extra"]
132
167
  ]
133
168
  assert decoder(engine_val) == [Order("O1", "item1", 10.0, "default_extra"), Order("O2", "item2", 20.0, "default_extra")]
169
+
170
+ # List of structs (NamedTuple)
171
+ decoder = build_engine_value_decoder(list[OrderNamedTuple])
172
+ assert decoder(engine_val) == [OrderNamedTuple("O1", "item1", 10.0, "default_extra"), OrderNamedTuple("O2", "item2", 20.0, "default_extra")]
173
+
134
174
  # Struct with list field
135
175
  decoder = build_engine_value_decoder(Customer)
136
176
  engine_val = ["Alice", ["O1", "item1", 10.0, "default_extra"], [["vip"], ["premium"]]]
137
177
  assert decoder(engine_val) == Customer("Alice", Order("O1", "item1", 10.0, "default_extra"), [Tag("vip"), Tag("premium")])
178
+
179
+ # NamedTuple with list field
180
+ decoder = build_engine_value_decoder(CustomerNamedTuple)
181
+ assert decoder(engine_val) == CustomerNamedTuple("Alice", OrderNamedTuple("O1", "item1", 10.0, "default_extra"), [Tag("vip"), Tag("premium")])
182
+
138
183
  # Struct with struct field
139
184
  decoder = build_engine_value_decoder(NestedStruct)
140
185
  engine_val = [
@@ -239,6 +284,13 @@ def test_roundtrip_ltable():
239
284
  assert encoded == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]]
240
285
  decoded = build_engine_value_decoder(t)(encoded)
241
286
  assert decoded == value
287
+
288
+ t_nt = list[OrderNamedTuple]
289
+ value_nt = [OrderNamedTuple("O1", "item1", 10.0), OrderNamedTuple("O2", "item2", 20.0)]
290
+ encoded = encode_engine_value(value_nt)
291
+ assert encoded == [["O1", "item1", 10.0, "default_extra"], ["O2", "item2", 20.0, "default_extra"]]
292
+ decoded = build_engine_value_decoder(t_nt)(encoded)
293
+ assert decoded == value_nt
242
294
 
243
295
  def test_roundtrip_ktable_str_key():
244
296
  t = dict[str, Order]
@@ -247,6 +299,13 @@ def test_roundtrip_ktable_str_key():
247
299
  assert encoded == [["K1", "O1", "item1", 10.0, "default_extra"], ["K2", "O2", "item2", 20.0, "default_extra"]]
248
300
  decoded = build_engine_value_decoder(t)(encoded)
249
301
  assert decoded == value
302
+
303
+ t_nt = dict[str, OrderNamedTuple]
304
+ value_nt = {"K1": OrderNamedTuple("O1", "item1", 10.0), "K2": OrderNamedTuple("O2", "item2", 20.0)}
305
+ encoded = encode_engine_value(value_nt)
306
+ assert encoded == [["K1", "O1", "item1", 10.0, "default_extra"], ["K2", "O2", "item2", 20.0, "default_extra"]]
307
+ decoded = build_engine_value_decoder(t_nt)(encoded)
308
+ assert decoded == value_nt
250
309
 
251
310
  def test_roundtrip_ktable_struct_key():
252
311
  @dataclass(frozen=True)
@@ -261,6 +320,14 @@ def test_roundtrip_ktable_struct_key():
261
320
  [["B", 4], "O2", "item2", 20.0, "default_extra"]]
262
321
  decoded = build_engine_value_decoder(t)(encoded)
263
322
  assert decoded == value
323
+
324
+ t_nt = dict[OrderKey, OrderNamedTuple]
325
+ value_nt = {OrderKey("A", 3): OrderNamedTuple("O1", "item1", 10.0), OrderKey("B", 4): OrderNamedTuple("O2", "item2", 20.0)}
326
+ encoded = encode_engine_value(value_nt)
327
+ assert encoded == [[["A", 3], "O1", "item1", 10.0, "default_extra"],
328
+ [["B", 4], "O2", "item2", 20.0, "default_extra"]]
329
+ decoded = build_engine_value_decoder(t_nt)(encoded)
330
+ assert decoded == value_nt
264
331
 
265
332
  IntVectorType = cocoindex.Vector[int, Literal[5]]
266
333
  def test_vector_as_vector() -> None:
cocoindex/typing.py CHANGED
@@ -56,8 +56,11 @@ KEY_FIELD_NAME = '_key'
56
56
 
57
57
  ElementType = type | tuple[type, type]
58
58
 
59
+ def is_namedtuple_type(t) -> bool:
60
+ return isinstance(t, type) and issubclass(t, tuple) and hasattr(t, "_fields")
61
+
59
62
  def _is_struct_type(t) -> bool:
60
- return isinstance(t, type) and dataclasses.is_dataclass(t)
63
+ return isinstance(t, type) and (dataclasses.is_dataclass(t) or is_namedtuple_type(t))
61
64
 
62
65
  @dataclasses.dataclass
63
66
  class AnalyzedTypeInfo:
@@ -69,7 +72,7 @@ class AnalyzedTypeInfo:
69
72
  elem_type: ElementType | None # For Vector and Table
70
73
 
71
74
  key_type: type | None # For element of KTable
72
- dataclass_type: type | None # For Struct
75
+ struct_type: type | None # For Struct, a dataclass or namedtuple
73
76
 
74
77
  attrs: dict[str, Any] | None
75
78
  nullable: bool = False
@@ -117,15 +120,16 @@ def analyze_type_info(t) -> AnalyzedTypeInfo:
117
120
  elif isinstance(attr, TypeKind):
118
121
  kind = attr.kind
119
122
 
120
- dataclass_type = None
123
+ struct_type = None
121
124
  elem_type = None
122
125
  key_type = None
123
126
  if _is_struct_type(t):
127
+ struct_type = t
128
+
124
129
  if kind is None:
125
130
  kind = 'Struct'
126
131
  elif kind != 'Struct':
127
132
  raise ValueError(f"Unexpected type kind for struct: {kind}")
128
- dataclass_type = t
129
133
  elif base_type is collections.abc.Sequence or base_type is list:
130
134
  args = typing.get_args(t)
131
135
  elem_type = args[0]
@@ -167,36 +171,50 @@ def analyze_type_info(t) -> AnalyzedTypeInfo:
167
171
  else:
168
172
  raise ValueError(f"type unsupported yet: {t}")
169
173
 
170
- return AnalyzedTypeInfo(kind=kind, vector_info=vector_info,
171
- elem_type=elem_type, key_type=key_type, dataclass_type=dataclass_type,
172
- attrs=attrs, nullable=nullable)
173
-
174
- def _encode_fields_schema(dataclass_type: type, key_type: type | None = None) -> list[dict[str, Any]]:
174
+ return AnalyzedTypeInfo(
175
+ kind=kind,
176
+ vector_info=vector_info,
177
+ elem_type=elem_type,
178
+ key_type=key_type,
179
+ struct_type=struct_type,
180
+ attrs=attrs,
181
+ nullable=nullable,
182
+ )
183
+
184
+ def _encode_fields_schema(struct_type: type, key_type: type | None = None) -> list[dict[str, Any]]:
175
185
  result = []
176
186
  def add_field(name: str, t) -> None:
177
187
  try:
178
188
  type_info = encode_enriched_type_info(analyze_type_info(t))
179
189
  except ValueError as e:
180
- e.add_note(f"Failed to encode annotation for field - "
181
- f"{dataclass_type.__name__}.{name}: {t}")
190
+ e.add_note(
191
+ f"Failed to encode annotation for field - "
192
+ f"{struct_type.__name__}.{name}: {t}"
193
+ )
182
194
  raise
183
195
  type_info['name'] = name
184
196
  result.append(type_info)
185
197
 
186
198
  if key_type is not None:
187
199
  add_field(KEY_FIELD_NAME, key_type)
188
- for field in dataclasses.fields(dataclass_type):
189
- add_field(field.name, field.type)
200
+
201
+ if dataclasses.is_dataclass(struct_type):
202
+ for field in dataclasses.fields(struct_type):
203
+ add_field(field.name, field.type)
204
+ elif is_namedtuple_type(struct_type):
205
+ for name, field_type in struct_type.__annotations__.items():
206
+ add_field(name, field_type)
207
+
190
208
  return result
191
209
 
192
210
  def _encode_type(type_info: AnalyzedTypeInfo) -> dict[str, Any]:
193
211
  encoded_type: dict[str, Any] = { 'kind': type_info.kind }
194
212
 
195
213
  if type_info.kind == 'Struct':
196
- if type_info.dataclass_type is None:
197
- raise ValueError("Struct type must have a dataclass type")
198
- encoded_type['fields'] = _encode_fields_schema(type_info.dataclass_type, type_info.key_type)
199
- if doc := inspect.getdoc(type_info.dataclass_type):
214
+ if type_info.struct_type is None:
215
+ raise ValueError("Struct type must have a dataclass or namedtuple type")
216
+ encoded_type['fields'] = _encode_fields_schema(type_info.struct_type, type_info.key_type)
217
+ if doc := inspect.getdoc(type_info.struct_type):
200
218
  encoded_type['description'] = doc
201
219
 
202
220
  elif type_info.kind == 'Vector':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.36
3
+ Version: 0.1.38
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
@@ -148,12 +148,13 @@ It defines an index flow like this:
148
148
  | [Code Embedding](examples/code_embedding) | Index code embeddings for semantic search |
149
149
  | [PDF Embedding](examples/pdf_embedding) | Parse PDF and index text embeddings for semantic search |
150
150
  | [Manuals LLM Extraction](examples/manuals_llm_extraction) | Extract structured information from a manual using LLM |
151
+ | [Amazon S3 Embedding](examples/amazon_s3_embedding) | Index text documents from Amazon S3 |
151
152
  | [Google Drive Text Embedding](examples/gdrive_text_embedding) | Index text documents from Google Drive |
152
153
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
153
154
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
154
155
  | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
155
156
  | [Product_Taxonomy_Knowledge_Graph](examples/product_taxonomy_knowledge_graph) | Build knowledge graph for product recommendations |
156
- | [Image Search with Vision API](examples/image_search_example) | Generates detailed captions for images using a vision model, embeds them, enables semantic search via FastAPI and served on a React frontend.|
157
+ | [Image Search with Vision API](examples/image_search_example) | Generates detailed captions for images using a vision model, embeds them, enables live-updating semantic search via FastAPI and served on a React frontend|
157
158
 
158
159
  More coming and stay tuned 👀!
159
160
 
@@ -0,0 +1,25 @@
1
+ cocoindex-0.1.38.dist-info/METADATA,sha256=d1qjWo7V_MjeUiBe4zIOyqCyg8gM7-DaUDbKPSIf3_Q,9793
2
+ cocoindex-0.1.38.dist-info/WHEEL,sha256=p_tvkyHH2UmMBrR2Gemb1ahXJMM2SXUIsCLrWZgJvB8,104
3
+ cocoindex-0.1.38.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
4
+ cocoindex/__init__.py,sha256=LpB0VjGvkD1beio8R9RCT6PI3eU0keV-3sBL45fHTQE,690
5
+ cocoindex/_engine.cpython-311-darwin.so,sha256=Al34X1flIWqzdu6cu_UjO34tUA0SHYm4xuSKymvq2-0,56753664
6
+ cocoindex/auth_registry.py,sha256=NsALZ3SKsDG9cPdrlTlalIqUvgbgFOaFGAbWJNedtJE,692
7
+ cocoindex/cli.py,sha256=Ac3ybnQW-HGVGJeUwIOHd1qhjs0KC5wCsemWuyouEfU,8999
8
+ cocoindex/convert.py,sha256=tRY-QBeeFMFwCYiRk7a0_tuDqopw8iqBpg_Aswcq9JQ,6864
9
+ cocoindex/flow.py,sha256=r1GnRIthmkniJEsAxGsYlDXbcf7ydMwZy1qJEKzwtqc,23814
10
+ cocoindex/functions.py,sha256=F79dNmGE127LaU67kF5Oqtf_tIzebFQH7MkyceMX4-s,1830
11
+ cocoindex/index.py,sha256=LssEOuZi6AqhwKtZM3QFeQpa9T-0ELi8G5DsrYKECvc,534
12
+ cocoindex/lib.py,sha256=OqTMuOHicdyX9PRA7fmTzznK8HZMrzxpUDbqxAEF--Q,2383
13
+ cocoindex/llm.py,sha256=_3rtahuKcqcEHPkFSwhXOSrekZyGxVApPoYtlU_chcA,348
14
+ cocoindex/op.py,sha256=OGYRYl7gPa7X7iSU30iTrCzvqRBu7jQqfvN4vjG__dA,10730
15
+ cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ cocoindex/query.py,sha256=8_3Lb_EVjZtl2ZyJNZGX16LoKXEd-PL8OjY-zs9GQeA,3205
17
+ cocoindex/runtime.py,sha256=jqRnWkkIlAhE04gi4y0Y5bzuq9FX4j0aVNU-nengLJk,980
18
+ cocoindex/setting.py,sha256=AaIMclEktbBgK7Cks2D8LfS1cskf8UUcbSb6UBLdoSs,3260
19
+ cocoindex/setup.py,sha256=ErNtX08NfFOFKehp5qGUvCx8Wiz9f3gmzvfBhAqrQyI,745
20
+ cocoindex/sources.py,sha256=7lpwYLsFCRfbURKf79Vu0JZZoXjAYY0DxNHzUb-VHBY,1327
21
+ cocoindex/storages.py,sha256=MFMsfyOCYMggTWeWrOi82miqOXQmiUuqq828x5htBr0,2207
22
+ cocoindex/tests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
+ cocoindex/tests/test_convert.py,sha256=7jc--I3frrg7DB5MPr4JFzE7DSCznJuWyHdlDLQJ_fM,15516
24
+ cocoindex/typing.py,sha256=369ABRtnpbaVSQVIBc2ZDutXW8jUmncvNJd9CHEWT3Q,8962
25
+ cocoindex-0.1.38.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.8.4)
2
+ Generator: maturin (1.8.6)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-cp311-macosx_11_0_arm64
@@ -1,25 +0,0 @@
1
- cocoindex-0.1.36.dist-info/METADATA,sha256=YH6UMfZ-m_ako8_9VXinagNN6kD6iVEh94Pv9Ua-C3U,9686
2
- cocoindex-0.1.36.dist-info/WHEEL,sha256=M0oGXcMDUVEBxvyDRZ1SJRlU2WxAfG7DBwXO4GUZt1Q,104
3
- cocoindex-0.1.36.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
4
- cocoindex/__init__.py,sha256=LpB0VjGvkD1beio8R9RCT6PI3eU0keV-3sBL45fHTQE,690
5
- cocoindex/_engine.cpython-311-darwin.so,sha256=OD7YGKys5qje2tDaT8QmDRRSkLAPe7hJeQa5SGqhcC0,49748544
6
- cocoindex/auth_registry.py,sha256=NsALZ3SKsDG9cPdrlTlalIqUvgbgFOaFGAbWJNedtJE,692
7
- cocoindex/cli.py,sha256=QdZjgnABuDQfy6JiAxeAJiQMI5FNT9FQGLiYAUtLMw8,8923
8
- cocoindex/convert.py,sha256=mBUTa_Ag39_ut-yE_jc1wqS3zLjtOm6QKet-bqJ-RWc,5947
9
- cocoindex/flow.py,sha256=MZZ0Uf0ObAzR1yIjUecRgA-U0t__95eoLBK_DxwwLnk,23375
10
- cocoindex/functions.py,sha256=F79dNmGE127LaU67kF5Oqtf_tIzebFQH7MkyceMX4-s,1830
11
- cocoindex/index.py,sha256=LssEOuZi6AqhwKtZM3QFeQpa9T-0ELi8G5DsrYKECvc,534
12
- cocoindex/lib.py,sha256=812GB8Z-2PyjG73Odvw5jtNBLnoeU9aOh9s2ZnETKa8,2329
13
- cocoindex/llm.py,sha256=_3rtahuKcqcEHPkFSwhXOSrekZyGxVApPoYtlU_chcA,348
14
- cocoindex/op.py,sha256=OGYRYl7gPa7X7iSU30iTrCzvqRBu7jQqfvN4vjG__dA,10730
15
- cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- cocoindex/query.py,sha256=8_3Lb_EVjZtl2ZyJNZGX16LoKXEd-PL8OjY-zs9GQeA,3205
17
- cocoindex/runtime.py,sha256=jqRnWkkIlAhE04gi4y0Y5bzuq9FX4j0aVNU-nengLJk,980
18
- cocoindex/setting.py,sha256=pms1blwlXIOqZIpye-rfiwzqYUCAC8oEL7mQM5A160g,2356
19
- cocoindex/setup.py,sha256=AQLbtBLuJX066IANS7BGp20246mAGQ_4Z0W6MVJcQzY,481
20
- cocoindex/sources.py,sha256=wZFU8lwSXjyofJR-syySH9fTyPnBlAPJ6-1hQNX8fGA,936
21
- cocoindex/storages.py,sha256=MFMsfyOCYMggTWeWrOi82miqOXQmiUuqq828x5htBr0,2207
22
- cocoindex/tests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
- cocoindex/tests/test_convert.py,sha256=WPRKp0jv_uSEM81RGWEAmsax-J-FtXt90mZ0yEnvGLs,11236
24
- cocoindex/typing.py,sha256=BI2vPw4Iu4S3aznNJQrfM2LZU_weGYASTXF1W3ZWh_Y,8568
25
- cocoindex-0.1.36.dist-info/RECORD,,