cocoindex 0.1.33__cp311-cp311-macosx_11_0_arm64.whl → 0.1.35__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/__init__.py CHANGED
@@ -4,7 +4,7 @@ Cocoindex is a framework for building and running indexing pipelines.
4
4
  from . import functions, query, sources, storages, cli
5
5
  from .flow import FlowBuilder, DataScope, DataSlice, Flow, flow_def
6
6
  from .flow import EvaluateAndDumpOptions, GeneratedField
7
- from .flow import update_all_flows, FlowLiveUpdater, FlowLiveUpdaterOptions
7
+ from .flow import update_all_flows_async, FlowLiveUpdater, FlowLiveUpdaterOptions
8
8
  from .llm import LlmSpec, LlmApiType
9
9
  from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
10
10
  from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
Binary file
cocoindex/cli.py CHANGED
@@ -1,4 +1,3 @@
1
- import asyncio
2
1
  import click
3
2
  import datetime
4
3
 
@@ -7,7 +6,6 @@ from rich.table import Table
7
6
 
8
7
  from . import flow, lib, setting
9
8
  from .setup import sync_setup, drop_setup, flow_names_with_setup, apply_setup_changes
10
- from .runtime import execution_context
11
9
 
12
10
  @click.group()
13
11
  def cli():
@@ -55,16 +53,17 @@ def ls(show_all: bool):
55
53
 
56
54
  @cli.command()
57
55
  @click.argument("flow_name", type=str, required=False)
58
- @click.option("--color/--no-color", default=True)
59
- def show(flow_name: str | None, color: bool):
56
+ @click.option("--color/--no-color", default=True, help="Enable or disable colored output.")
57
+ @click.option("--verbose", is_flag=True, help="Show verbose output with full details.")
58
+ def show(flow_name: str | None, color: bool, verbose: bool):
60
59
  """
61
- Show the flow spec in a readable format with colored output,
62
- including the schema.
60
+ Show the flow spec and schema in a readable format with colored output.
63
61
  """
64
62
  flow = _flow_by_name(flow_name)
65
63
  console = Console(no_color=not color)
66
- console.print(flow._render_text())
64
+ console.print(flow._render_spec(verbose=verbose))
67
65
 
66
+ console.print()
68
67
  table = Table(
69
68
  title=f"Schema for Flow: {flow.name}",
70
69
  show_header=True,
@@ -74,7 +73,7 @@ def show(flow_name: str | None, color: bool):
74
73
  table.add_column("Type", style="green")
75
74
  table.add_column("Attributes", style="yellow")
76
75
 
77
- for field_name, field_type, attr_str in flow._render_schema():
76
+ for field_name, field_type, attr_str in flow._get_schema():
78
77
  table.add_row(field_name, field_type, attr_str)
79
78
 
80
79
  console.print(table)
@@ -85,15 +84,15 @@ def setup():
85
84
  Check and apply backend setup changes for flows, including the internal and target storage
86
85
  (to export).
87
86
  """
88
- status_check = sync_setup()
89
- click.echo(status_check)
90
- if status_check.is_up_to_date():
87
+ setup_status = sync_setup()
88
+ click.echo(setup_status)
89
+ if setup_status.is_up_to_date():
91
90
  click.echo("No changes need to be pushed.")
92
91
  return
93
92
  if not click.confirm(
94
93
  "Changes need to be pushed. Continue? [yes/N]", default=False, show_default=False):
95
94
  return
96
- apply_setup_changes(status_check)
95
+ apply_setup_changes(setup_status)
97
96
 
98
97
  @cli.command()
99
98
  @click.argument("flow_name", type=str, nargs=-1)
@@ -112,15 +111,15 @@ def drop(flow_name: tuple[str, ...], drop_all: bool):
112
111
  flow_names = [fl.name for fl in flow.flows()]
113
112
  else:
114
113
  flow_names = list(flow_name)
115
- status_check = drop_setup(flow_names)
116
- click.echo(status_check)
117
- if status_check.is_up_to_date():
114
+ setup_status = drop_setup(flow_names)
115
+ click.echo(setup_status)
116
+ if setup_status.is_up_to_date():
118
117
  click.echo("No flows need to be dropped.")
119
118
  return
120
119
  if not click.confirm(
121
120
  "Changes need to be pushed. Continue? [yes/N]", default=False, show_default=False):
122
121
  return
123
- apply_setup_changes(status_check)
122
+ apply_setup_changes(setup_status)
124
123
 
125
124
  @cli.command()
126
125
  @click.argument("flow_name", type=str, required=False)
@@ -135,13 +134,12 @@ def update(flow_name: str | None, live: bool, quiet: bool):
135
134
  Update the index to reflect the latest data from data sources.
136
135
  """
137
136
  options = flow.FlowLiveUpdaterOptions(live_mode=live, print_stats=not quiet)
138
- async def _update():
139
- if flow_name is None:
140
- await flow.update_all_flows(options)
141
- else:
142
- updater = await flow.FlowLiveUpdater.create(_flow_by_name(flow_name), options)
143
- await updater.wait()
144
- execution_context.run(_update())
137
+ if flow_name is None:
138
+ return flow.update_all_flows(options)
139
+ else:
140
+ with flow.FlowLiveUpdater(_flow_by_name(flow_name), options) as updater:
141
+ updater.wait()
142
+ return updater.update_stats()
145
143
 
146
144
  @cli.command()
147
145
  @click.argument("flow_name", type=str, required=False)
@@ -216,7 +214,7 @@ def server(address: str | None, live_update: bool, quiet: bool, cors_origin: str
216
214
 
217
215
  if live_update:
218
216
  options = flow.FlowLiveUpdaterOptions(live_mode=True, print_stats=not quiet)
219
- execution_context.run(flow.update_all_flows(options))
217
+ flow.update_all_flows(options)
220
218
  if COCOINDEX_HOST in cors_origins:
221
219
  click.echo(f"Open CocoInsight at: {COCOINDEX_HOST}/cocoinsight")
222
220
  input("Press Enter to stop...")
cocoindex/flow.py CHANGED
@@ -8,14 +8,13 @@ import asyncio
8
8
  import re
9
9
  import inspect
10
10
  import datetime
11
- import json
12
11
 
13
12
  from typing import Any, Callable, Sequence, TypeVar
14
13
  from threading import Lock
15
14
  from enum import Enum
16
15
  from dataclasses import dataclass
17
16
  from rich.text import Text
18
- from rich.console import Console
17
+ from rich.tree import Tree
19
18
 
20
19
  from . import _engine
21
20
  from . import index
@@ -161,6 +160,9 @@ class DataSlice:
161
160
  """
162
161
  Apply a function to the data slice.
163
162
  """
163
+ if not isinstance(fn_spec, op.FunctionSpec):
164
+ raise ValueError("transform() can only be called on a CocoIndex function")
165
+
164
166
  transform_args: list[tuple[Any, str | None]]
165
167
  transform_args = [(self._state.engine_data_slice, None)]
166
168
  transform_args += [(self._state.flow_builder_state.get_data_slice(v), None) for v in args]
@@ -280,6 +282,9 @@ class DataCollector:
280
282
 
281
283
  `vector_index` is for backward compatibility only. Please use `vector_indexes` instead.
282
284
  """
285
+ if not isinstance(target_spec, op.StorageSpec):
286
+ raise ValueError("export() can only be called on a CocoIndex target storage")
287
+
283
288
  # For backward compatibility only.
284
289
  if len(vector_indexes) == 0 and len(vector_index) > 0:
285
290
  vector_indexes = [index.VectorIndexDef(field_name=field_name, metric=metric)
@@ -343,8 +348,10 @@ class FlowBuilder:
343
348
  refresh_interval: datetime.timedelta | None = None,
344
349
  ) -> DataSlice:
345
350
  """
346
- Add a source to the flow.
351
+ Import a source to the flow.
347
352
  """
353
+ if not isinstance(spec, op.SourceSpec):
354
+ raise ValueError("add_source() can only be called on a CocoIndex source")
348
355
  return _create_data_slice(
349
356
  self._state,
350
357
  lambda target_scope, name: self._state.engine_flow_builder.add_source(
@@ -376,56 +383,71 @@ class FlowLiveUpdater:
376
383
  """
377
384
  A live updater for a flow.
378
385
  """
379
- _engine_live_updater: _engine.FlowLiveUpdater
380
-
381
- def __init__(self, arg: Flow | _engine.FlowLiveUpdater, options: FlowLiveUpdaterOptions | None = None):
382
- if isinstance(arg, _engine.FlowLiveUpdater):
383
- self._engine_live_updater = arg
384
- else:
385
- self._engine_live_updater = execution_context.run(_engine.FlowLiveUpdater(
386
- arg.internal_flow(), dump_engine_object(options or FlowLiveUpdaterOptions())))
386
+ _flow: Flow
387
+ _options: FlowLiveUpdaterOptions
388
+ _engine_live_updater: _engine.FlowLiveUpdater | None = None
387
389
 
388
- @staticmethod
389
- async def create(fl: Flow, options: FlowLiveUpdaterOptions | None = None) -> FlowLiveUpdater:
390
- """
391
- Create a live updater for a flow.
392
- """
393
- engine_live_updater = await _engine.FlowLiveUpdater.create(
394
- await fl.ainternal_flow(),
395
- dump_engine_object(options or FlowLiveUpdaterOptions()))
396
- return FlowLiveUpdater(engine_live_updater)
390
+ def __init__(self, fl: Flow, options: FlowLiveUpdaterOptions | None = None):
391
+ self._flow = fl
392
+ self._options = options or FlowLiveUpdaterOptions()
397
393
 
398
394
  def __enter__(self) -> FlowLiveUpdater:
395
+ self.start()
399
396
  return self
400
397
 
401
398
  def __exit__(self, exc_type, exc_value, traceback):
402
399
  self.abort()
403
- execution_context.run(self.wait())
400
+ self.wait()
404
401
 
405
402
  async def __aenter__(self) -> FlowLiveUpdater:
403
+ await self.start_async()
406
404
  return self
407
405
 
408
406
  async def __aexit__(self, exc_type, exc_value, traceback):
409
407
  self.abort()
410
- await self.wait()
408
+ await self.wait_async()
411
409
 
412
- async def wait(self) -> None:
410
+ def start(self) -> None:
411
+ """
412
+ Start the live updater.
413
+ """
414
+ execution_context.run(self.start_async())
415
+
416
+ async def start_async(self) -> None:
417
+ """
418
+ Start the live updater.
419
+ """
420
+ self._engine_live_updater = await _engine.FlowLiveUpdater.create(
421
+ await self._flow.internal_flow_async(), dump_engine_object(self._options))
422
+
423
+ def wait(self) -> None:
413
424
  """
414
425
  Wait for the live updater to finish.
415
426
  """
416
- await self._engine_live_updater.wait()
427
+ execution_context.run(self.wait_async())
428
+
429
+ async def wait_async(self) -> None:
430
+ """
431
+ Wait for the live updater to finish. Async version.
432
+ """
433
+ await self._get_engine_live_updater().wait()
417
434
 
418
435
  def abort(self) -> None:
419
436
  """
420
437
  Abort the live updater.
421
438
  """
422
- self._engine_live_updater.abort()
439
+ self._get_engine_live_updater().abort()
423
440
 
424
441
  def update_stats(self) -> _engine.IndexUpdateInfo:
425
442
  """
426
443
  Get the index update info.
427
444
  """
428
- return self._engine_live_updater.index_update_info()
445
+ return self._get_engine_live_updater().index_update_info()
446
+
447
+ def _get_engine_live_updater(self) -> _engine.FlowLiveUpdater:
448
+ if self._engine_live_updater is None:
449
+ raise RuntimeError("Live updater is not started")
450
+ return self._engine_live_updater
429
451
 
430
452
 
431
453
  @dataclass
@@ -454,61 +476,33 @@ class Flow:
454
476
  return engine_flow
455
477
  self._lazy_engine_flow = _lazy_engine_flow
456
478
 
457
- def _format_flow(self, flow_dict: dict) -> Text:
458
- output = Text()
479
+ def _render_spec(self, verbose: bool = False) -> Tree:
480
+ """
481
+ Render the flow spec as a styled rich Tree with hierarchical structure.
482
+ """
483
+ spec = self._get_spec(verbose=verbose)
484
+ tree = Tree(f"Flow: {self.name}", style="cyan")
459
485
 
460
- def add_line(content, indent=0, style=None, end="\n"):
461
- output.append(" " * indent)
462
- output.append(content, style=style)
463
- output.append(end)
486
+ def build_tree(label: str, lines: list):
487
+ node = Tree(label, style="bold magenta" if lines else "cyan")
488
+ for line in lines:
489
+ child_node = node.add(Text(line.content, style="yellow"))
490
+ child_node.children = build_tree("", line.children).children
491
+ return node
464
492
 
465
- def format_key_value(key, value, indent):
466
- if isinstance(value, (dict, list)):
467
- add_line(f"- {key}:", indent, style="green")
468
- format_data(value, indent + 2)
469
- else:
470
- add_line(f"- {key}:", indent, style="green", end="")
471
- add_line(f" {value}", style="yellow")
472
-
473
- def format_data(data, indent=0):
474
- if isinstance(data, dict):
475
- for key, value in data.items():
476
- format_key_value(key, value, indent)
477
- elif isinstance(data, list):
478
- for i, item in enumerate(data):
479
- format_key_value(f"[{i}]", item, indent)
480
- else:
481
- add_line(str(data), indent, style="yellow")
482
-
483
- # Header
484
- flow_name = flow_dict.get("name", "Unnamed")
485
- add_line(f"Flow: {flow_name}", style="bold cyan")
486
-
487
- # Section
488
- for section_title, section_key in [
489
- ("Sources:", "import_ops"),
490
- ("Processing:", "reactive_ops"),
491
- ("Targets:", "export_ops"),
492
- ]:
493
- add_line("")
494
- add_line(section_title, style="bold cyan")
495
- format_data(flow_dict.get(section_key, []), indent=0)
496
-
497
- return output
498
-
499
- def _render_text(self) -> Text:
500
- flow_spec_str = str(self._lazy_engine_flow())
501
- try:
502
- flow_dict = json.loads(flow_spec_str)
503
- return self._format_flow(flow_dict)
504
- except json.JSONDecodeError:
505
- return Text(flow_spec_str)
493
+ for section, lines in spec.sections:
494
+ section_node = build_tree(f"{section}:", lines)
495
+ tree.children.append(section_node)
496
+ return tree
497
+
498
+ def _get_spec(self, verbose: bool = False) -> list[tuple[str, str, int]]:
499
+ return self._lazy_engine_flow().get_spec(output_mode="verbose" if verbose else "concise")
506
500
 
507
- def _render_schema(self) -> list[tuple[str, str, str]]:
501
+ def _get_schema(self) -> list[tuple[str, str, str]]:
508
502
  return self._lazy_engine_flow().get_schema()
509
503
 
510
504
  def __str__(self):
511
- return str(self._render_text())
505
+ return str(self._get_spec())
512
506
 
513
507
  def __repr__(self):
514
508
  return repr(self._lazy_engine_flow())
@@ -520,13 +514,20 @@ class Flow:
520
514
  """
521
515
  return self._lazy_engine_flow().name()
522
516
 
523
- async def update(self) -> _engine.IndexUpdateInfo:
517
+ def update(self) -> _engine.IndexUpdateInfo:
524
518
  """
525
519
  Update the index defined by the flow.
526
- Once the function returns, the indice is fresh up to the moment when the function is called.
520
+ Once the function returns, the index is fresh up to the moment when the function is called.
521
+ """
522
+ return execution_context.run(self.update_async())
523
+
524
+ async def update_async(self) -> _engine.IndexUpdateInfo:
527
525
  """
528
- updater = await FlowLiveUpdater.create(self, FlowLiveUpdaterOptions(live_mode=False))
529
- await updater.wait()
526
+ Update the index defined by the flow.
527
+ Once the function returns, the index is fresh up to the moment when the function is called.
528
+ """
529
+ updater = await FlowLiveUpdater.create_async(self, FlowLiveUpdaterOptions(live_mode=False))
530
+ await updater.wait_async()
530
531
  return updater.update_stats()
531
532
 
532
533
  def evaluate_and_dump(self, options: EvaluateAndDumpOptions):
@@ -541,7 +542,7 @@ class Flow:
541
542
  """
542
543
  return self._lazy_engine_flow()
543
544
 
544
- async def ainternal_flow(self) -> _engine.Flow:
545
+ async def internal_flow_async(self) -> _engine.Flow:
545
546
  """
546
547
  Get the engine flow. The async version.
547
548
  """
@@ -607,22 +608,28 @@ def ensure_all_flows_built() -> None:
607
608
  for fl in flows():
608
609
  fl.internal_flow()
609
610
 
610
- async def aensure_all_flows_built() -> None:
611
+ async def ensure_all_flows_built_async() -> None:
611
612
  """
612
613
  Ensure all flows are built.
613
614
  """
614
615
  for fl in flows():
615
- await fl.ainternal_flow()
616
+ await fl.internal_flow_async()
616
617
 
617
- async def update_all_flows(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
618
+ def update_all_flows(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
618
619
  """
619
620
  Update all flows.
620
621
  """
621
- await aensure_all_flows_built()
622
+ return execution_context.run(update_all_flows_async(options))
623
+
624
+ async def update_all_flows_async(options: FlowLiveUpdaterOptions) -> dict[str, _engine.IndexUpdateInfo]:
625
+ """
626
+ Update all flows.
627
+ """
628
+ await ensure_all_flows_built_async()
622
629
  async def _update_flow(fl: Flow) -> _engine.IndexUpdateInfo:
623
- updater = await FlowLiveUpdater.create(fl, options)
624
- await updater.wait()
625
- return updater.update_stats()
630
+ async with FlowLiveUpdater(fl, options) as updater:
631
+ await updater.wait_async()
632
+ return updater.update_stats()
626
633
  fls = flows()
627
634
  all_stats = await asyncio.gather(*(_update_flow(fl) for fl in fls))
628
635
  return {fl.name: stats for fl, stats in zip(fls, all_stats)}
cocoindex/setup.py CHANGED
@@ -1,16 +1,16 @@
1
1
  from . import flow
2
2
  from . import _engine
3
3
 
4
- def sync_setup() -> _engine.SetupStatusCheck:
4
+ def sync_setup() -> _engine.SetupStatus:
5
5
  flow.ensure_all_flows_built()
6
6
  return _engine.sync_setup()
7
7
 
8
- def drop_setup(flow_names: list[str]) -> _engine.SetupStatusCheck:
8
+ def drop_setup(flow_names: list[str]) -> _engine.SetupStatus:
9
9
  flow.ensure_all_flows_built()
10
10
  return _engine.drop_setup(flow_names)
11
11
 
12
12
  def flow_names_with_setup() -> list[str]:
13
13
  return _engine.flow_names_with_setup()
14
14
 
15
- def apply_setup_changes(status_check: _engine.SetupStatusCheck):
16
- _engine.apply_setup_changes(status_check)
15
+ def apply_setup_changes(setup_status: _engine.SetupStatus):
16
+ _engine.apply_setup_changes(setup_status)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cocoindex
3
- Version: 0.1.33
3
+ Version: 0.1.35
4
4
  Requires-Dist: sentence-transformers>=3.3.1
5
5
  Requires-Dist: click>=8.1.8
6
6
  Requires-Dist: rich>=14.0.0
@@ -28,44 +28,74 @@ Project-URL: Homepage, https://cocoindex.io/
28
28
  [![PyPI version](https://img.shields.io/pypi/v/cocoindex?color=5B5BD6)](https://pypi.org/project/cocoindex/)
29
29
  [![PyPI - Downloads](https://img.shields.io/pypi/dm/cocoindex)](https://pypistats.org/packages/cocoindex)
30
30
 
31
- <!-- [![Python](https://img.shields.io/badge/python-3.11%20to%203.13-5B5BD6?logo=python&logoColor=white)](https://www.python.org/) -->
32
31
  [![CI](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml/badge.svg?event=push&color=5B5BD6)](https://github.com/cocoindex-io/cocoindex/actions/workflows/CI.yml)
33
32
  [![release](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml/badge.svg?event=push&color=5B5BD6)](https://github.com/cocoindex-io/cocoindex/actions/workflows/release.yml)
34
33
  [![Discord](https://img.shields.io/discord/1314801574169673738?logo=discord&color=5B5BD6&logoColor=white)](https://discord.com/invite/zpA9S2DR7s)
35
- <!--[![LinkedIn](https://img.shields.io/badge/LinkedIn-CocoIndex-5B5BD6?logo=linkedin&logoColor=white)](https://www.linkedin.com/company/cocoindex) -->
36
- <!--[![X (Twitter)](https://img.shields.io/twitter/follow/cocoindex_io)](https://twitter.com/intent/follow?screen_name=cocoindex_io) -->
37
-
38
34
  </div>
39
35
 
40
- CocoIndex is the world's first open-source engine that supports both custom transformation logic and incremental updates specialized for data indexing.
36
+ **CocoIndex** is an ultra-performant data transformation framework, with its core engine written in Rust. It aims to make it easy to prepare fresh data for AI - whether creating embeddings, building knowledge graphs, or performing other data transformations - and to take real-time data pipelines beyond traditional SQL.
37
+
38
+ <p align="center">
39
+ <img src="https://cocoindex.io/images/cocoindex-features.png" alt="CocoIndex Features" width="500">
40
+ </p>
41
+
42
+ The philosophy is to have the framework handle source updates, so that developers only need to worry about defining a series of data transformations, inspired by spreadsheets.
43
+
44
+ ## Dataflow programming
45
+ Unlike a workflow orchestration framework where data is usually opaque, in CocoIndex, data and data operations are first-class citizens. CocoIndex follows the [Dataflow](https://en.wikipedia.org/wiki/Dataflow_programming) programming model. Each transformation creates a new field based solely on input fields, without hidden state or value mutation. All data before/after each transformation is observable, with lineage out of the box.
46
+
47
+ **Particularly**, users don't explicitly mutate data by creating, updating and deleting. Rather, they define something like - for a set of source data, this is the transformation or formula. The framework takes care of the data operations such as when to create, update, or delete.
48
+
49
+ ```python
50
+ # import
51
+ data['content'] = flow_builder.add_source(...)
52
+
53
+ # transform
54
+ data['out'] = data['content']
55
+ .transform(...)
56
+ .transform(...)
57
+
58
+ # collect data
59
+ collector.collect(...)
60
+
61
+ # export to db, vector db, graph db ...
62
+ collector.export(...)
63
+ ```
64
+
65
+ ## Data Freshness
66
+ As a data framework, CocoIndex takes it to the next level on data freshness. **Incremental processing** is one of the core values provided by CocoIndex.
67
+
41
68
  <p align="center">
42
- <img src="https://cocoindex.io/images/venn.svg" alt="CocoIndex">
69
+ <img src="https://github.com/user-attachments/assets/f4eb29b3-84ee-4fa0-a1e2-80eedeeabde6" alt="Incremental Processing" width="700">
43
70
  </p>
44
- With CocoIndex, users declare the transformation, CocoIndex creates & maintains an index, and keeps the derived index up to date based on source update, with minimal computation and changes.
71
+
72
+ The framework takes care of
73
+ - Change data capture.
74
+ - Figuring out exactly what needs to be updated, and updating only that without having to recompute everything.
75
+
76
+ This makes it fast to reflect any source updates in the target store. If you are concerned about surfacing stale data to AI agents and are spending a lot of effort on infrastructure to optimize latency, the framework handles it for you.
45
77
 
46
78
 
47
79
  ## Quick Start:
48
- If you're new to CocoIndex 🤗, we recommend checking out the 📖 [Documentation](https://cocoindex.io/docs) and ⚡ [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart). We also have a ▶️ [quick start video tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT) for you to jump start.
80
+ If you're new to CocoIndex, we recommend checking out
81
+ - 📖 [Documentation](https://cocoindex.io/docs)
82
+ - ⚡ [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart)
83
+ - 🎬 [Quick Start Video Tutorial](https://youtu.be/gv5R8nOXsWU?si=9ioeKYkMEnYevTXT)
49
84
 
50
85
  ### Setup
86
+
51
87
  1. Install CocoIndex Python library
52
88
 
53
89
  ```bash
54
90
  pip install -U cocoindex
55
91
  ```
56
92
 
57
- 2. Setup Postgres with pgvector extension; or bring up a Postgres database using docker compose:
93
+ 2. [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one. CocoIndex uses it for incremental processing.
58
94
 
59
- - Make sure Docker Compose is installed: [docs](https://docs.docker.com/compose/install/)
60
- - Start a Postgres SQL database for cocoindex using our docker compose config:
61
95
 
62
- ```bash
63
- docker compose -f <(curl -L https://raw.githubusercontent.com/cocoindex-io/cocoindex/refs/heads/main/dev/postgres.yaml) up -d
64
- ```
96
+ ### Define data flow
65
97
 
66
- ### Start your first indexing flow!
67
- Follow [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart) to define your first indexing flow.
68
- A common indexing flow looks like:
98
+ Follow [Quick Start Guide](https://cocoindex.io/docs/getting_started/quickstart) to define your first indexing flow. An example flow looks like:
69
99
 
70
100
  ```python
71
101
  @cocoindex.flow_def(name="TextEmbedding")
@@ -106,10 +136,11 @@ def text_embedding_flow(flow_builder: cocoindex.FlowBuilder, data_scope: cocoind
106
136
  ```
107
137
 
108
138
  It defines an index flow like this:
109
- ![Flow diagram](docs/docs/core/flow_example.svg)
110
139
 
111
- ### Play with existing example and demo
112
- Go to the [examples directory](examples) to try out with any of the examples, following instructions under specific example directory.
140
+ <img width="363" alt="Data Flow" src="https://github.com/user-attachments/assets/2ea7be6d-3d94-42b1-b2bd-22515577e463" />
141
+
142
+
143
+ ## 🚀 Examples and demo
113
144
 
114
145
  | Example | Description |
115
146
  |---------|-------------|
@@ -121,8 +152,10 @@ Go to the [examples directory](examples) to try out with any of the examples, fo
121
152
  | [Docs to Knowledge Graph](examples/docs_to_knowledge_graph) | Extract relationships from Markdown documents and build a knowledge graph |
122
153
  | [Embeddings to Qdrant](examples/text_embedding_qdrant) | Index documents in a Qdrant collection for semantic search |
123
154
  | [FastAPI Server with Docker](examples/fastapi_server_docker) | Run the semantic search server in a Dockerized FastAPI setup |
155
+ | [Product_Taxonomy_Knowledge_Graph](examples/product_taxonomy_knowledge_graph) | Build knowledge graph for product recommendations |
156
+ | [Image Search with Vision API](examples/image_search_example) | Generate detailed captions for images using a vision model, embed them, and enable semantic search via FastAPI, served on a React frontend. |
124
157
 
125
- More coming and stay tuned! If there's any specific examples you would like to see, please let us know in our [Discord community](https://discord.com/invite/zpA9S2DR7s) 🌱.
158
+ More coming and stay tuned 👀!
126
159
 
127
160
  ## 📖 Documentation
128
161
  For detailed documentation, visit [CocoIndex Documentation](https://cocoindex.io/docs), including a [Quickstart guide](https://cocoindex.io/docs/getting_started/quickstart).
@@ -136,13 +169,13 @@ Welcome with a huge coconut hug 🥥⋆。˚🤗. We are super excited for commu
136
169
  Join our community here:
137
170
 
138
171
  - 🌟 [Star us on GitHub](https://github.com/cocoindex-io/cocoindex)
139
- - 💬 [Start a GitHub Discussion](https://github.com/cocoindex-io/cocoindex/discussions)
140
172
  - 👋 [Join our Discord community](https://discord.com/invite/zpA9S2DR7s)
141
- - 𝕏 [Follow us on X](https://x.com/cocoindex_io)
142
- - 🐚 [Follow us on LinkedIn](https://www.linkedin.com/company/cocoindex/about/)
143
173
  - ▶️ [Subscribe to our YouTube channel](https://www.youtube.com/@cocoindex-io)
144
174
  - 📜 [Read our blog posts](https://cocoindex.io/blogs/)
145
175
 
176
+ ## Support us:
177
+ We are constantly improving, and more features and examples are coming soon. If you love this project, please drop us a star ⭐ at GitHub repo [![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex) to stay tuned and help us grow.
178
+
146
179
  ## License
147
180
  CocoIndex is Apache 2.0 licensed.
148
181
 
@@ -1,25 +1,25 @@
1
- cocoindex-0.1.33.dist-info/METADATA,sha256=SNak6BWyWii_I2_xCTt5KPvqobfaGtBJuywUfhBFMuo,8236
2
- cocoindex-0.1.33.dist-info/WHEEL,sha256=wsVBlw9xyAuHecZeOYqJ_tA7emUKfXYOn-_180uZRi4,104
3
- cocoindex-0.1.33.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
1
+ cocoindex-0.1.35.dist-info/METADATA,sha256=pcN86SngXFfIOvvQtGQREkVyTBemfX5JXDNc2PQf0fg,9686
2
+ cocoindex-0.1.35.dist-info/WHEEL,sha256=M0oGXcMDUVEBxvyDRZ1SJRlU2WxAfG7DBwXO4GUZt1Q,104
3
+ cocoindex-0.1.35.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
4
+ cocoindex/__init__.py,sha256=LpB0VjGvkD1beio8R9RCT6PI3eU0keV-3sBL45fHTQE,690
5
+ cocoindex/_engine.cpython-311-darwin.so,sha256=jkZjB2b_IwXug3tGtCJnCErcVoz44RCB41PFztm8sDc,49711104
6
+ cocoindex/auth_registry.py,sha256=NsALZ3SKsDG9cPdrlTlalIqUvgbgFOaFGAbWJNedtJE,692
7
+ cocoindex/cli.py,sha256=QdZjgnABuDQfy6JiAxeAJiQMI5FNT9FQGLiYAUtLMw8,8923
8
+ cocoindex/convert.py,sha256=mBUTa_Ag39_ut-yE_jc1wqS3zLjtOm6QKet-bqJ-RWc,5947
9
+ cocoindex/flow.py,sha256=MZZ0Uf0ObAzR1yIjUecRgA-U0t__95eoLBK_DxwwLnk,23375
4
10
  cocoindex/functions.py,sha256=F79dNmGE127LaU67kF5Oqtf_tIzebFQH7MkyceMX4-s,1830
5
- cocoindex/query.py,sha256=8_3Lb_EVjZtl2ZyJNZGX16LoKXEd-PL8OjY-zs9GQeA,3205
6
11
  cocoindex/index.py,sha256=LssEOuZi6AqhwKtZM3QFeQpa9T-0ELi8G5DsrYKECvc,534
7
12
  cocoindex/lib.py,sha256=812GB8Z-2PyjG73Odvw5jtNBLnoeU9aOh9s2ZnETKa8,2329
8
- cocoindex/auth_registry.py,sha256=NsALZ3SKsDG9cPdrlTlalIqUvgbgFOaFGAbWJNedtJE,692
9
- cocoindex/convert.py,sha256=mBUTa_Ag39_ut-yE_jc1wqS3zLjtOm6QKet-bqJ-RWc,5947
10
- cocoindex/tests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
- cocoindex/tests/test_convert.py,sha256=WPRKp0jv_uSEM81RGWEAmsax-J-FtXt90mZ0yEnvGLs,11236
12
- cocoindex/__init__.py,sha256=CMfiZ-CROvrcE6jjkmzEZBk4HjuN6s6nfRXtSd0c_z8,684
13
- cocoindex/flow.py,sha256=KVbB_Ebm0IpJgZxV4BLg30fjIPmsGFhrtmQOBqCZaIk,23037
14
13
  cocoindex/llm.py,sha256=_3rtahuKcqcEHPkFSwhXOSrekZyGxVApPoYtlU_chcA,348
15
- cocoindex/setting.py,sha256=pms1blwlXIOqZIpye-rfiwzqYUCAC8oEL7mQM5A160g,2356
16
- cocoindex/runtime.py,sha256=jqRnWkkIlAhE04gi4y0Y5bzuq9FX4j0aVNU-nengLJk,980
17
14
  cocoindex/op.py,sha256=OGYRYl7gPa7X7iSU30iTrCzvqRBu7jQqfvN4vjG__dA,10730
18
- cocoindex/sources.py,sha256=wZFU8lwSXjyofJR-syySH9fTyPnBlAPJ6-1hQNX8fGA,936
19
- cocoindex/setup.py,sha256=W1HshwYk_K2aeLOVn_e62ZOXBO9yWsoUboRiH4SjF48,496
20
- cocoindex/cli.py,sha256=Vh8bNZ41yLr1l_jJR1Z_b7mY-dOvN-EbiCRxDvtIsRk,8885
21
15
  cocoindex/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- cocoindex/typing.py,sha256=BI2vPw4Iu4S3aznNJQrfM2LZU_weGYASTXF1W3ZWh_Y,8568
16
+ cocoindex/query.py,sha256=8_3Lb_EVjZtl2ZyJNZGX16LoKXEd-PL8OjY-zs9GQeA,3205
17
+ cocoindex/runtime.py,sha256=jqRnWkkIlAhE04gi4y0Y5bzuq9FX4j0aVNU-nengLJk,980
18
+ cocoindex/setting.py,sha256=pms1blwlXIOqZIpye-rfiwzqYUCAC8oEL7mQM5A160g,2356
19
+ cocoindex/setup.py,sha256=AQLbtBLuJX066IANS7BGp20246mAGQ_4Z0W6MVJcQzY,481
20
+ cocoindex/sources.py,sha256=wZFU8lwSXjyofJR-syySH9fTyPnBlAPJ6-1hQNX8fGA,936
23
21
  cocoindex/storages.py,sha256=MFMsfyOCYMggTWeWrOi82miqOXQmiUuqq828x5htBr0,2207
24
- cocoindex/_engine.cpython-311-darwin.so,sha256=cIk37QA-gNIYrOYdwqQ9HLnLhsIdbcAoNP1ZxZujn6A,49645984
25
- cocoindex-0.1.33.dist-info/RECORD,,
22
+ cocoindex/tests/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
23
+ cocoindex/tests/test_convert.py,sha256=WPRKp0jv_uSEM81RGWEAmsax-J-FtXt90mZ0yEnvGLs,11236
24
+ cocoindex/typing.py,sha256=BI2vPw4Iu4S3aznNJQrfM2LZU_weGYASTXF1W3ZWh_Y,8568
25
+ cocoindex-0.1.35.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: maturin (1.8.3)
2
+ Generator: maturin (1.8.4)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp311-cp311-macosx_11_0_arm64