cocoindex 0.1.44__cp312-cp312-macosx_11_0_arm64.whl → 0.1.46__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cocoindex/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Cocoindex is a framework for building and running indexing pipelines.
3
3
  """
4
+
4
5
  from . import functions, query, sources, storages, cli, utils
5
6
 
6
7
  from .auth_registry import AuthEntryReference, add_auth_entry, ref_auth_entry
@@ -13,4 +14,4 @@ from .llm import LlmSpec, LlmApiType
13
14
  from .index import VectorSimilarityMetric, VectorIndexDef, IndexOptions
14
15
  from .setting import DatabaseConnectionSpec, Settings, ServerSettings
15
16
  from .setting import get_app_namespace
16
- from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
17
+ from .typing import Float32, Float64, LocalDateTime, OffsetDateTime, Range, Vector, Json
Binary file
@@ -5,21 +5,25 @@ Auth registry is used to register and reference auth entries.
5
5
  from dataclasses import dataclass
6
6
  from typing import Generic, TypeVar
7
7
 
8
- from . import _engine
8
+ from . import _engine # type: ignore
9
9
  from .convert import dump_engine_object
10
10
 
11
11
  T = TypeVar("T")
12
12
 
13
+
13
14
  @dataclass
14
15
  class AuthEntryReference(Generic[T]):
15
16
  """Reference an auth entry by its key."""
17
+
16
18
  key: str
17
19
 
20
+
18
21
  def add_auth_entry(key: str, value: T) -> AuthEntryReference[T]:
19
22
  """Add an auth entry to the registry. Returns its reference."""
20
23
  _engine.add_auth_entry(key, dump_engine_object(value))
21
24
  return AuthEntryReference(key)
22
25
 
23
- def ref_auth_entry(key: str) -> AuthEntryReference:
26
+
27
+ def ref_auth_entry(key: str) -> AuthEntryReference[T]:
24
28
  """Reference an auth entry by its key."""
25
- return AuthEntryReference(key)
29
+ return AuthEntryReference(key)
cocoindex/cli.py CHANGED
@@ -8,13 +8,16 @@ import types
8
8
 
9
9
  from dotenv import load_dotenv, find_dotenv
10
10
  from rich.console import Console
11
+ from rich.panel import Panel
11
12
  from rich.table import Table
13
+ from typing import Any
12
14
 
13
- from . import flow, lib, setting, query
15
+ from . import flow, lib, setting
14
16
  from .setup import sync_setup, drop_setup, flow_names_with_setup, apply_setup_changes
15
17
 
16
18
  # Create ServerSettings lazily upon first call, as environment variables may be loaded from files, etc.
17
- COCOINDEX_HOST = 'https://cocoindex.io'
19
+ COCOINDEX_HOST = "https://cocoindex.io"
20
+
18
21
 
19
22
  def _parse_app_flow_specifier(specifier: str) -> tuple[str, str | None]:
20
23
  """Parses 'module_or_path[:flow_name]' into (module_or_path, flow_name | None)."""
@@ -25,7 +28,7 @@ def _parse_app_flow_specifier(specifier: str) -> tuple[str, str | None]:
25
28
  raise click.BadParameter(
26
29
  f"Application module/path part is missing or invalid in specifier: '{specifier}'. "
27
30
  "Expected format like 'myapp.py' or 'myapp:MyFlow'.",
28
- param_hint="APP_SPECIFIER"
31
+ param_hint="APP_SPECIFIER",
29
32
  )
30
33
 
31
34
  if len(parts) == 1:
@@ -33,7 +36,7 @@ def _parse_app_flow_specifier(specifier: str) -> tuple[str, str | None]:
33
36
 
34
37
  flow_ref_part = parts[1]
35
38
 
36
- if not flow_ref_part: # Handles empty string after colon
39
+ if not flow_ref_part: # Handles empty string after colon
37
40
  return app_ref, None
38
41
 
39
42
  if not flow_ref_part.isidentifier():
@@ -41,10 +44,11 @@ def _parse_app_flow_specifier(specifier: str) -> tuple[str, str | None]:
41
44
  f"Invalid format for flow name part ('{flow_ref_part}') in specifier '{specifier}'. "
42
45
  "If a colon separates the application from the flow name, the flow name should typically be "
43
46
  "a valid identifier (e.g., alphanumeric with underscores, not starting with a number).",
44
- param_hint="APP_SPECIFIER"
47
+ param_hint="APP_SPECIFIER",
45
48
  )
46
49
  return app_ref, flow_ref_part
47
50
 
51
+
48
52
  def _get_app_ref_from_specifier(
49
53
  specifier: str,
50
54
  ) -> str:
@@ -59,12 +63,13 @@ def _get_app_ref_from_specifier(
59
63
  click.style(
60
64
  f"Ignoring flow name '{flow_ref}' in '{specifier}': "
61
65
  f"this command operates on the entire app/module '{app_ref}'.",
62
- fg='yellow'
66
+ fg="yellow",
63
67
  ),
64
- err=True
68
+ err=True,
65
69
  )
66
70
  return app_ref
67
71
 
72
+
68
73
  def _load_user_app(app_target: str) -> types.ModuleType:
69
74
  """
70
75
  Loads the user's application, which can be a file path or an installed module name.
@@ -81,7 +86,7 @@ def _load_user_app(app_target: str) -> types.ModuleType:
81
86
  app_path = os.path.abspath(app_target)
82
87
  app_dir = os.path.dirname(app_path)
83
88
  module_name = os.path.splitext(os.path.basename(app_path))[0]
84
-
89
+
85
90
  if app_dir not in sys.path:
86
91
  sys.path.insert(0, app_dir)
87
92
  try:
@@ -89,7 +94,9 @@ def _load_user_app(app_target: str) -> types.ModuleType:
89
94
  if spec is None:
90
95
  raise ImportError(f"Could not create spec for file: {app_path}")
91
96
  module = importlib.util.module_from_spec(spec)
92
- sys.modules[spec.name] = module
97
+ sys.modules[spec.name] = module
98
+ if spec.loader is None:
99
+ raise ImportError(f"Could not create loader for file: {app_path}")
93
100
  spec.loader.exec_module(module)
94
101
  return module
95
102
  except (ImportError, FileNotFoundError, PermissionError) as e:
@@ -104,19 +111,24 @@ def _load_user_app(app_target: str) -> types.ModuleType:
104
111
  except ImportError as e:
105
112
  raise click.ClickException(f"Failed to load module '{app_target}': {e}")
106
113
  except Exception as e:
107
- raise click.ClickException(f"Unexpected error importing module '{app_target}': {e}")
114
+ raise click.ClickException(
115
+ f"Unexpected error importing module '{app_target}': {e}"
116
+ )
117
+
108
118
 
109
119
  @click.group()
110
120
  @click.version_option(package_name="cocoindex", message="%(prog)s version %(version)s")
111
121
  @click.option(
112
122
  "--env-file",
113
- type=click.Path(exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True),
123
+ type=click.Path(
124
+ exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True
125
+ ),
114
126
  help="Path to a .env file to load environment variables from. "
115
- "If not provided, attempts to load '.env' from the current directory.",
127
+ "If not provided, attempts to load '.env' from the current directory.",
116
128
  default=None,
117
- show_default=False
129
+ show_default=False,
118
130
  )
119
- def cli(env_file: str | None):
131
+ def cli(env_file: str | None) -> None:
120
132
  """
121
133
  CLI for Cocoindex.
122
134
  """
@@ -133,9 +145,10 @@ def cli(env_file: str | None):
133
145
  except Exception as e:
134
146
  raise click.ClickException(f"Failed to initialize CocoIndex library: {e}")
135
147
 
148
+
136
149
  @cli.command()
137
150
  @click.argument("app_target", type=str, required=False)
138
- def ls(app_target: str | None):
151
+ def ls(app_target: str | None) -> None:
139
152
  """
140
153
  List all flows.
141
154
 
@@ -145,45 +158,49 @@ def ls(app_target: str | None):
145
158
  If APP_TARGET is omitted, lists all flows that have a persisted
146
159
  setup in the backend.
147
160
  """
161
+ persisted_flow_names = flow_names_with_setup()
148
162
  if app_target:
149
163
  app_ref = _get_app_ref_from_specifier(app_target)
150
164
  _load_user_app(app_ref)
151
165
 
152
166
  current_flow_names = set(flow.flow_names())
153
- persisted_flow_names = set(flow_names_with_setup())
154
167
 
155
168
  if not current_flow_names:
156
169
  click.echo(f"No flows are defined in '{app_ref}'.")
157
170
  return
158
171
 
159
172
  has_missing = False
173
+ persisted_flow_names_set = set(persisted_flow_names)
160
174
  for name in sorted(current_flow_names):
161
- if name in persisted_flow_names:
175
+ if name in persisted_flow_names_set:
162
176
  click.echo(name)
163
177
  else:
164
178
  click.echo(f"{name} [+]")
165
179
  has_missing = True
166
180
 
167
181
  if has_missing:
168
- click.echo('')
169
- click.echo('Notes:')
170
- click.echo(' [+]: Flows present in the current process, but missing setup.')
182
+ click.echo("")
183
+ click.echo("Notes:")
184
+ click.echo(
185
+ " [+]: Flows present in the current process, but missing setup."
186
+ )
171
187
 
172
188
  else:
173
- persisted_flow_names = sorted(flow_names_with_setup())
174
-
175
189
  if not persisted_flow_names:
176
190
  click.echo("No persisted flow setups found in the backend.")
177
191
  return
178
192
 
179
- for name in persisted_flow_names:
193
+ for name in sorted(persisted_flow_names):
180
194
  click.echo(name)
181
195
 
196
+
182
197
  @cli.command()
183
198
  @click.argument("app_flow_specifier", type=str)
184
- @click.option("--color/--no-color", default=True, help="Enable or disable colored output.")
199
+ @click.option(
200
+ "--color/--no-color", default=True, help="Enable or disable colored output."
201
+ )
185
202
  @click.option("--verbose", is_flag=True, help="Show verbose output with full details.")
186
- def show(app_flow_specifier: str, color: bool, verbose: bool):
203
+ def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
187
204
  """
188
205
  Show the flow spec and schema.
189
206
 
@@ -208,7 +225,7 @@ def show(app_flow_specifier: str, color: bool, verbose: bool):
208
225
  table = Table(
209
226
  title=f"Schema for Flow: {fl.name}",
210
227
  title_style="cyan",
211
- header_style="bold magenta"
228
+ header_style="bold magenta",
212
229
  )
213
230
  table.add_column("Field", style="cyan")
214
231
  table.add_column("Type", style="green")
@@ -217,9 +234,10 @@ def show(app_flow_specifier: str, color: bool, verbose: bool):
217
234
  table.add_row(field_name, field_type, attr_str)
218
235
  console.print(table)
219
236
 
237
+
220
238
  @cli.command()
221
239
  @click.argument("app_target", type=str)
222
- def setup(app_target: str):
240
+ def setup(app_target: str) -> None:
223
241
  """
224
242
  Check and apply backend setup changes for flows, including the internal and target storage
225
243
  (to export).
@@ -235,19 +253,29 @@ def setup(app_target: str):
235
253
  click.echo("No changes need to be pushed.")
236
254
  return
237
255
  if not click.confirm(
238
- "Changes need to be pushed. Continue? [yes/N]", default=False, show_default=False):
256
+ "Changes need to be pushed. Continue? [yes/N]",
257
+ default=False,
258
+ show_default=False,
259
+ ):
239
260
  return
240
261
  apply_setup_changes(setup_status)
241
262
 
263
+
242
264
  @cli.command("drop")
243
265
  @click.argument("app_target", type=str, required=False)
244
266
  @click.argument("flow_name", type=str, nargs=-1)
245
267
  @click.option(
246
- "-a", "--all", "drop_all", is_flag=True, show_default=True, default=False,
268
+ "-a",
269
+ "--all",
270
+ "drop_all",
271
+ is_flag=True,
272
+ show_default=True,
273
+ default=False,
247
274
  help="Drop the backend setup for all flows with persisted setup, "
248
- "even if not defined in the current process."
249
- "If used, APP_TARGET and any listed flow names are ignored.")
250
- def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool):
275
+ "even if not defined in the current process."
276
+ "If used, APP_TARGET and any listed flow names are ignored.",
277
+ )
278
+ def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool) -> None:
251
279
  """
252
280
  Drop the backend setup for flows.
253
281
 
@@ -262,20 +290,29 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool):
262
290
 
263
291
  if drop_all:
264
292
  if app_target or flow_name:
265
- click.echo("Warning: When --all is used, APP_TARGET and any individual flow names are ignored.", err=True)
293
+ click.echo(
294
+ "Warning: When --all is used, APP_TARGET and any individual flow names are ignored.",
295
+ err=True,
296
+ )
266
297
  flow_names = flow_names_with_setup()
267
298
  elif app_target:
268
299
  app_ref = _get_app_ref_from_specifier(app_target)
269
300
  _load_user_app(app_ref)
270
301
  if flow_name:
271
302
  flow_names = list(flow_name)
272
- click.echo(f"Preparing to drop specified flows: {', '.join(flow_names)} (in '{app_ref}').", err=True)
303
+ click.echo(
304
+ f"Preparing to drop specified flows: {', '.join(flow_names)} (in '{app_ref}').",
305
+ err=True,
306
+ )
273
307
  else:
274
308
  flow_names = flow.flow_names()
275
309
  if not flow_names:
276
310
  click.echo(f"No flows found defined in '{app_ref}' to drop.")
277
311
  return
278
- click.echo(f"Preparing to drop all flows defined in '{app_ref}': {', '.join(flow_names)}.", err=True)
312
+ click.echo(
313
+ f"Preparing to drop all flows defined in '{app_ref}': {', '.join(flow_names)}.",
314
+ err=True,
315
+ )
279
316
  else:
280
317
  raise click.UsageError(
281
318
  "Missing arguments. You must either provide an APP_TARGET (to target app-specific flows) "
@@ -293,20 +330,33 @@ def drop(app_target: str | None, flow_name: tuple[str, ...], drop_all: bool):
293
330
  return
294
331
  if not click.confirm(
295
332
  f"\nThis will apply changes to drop setup for: {', '.join(flow_names)}. Continue? [yes/N]",
296
- default=False, show_default=False):
333
+ default=False,
334
+ show_default=False,
335
+ ):
297
336
  click.echo("Drop operation aborted by user.")
298
337
  return
299
338
  apply_setup_changes(setup_status)
300
339
 
340
+
301
341
  @cli.command()
302
342
  @click.argument("app_flow_specifier", type=str)
303
343
  @click.option(
304
- "-L", "--live", is_flag=True, show_default=True, default=False,
305
- help="Continuously watch changes from data sources and apply to the target index.")
344
+ "-L",
345
+ "--live",
346
+ is_flag=True,
347
+ show_default=True,
348
+ default=False,
349
+ help="Continuously watch changes from data sources and apply to the target index.",
350
+ )
306
351
  @click.option(
307
- "-q", "--quiet", is_flag=True, show_default=True, default=False,
308
- help="Avoid printing anything to the standard output, e.g. statistics.")
309
- def update(app_flow_specifier: str, live: bool, quiet: bool):
352
+ "-q",
353
+ "--quiet",
354
+ is_flag=True,
355
+ show_default=True,
356
+ default=False,
357
+ help="Avoid printing anything to the standard output, e.g. statistics.",
358
+ )
359
+ def update(app_flow_specifier: str, live: bool, quiet: bool) -> Any:
310
360
  """
311
361
  Update the index to reflect the latest data from data sources.
312
362
 
@@ -324,15 +374,26 @@ def update(app_flow_specifier: str, live: bool, quiet: bool):
324
374
  updater.wait()
325
375
  return updater.update_stats()
326
376
 
377
+
327
378
  @cli.command()
328
379
  @click.argument("app_flow_specifier", type=str)
329
380
  @click.option(
330
- "-o", "--output-dir", type=str, required=False,
331
- help="The directory to dump the output to.")
381
+ "-o",
382
+ "--output-dir",
383
+ type=str,
384
+ required=False,
385
+ help="The directory to dump the output to.",
386
+ )
332
387
  @click.option(
333
- "--cache/--no-cache", is_flag=True, show_default=True, default=True,
334
- help="Use already-cached intermediate data if available.")
335
- def evaluate(app_flow_specifier: str, output_dir: str | None, cache: bool = True):
388
+ "--cache/--no-cache",
389
+ is_flag=True,
390
+ show_default=True,
391
+ default=True,
392
+ help="Use already-cached intermediate data if available.",
393
+ )
394
+ def evaluate(
395
+ app_flow_specifier: str, output_dir: str | None, cache: bool = True
396
+ ) -> None:
336
397
  """
337
398
  Evaluate the flow and dump flow outputs to files.
338
399
 
@@ -358,32 +419,64 @@ def evaluate(app_flow_specifier: str, output_dir: str | None, cache: bool = True
358
419
  options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
359
420
  fl.evaluate_and_dump(options)
360
421
 
422
+
361
423
  @cli.command()
362
424
  @click.argument("app_target", type=str)
363
425
  @click.option(
364
- "-a", "--address", type=str,
426
+ "-a",
427
+ "--address",
428
+ type=str,
365
429
  help="The address to bind the server to, in the format of IP:PORT. "
366
- "If unspecified, the address specified in COCOINDEX_SERVER_ADDRESS will be used.")
430
+ "If unspecified, the address specified in COCOINDEX_SERVER_ADDRESS will be used.",
431
+ )
367
432
  @click.option(
368
- "-c", "--cors-origin", type=str,
433
+ "-c",
434
+ "--cors-origin",
435
+ type=str,
369
436
  help="The origins of the clients (e.g. CocoInsight UI) to allow CORS from. "
370
- "Multiple origins can be specified as a comma-separated list. "
371
- "e.g. `https://cocoindex.io,http://localhost:3000`. "
372
- "Origins specified in COCOINDEX_SERVER_CORS_ORIGINS will also be included.")
437
+ "Multiple origins can be specified as a comma-separated list. "
438
+ "e.g. `https://cocoindex.io,http://localhost:3000`. "
439
+ "Origins specified in COCOINDEX_SERVER_CORS_ORIGINS will also be included.",
440
+ )
373
441
  @click.option(
374
- "-ci", "--cors-cocoindex", is_flag=True, show_default=True, default=False,
375
- help=f"Allow {COCOINDEX_HOST} to access the server.")
442
+ "-ci",
443
+ "--cors-cocoindex",
444
+ is_flag=True,
445
+ show_default=True,
446
+ default=False,
447
+ help=f"Allow {COCOINDEX_HOST} to access the server.",
448
+ )
376
449
  @click.option(
377
- "-cl", "--cors-local", type=int,
378
- help="Allow http://localhost:<port> to access the server.")
450
+ "-cl",
451
+ "--cors-local",
452
+ type=int,
453
+ help="Allow http://localhost:<port> to access the server.",
454
+ )
379
455
  @click.option(
380
- "-L", "--live-update", is_flag=True, show_default=True, default=False,
381
- help="Continuously watch changes from data sources and apply to the target index.")
456
+ "-L",
457
+ "--live-update",
458
+ is_flag=True,
459
+ show_default=True,
460
+ default=False,
461
+ help="Continuously watch changes from data sources and apply to the target index.",
462
+ )
382
463
  @click.option(
383
- "-q", "--quiet", is_flag=True, show_default=True, default=False,
384
- help="Avoid printing anything to the standard output, e.g. statistics.")
385
- def server(app_target: str, address: str | None, live_update: bool, quiet: bool,
386
- cors_origin: str | None, cors_cocoindex: bool, cors_local: int | None):
464
+ "-q",
465
+ "--quiet",
466
+ is_flag=True,
467
+ show_default=True,
468
+ default=False,
469
+ help="Avoid printing anything to the standard output, e.g. statistics.",
470
+ )
471
+ def server(
472
+ app_target: str,
473
+ address: str | None,
474
+ live_update: bool,
475
+ quiet: bool,
476
+ cors_origin: str | None,
477
+ cors_cocoindex: bool,
478
+ cors_local: int | None,
479
+ ) -> None:
387
480
  """
388
481
  Start a HTTP server providing REST APIs.
389
482
 
@@ -420,20 +513,46 @@ def server(app_target: str, address: str | None, live_update: bool, quiet: bool,
420
513
 
421
514
  def _flow_name(name: str | None) -> str:
422
515
  names = flow.flow_names()
423
- available = ', '.join(sorted(names))
516
+ available = ", ".join(sorted(names))
424
517
  if name is not None:
425
518
  if name not in names:
426
- raise click.BadParameter(f"Flow '{name}' not found.\nAvailable: {available if names else 'None'}")
519
+ raise click.BadParameter(
520
+ f"Flow '{name}' not found.\nAvailable: {available if names else 'None'}"
521
+ )
427
522
  return name
428
523
  if len(names) == 0:
429
524
  raise click.UsageError("No flows available in the loaded application.")
430
525
  elif len(names) == 1:
431
526
  return names[0]
432
527
  else:
433
- raise click.UsageError(f"Multiple flows available, please specify which flow to target by appending :FlowName to the APP_TARGET.\nAvailable: {available}")
528
+ console = Console()
529
+ index = 0
530
+
531
+ while True:
532
+ console.clear()
533
+ console.print(
534
+ Panel.fit("Select a Flow", title_align="left", border_style="cyan")
535
+ )
536
+ for i, fname in enumerate(names):
537
+ console.print(
538
+ f"> [bold green]{fname}[/bold green]"
539
+ if i == index
540
+ else f" {fname}"
541
+ )
542
+
543
+ key = click.getchar()
544
+ if key == "\x1b[A": # Up arrow
545
+ index = (index - 1) % len(names)
546
+ elif key == "\x1b[B": # Down arrow
547
+ index = (index + 1) % len(names)
548
+ elif key in ("\r", "\n"): # Enter
549
+ console.clear()
550
+ return names[index]
551
+
434
552
 
435
553
  def _flow_by_name(name: str | None) -> flow.Flow:
436
554
  return flow.flow_by_name(_flow_name(name))
437
555
 
556
+
438
557
  if __name__ == "__main__":
439
- cli()
558
+ cli()