cocoindex 0.3.4__cp311-abi3-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cocoindex/__init__.py +114 -0
- cocoindex/_engine.abi3.so +0 -0
- cocoindex/auth_registry.py +44 -0
- cocoindex/cli.py +830 -0
- cocoindex/engine_object.py +214 -0
- cocoindex/engine_value.py +550 -0
- cocoindex/flow.py +1281 -0
- cocoindex/functions/__init__.py +40 -0
- cocoindex/functions/_engine_builtin_specs.py +66 -0
- cocoindex/functions/colpali.py +247 -0
- cocoindex/functions/sbert.py +77 -0
- cocoindex/index.py +50 -0
- cocoindex/lib.py +75 -0
- cocoindex/llm.py +47 -0
- cocoindex/op.py +1047 -0
- cocoindex/py.typed +0 -0
- cocoindex/query_handler.py +57 -0
- cocoindex/runtime.py +78 -0
- cocoindex/setting.py +171 -0
- cocoindex/setup.py +92 -0
- cocoindex/sources/__init__.py +5 -0
- cocoindex/sources/_engine_builtin_specs.py +120 -0
- cocoindex/subprocess_exec.py +277 -0
- cocoindex/targets/__init__.py +5 -0
- cocoindex/targets/_engine_builtin_specs.py +153 -0
- cocoindex/targets/lancedb.py +466 -0
- cocoindex/tests/__init__.py +0 -0
- cocoindex/tests/test_engine_object.py +331 -0
- cocoindex/tests/test_engine_value.py +1724 -0
- cocoindex/tests/test_optional_database.py +249 -0
- cocoindex/tests/test_transform_flow.py +300 -0
- cocoindex/tests/test_typing.py +553 -0
- cocoindex/tests/test_validation.py +134 -0
- cocoindex/typing.py +834 -0
- cocoindex/user_app_loader.py +53 -0
- cocoindex/utils.py +20 -0
- cocoindex/validation.py +104 -0
- cocoindex-0.3.4.dist-info/METADATA +288 -0
- cocoindex-0.3.4.dist-info/RECORD +42 -0
- cocoindex-0.3.4.dist-info/WHEEL +4 -0
- cocoindex-0.3.4.dist-info/entry_points.txt +2 -0
- cocoindex-0.3.4.dist-info/licenses/THIRD_PARTY_NOTICES.html +13249 -0
cocoindex/cli.py
ADDED
|
@@ -0,0 +1,830 @@
|
|
|
1
|
+
import atexit
|
|
2
|
+
import asyncio
|
|
3
|
+
import datetime
|
|
4
|
+
import importlib.util
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import signal
|
|
8
|
+
import threading
|
|
9
|
+
import sys
|
|
10
|
+
from types import FrameType
|
|
11
|
+
from typing import Any, Iterable
|
|
12
|
+
|
|
13
|
+
import click
|
|
14
|
+
import watchfiles
|
|
15
|
+
from dotenv import find_dotenv, load_dotenv
|
|
16
|
+
from rich.console import Console
|
|
17
|
+
from rich.panel import Panel
|
|
18
|
+
from rich.table import Table
|
|
19
|
+
|
|
20
|
+
from . import flow, lib, setting
|
|
21
|
+
from .setup import flow_names_with_setup
|
|
22
|
+
from .runtime import execution_context
|
|
23
|
+
from .subprocess_exec import add_user_app
|
|
24
|
+
from .user_app_loader import load_user_app, Error as UserAppLoaderError
|
|
25
|
+
|
|
26
|
+
# Base URL of the CocoIndex site. Used below as the CORS origin added by
# `--cors-cocoindex` and as the prefix of the CocoInsight link printed on start.
COCOINDEX_HOST = "https://cocoindex.io"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _parse_app_flow_specifier(specifier: str) -> tuple[str, str | None]:
|
|
30
|
+
"""Parses 'module_or_path[:flow_name]' into (module_or_path, flow_name | None)."""
|
|
31
|
+
parts = specifier.split(":", 1) # Split only on the first colon
|
|
32
|
+
app_ref = parts[0]
|
|
33
|
+
|
|
34
|
+
if not app_ref:
|
|
35
|
+
raise click.BadParameter(
|
|
36
|
+
f"Application module/path part is missing or invalid in specifier: '{specifier}'. "
|
|
37
|
+
"Expected format like 'myapp.py' or 'myapp:MyFlow'.",
|
|
38
|
+
param_hint="APP_SPECIFIER",
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
if len(parts) == 1:
|
|
42
|
+
return app_ref, None
|
|
43
|
+
|
|
44
|
+
flow_ref_part = parts[1]
|
|
45
|
+
|
|
46
|
+
if not flow_ref_part: # Handles empty string after colon
|
|
47
|
+
return app_ref, None
|
|
48
|
+
|
|
49
|
+
if not flow_ref_part.isidentifier():
|
|
50
|
+
raise click.BadParameter(
|
|
51
|
+
f"Invalid format for flow name part ('{flow_ref_part}') in specifier '{specifier}'. "
|
|
52
|
+
"If a colon separates the application from the flow name, the flow name should typically be "
|
|
53
|
+
"a valid identifier (e.g., alphanumeric with underscores, not starting with a number).",
|
|
54
|
+
param_hint="APP_SPECIFIER",
|
|
55
|
+
)
|
|
56
|
+
return app_ref, flow_ref_part
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _get_app_ref_from_specifier(
    specifier: str,
) -> str:
    """
    Return only the application reference (path or module) of `specifier`.

    If the specifier also carries a `:FlowName` component, that component is
    dropped and a yellow warning is written to stderr.
    """
    app_ref, flow_ref = _parse_app_flow_specifier(specifier)
    if flow_ref is None:
        return app_ref

    warning = (
        f"Ignoring flow name '{flow_ref}' in '{specifier}': "
        f"this command operates on the entire app/module '{app_ref}'."
    )
    click.echo(click.style(warning, fg="yellow"), err=True)
    return app_ref
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _load_user_app(app_target: str) -> None:
    """
    Load the user application and pass the target on to `add_user_app`.

    Raises:
        click.ClickException: If `app_target` is empty.
        ValueError: If `load_user_app` fails with a `UserAppLoaderError`
            (the loader error is kept as the cause).
    """
    if not app_target:
        raise click.ClickException("Application target not provided.")

    try:
        load_user_app(app_target)
    except UserAppLoaderError as load_error:
        raise ValueError(f"Failed to load APP_TARGET '{app_target}'") from load_error
    else:
        # Only reached when loading succeeded.
        add_user_app(app_target)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _initialize_cocoindex_in_process() -> None:
    """Register `lib.stop` to be called when the interpreter exits."""
    atexit.register(lib.stop)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@click.group()
@click.version_option(package_name="cocoindex", message="%(prog)s version %(version)s")
@click.option(
    "--env-file",
    type=click.Path(
        exists=True, file_okay=True, dir_okay=False, readable=True, resolve_path=True
    ),
    help="Path to a .env file to load environment variables from. "
    "If not provided, attempts to load '.env' from the current directory.",
    default=None,
    show_default=False,
)
@click.option(
    "--app-dir",
    help="Load apps from the specified directory. Default to the current directory.",
    default="",
    show_default=True,
)
def cli(env_file: str | None = None, app_dir: str | None = "") -> None:
    """
    CLI for Cocoindex.
    """
    # Prefer an explicit --env-file; otherwise search for a .env file starting
    # from the current working directory.
    dotenv_path = env_file or find_dotenv(usecwd=True)

    if load_dotenv(dotenv_path=dotenv_path):
        loaded_env_path = os.path.abspath(dotenv_path)
        click.echo(f"Loaded environment variables from: {loaded_env_path}\n", err=True)

    # Put the app directory first on the import path so user apps can be
    # imported; the default "" entry stands for the current directory.
    if app_dir is not None:
        sys.path.insert(0, app_dir)

    try:
        _initialize_cocoindex_in_process()
    except Exception as e:
        # Chain the original exception so its traceback is not lost.
        raise click.ClickException(
            f"Failed to initialize CocoIndex library: {e}"
        ) from e
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@cli.command()
@click.argument("app_target", type=str, required=False)
def ls(app_target: str | None) -> None:
    """
    List all flows.

    If `APP_TARGET` (`path/to/app.py` or a module) is provided, lists flows defined in the app and their backend setup status.

    If `APP_TARGET` is omitted, lists all flows that have a persisted setup in the backend.
    """
    persisted_flow_names = flow_names_with_setup()

    # No app given: report only what is persisted in the backend.
    if not app_target:
        if not persisted_flow_names:
            click.echo("No persisted flow setups found in the backend.")
            return
        for name in sorted(persisted_flow_names):
            click.echo(name)
        return

    app_ref = _get_app_ref_from_specifier(app_target)
    _load_user_app(app_ref)

    defined_names = set(flow.flow_names())
    if not defined_names:
        click.echo(f"No flows are defined in '{app_ref}'.")
        return

    persisted_lookup = set(persisted_flow_names)
    for name in sorted(defined_names):
        # Flows without a persisted setup are tagged with "[+]".
        click.echo(name if name in persisted_lookup else f"{name} [+]")

    # Explain the marker only when at least one flow carried it.
    if not defined_names <= persisted_lookup:
        click.echo("")
        click.echo("Notes:")
        click.echo(
            " [+]: Flows present in the current process, but missing setup."
        )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@cli.command()
@click.argument("app_flow_specifier", type=str)
@click.option(
    "--color/--no-color", default=True, help="Enable or disable colored output."
)
@click.option("--verbose", is_flag=True, help="Show verbose output with full details.")
def show(app_flow_specifier: str, color: bool, verbose: bool) -> None:
    """
    Show the flow spec and schema.

    `APP_FLOW_SPECIFIER`: Specifies the application and optionally the target flow. Can be one of the following formats:

    \b
    - `path/to/your_app.py`
    - `an_installed.module_name`
    - `path/to/your_app.py:SpecificFlowName`
    - `an_installed.module_name:SpecificFlowName`

    `:SpecificFlowName` can be omitted only if the application defines a single flow.
    """
    app_ref, flow_ref = _parse_app_flow_specifier(app_flow_specifier)
    _load_user_app(app_ref)
    fl = _flow_by_name(flow_ref)

    console = Console(no_color=not color)

    # First the rendered spec, then a blank separator line.
    console.print(fl._render_spec(verbose=verbose))
    console.print()

    # Then the schema as a three-column table, one row per field.
    schema_table = Table(
        title=f"Schema for Flow: {fl.name}",
        title_style="cyan",
        header_style="bold magenta",
    )
    column_styles = (("Field", "cyan"), ("Type", "green"), ("Attributes", "yellow"))
    for header, column_style in column_styles:
        schema_table.add_column(header, style=column_style)
    for schema_row in fl._get_schema():
        field_name, field_type, attr_str = schema_row
        schema_table.add_row(field_name, field_type, attr_str)
    console.print(schema_table)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _drop_flows(flows: Iterable[flow.Flow], app_ref: str, force: bool = False) -> None:
    """
    Drop the backend setup of the given flows.

    Shared by the `drop` command and by the `--reset` flag of other commands.
    Asks for confirmation before applying the drop unless `force` is set.

    Args:
        flows: Iterable of Flow objects to drop
        app_ref: Application reference the flows came from; used only in the
            progress message
        force: If True, skip confirmation prompts
    """
    # Materialize once: the flows are traversed more than once below, and an
    # emptiness test on a one-shot iterator would be unreliable.
    flow_list = list(flows)
    if not flow_list:
        click.echo("No flows identified for the drop operation.")
        return

    flow_full_names = ", ".join(fl.full_name for fl in flow_list)
    click.echo(
        f"Preparing to drop specified flows: {flow_full_names} (in '{app_ref}').",
        err=True,
    )

    setup_bundle = flow.make_drop_bundle(flow_list)
    description, is_up_to_date = setup_bundle.describe()
    click.echo(description)
    if is_up_to_date:
        click.echo("No flows need to be dropped.")
        return
    if not force and not click.confirm(
        f"\nThis will apply changes to drop setup for: {flow_full_names}. Continue? [yes/N]",
        default=False,
        show_default=False,
    ):
        click.echo("Drop operation aborted by user.")
        return
    setup_bundle.apply(report_to_stdout=True)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _deprecate_setup_flag(
    ctx: click.Context, param: click.Parameter, value: bool
) -> bool:
    """Option callback: warn when `--setup` is given explicitly; return `value` unchanged."""
    if param.name is None:
        return value

    # Warn only when the flag came from the command line, not from its default.
    source = ctx.get_parameter_source(param.name)
    if source == click.core.ParameterSource.COMMANDLINE:
        click.secho(
            "Warning: The --setup flag is deprecated and will be removed in a future version. "
            "Setup is now always enabled by default.",
            fg="yellow",
            err=True,
        )
    return value
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _setup_flows(
    flow_iter: Iterable[flow.Flow],
    *,
    force: bool,
    quiet: bool = False,
    always_show_setup: bool = False,
) -> None:
    """
    Describe and, if needed, apply the setup bundle for the given flows.

    Args:
        flow_iter: Flows to build the setup bundle from
        force: If True, apply pending changes without asking for confirmation
        quiet: If True, suppress the "up to date" message and apply-time reporting
        always_show_setup: If True, print the description even when nothing changes
    """
    bundle = flow.make_setup_bundle(flow_iter)
    summary, up_to_date = bundle.describe()

    if up_to_date:
        if always_show_setup:
            click.echo(summary)
        if not quiet:
            click.echo("Setup is already up to date.")
        return

    # Pending changes are always shown before asking to apply them.
    click.echo(summary)
    confirmed = force or click.confirm(
        "Changes need to be pushed. Continue? [yes/N]",
        default=False,
        show_default=False,
    )
    if confirmed:
        bundle.apply(report_to_stdout=not quiet)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _show_no_live_update_hint() -> None:
    """Print a yellow note saying that no change capture mechanism exists."""
    hint = "NOTE: No change capture mechanism exists. See https://cocoindex.io/docs/core/flow_methods#live-update for more details.\n"
    click.secho(hint, fg="yellow")
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
async def _update_all_flows_with_hint_async(
    options: flow.FlowLiveUpdaterOptions,
) -> None:
    """Await `flow.update_all_flows_async(options)`; afterwards print the live-update hint if `options.live_mode` is set."""
    await flow.update_all_flows_async(options)
    if not options.live_mode:
        return
    _show_no_live_update_hint()
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
@cli.command()
@click.argument("app_target", type=str)
@click.option(
    "-f",
    "--force",
    is_flag=True,
    show_default=True,
    default=False,
    help="Force setup without confirmation prompts.",
)
@click.option(
    "--reset",
    is_flag=True,
    show_default=True,
    default=False,
    help="Drop existing setup before running setup (equivalent to running 'cocoindex drop' first).",
)
def setup(app_target: str, force: bool, reset: bool) -> None:
    """
    Check and apply backend setup changes for flows, including the internal storage and target (to export to).

    `APP_TARGET`: `path/to/app.py` or `installed_module`.
    """
    # Any ':FlowName' suffix is ignored (with a warning): setup covers every
    # flow registered by the app.
    app_ref = _get_app_ref_from_specifier(app_target)
    _load_user_app(app_ref)

    # If --reset is specified, drop existing setup first
    if reset:
        _drop_flows(flow.flows().values(), app_ref=app_ref, force=force)

    # Always print the setup description, even when nothing needs to change.
    _setup_flows(flow.flows().values(), force=force, always_show_setup=True)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
@cli.command("drop")
@click.argument("app_target", type=str, required=False)
@click.argument("flow_name", type=str, nargs=-1)
@click.option(
    "-f",
    "--force",
    is_flag=True,
    show_default=True,
    default=False,
    help="Force drop without confirmation prompts.",
)
def drop(app_target: str | None, flow_name: tuple[str, ...], force: bool) -> None:
    """
    Drop the backend setup for flows.

    \b
    Modes of operation:
    1. Drop all flows defined in an app: `cocoindex drop <APP_TARGET>`
    2. Drop specific named flows: `cocoindex drop <APP_TARGET> [FLOW_NAME...]`
    """
    if not app_target:
        # This command defines no `--all` option, so the message names only
        # what the user can actually supply.
        raise click.UsageError(
            "Missing argument APP_TARGET. Provide the application "
            "(`path/to/app.py` or an installed module) whose flows should be dropped."
        )

    app_ref = _get_app_ref_from_specifier(app_target)
    _load_user_app(app_ref)

    flows: Iterable[flow.Flow]
    if flow_name:
        # Look up each requested flow; unknown names are reported and skipped
        # so the remaining flows are still dropped.
        found_flows: list[flow.Flow] = []
        for name in flow_name:
            try:
                found_flows.append(flow.flow_by_name(name))
            except KeyError:
                click.echo(
                    f"Warning: Failed to get flow `{name}`. Ignored.",
                    err=True,
                )
        flows = found_flows
    else:
        flows = flow.flows().values()

    _drop_flows(flows, app_ref=app_ref, force=force)
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@cli.command()
@click.argument("app_flow_specifier", type=str)
@click.option(
    "-L",
    "--live",
    is_flag=True,
    show_default=True,
    default=False,
    help="Continuously watch changes from data sources and apply to the target index.",
)
@click.option(
    "--reexport",
    is_flag=True,
    show_default=True,
    default=False,
    help="Reexport to targets even if there's no change.",
)
@click.option(
    "--setup",
    is_flag=True,
    show_default=True,
    default=True,
    callback=_deprecate_setup_flag,
    help="(DEPRECATED) Automatically setup backends for the flow if it's not setup yet. This is now the default behavior.",
)
@click.option(
    "--reset",
    is_flag=True,
    show_default=True,
    default=False,
    help="Drop existing setup before updating (equivalent to running 'cocoindex drop' first). `--reset` implies `--setup`.",
)
@click.option(
    "-f",
    "--force",
    is_flag=True,
    show_default=True,
    default=False,
    help="Force setup without confirmation prompts.",
)
@click.option(
    "-q",
    "--quiet",
    is_flag=True,
    show_default=True,
    default=False,
    help="Avoid printing anything to the standard output, e.g. statistics.",
)
def update(
    app_flow_specifier: str,
    live: bool,
    reexport: bool,
    setup: bool,  # pylint: disable=redefined-outer-name
    reset: bool,
    force: bool,
    quiet: bool,
) -> None:
    """
    Update the index to reflect the latest data from data sources.

    `APP_FLOW_SPECIFIER`: `path/to/app.py`, module, `path/to/app.py:FlowName`, or `module:FlowName`. If `:FlowName` is omitted, updates all flows.
    """
    app_ref, flow_name = _parse_app_flow_specifier(app_flow_specifier)
    _load_user_app(app_ref)

    # Resolve an explicitly named flow through `_flow_by_name`, the same helper
    # `show` and `evaluate` use, so an unknown name is reported as a
    # click.BadParameter listing the available flows. Without a name every
    # registered flow is updated.
    selected_flow = _flow_by_name(flow_name) if flow_name else None
    flow_list = (
        [selected_flow] if selected_flow is not None else list(flow.flows().values())
    )

    # If --reset is specified, drop existing setup first
    if reset:
        _drop_flows(flow_list, app_ref=app_ref, force=force)

    if live:
        click.secho(
            "NOTE: Flow code changes will NOT be reflected until you restart to load the new code.\n",
            fg="yellow",
        )

    options = flow.FlowLiveUpdaterOptions(
        live_mode=live,
        reexport_targets=reexport,
        print_stats=not quiet,
    )
    # `--reset` implies setup: whatever was dropped has to be recreated.
    if reset or setup:
        _setup_flows(flow_list, force=force, quiet=quiet)

    if selected_flow is None:
        execution_context.run(_update_all_flows_with_hint_async(options))
    else:
        with flow.FlowLiveUpdater(selected_flow, options) as updater:
            updater.wait()
            if options.live_mode:
                _show_no_live_update_hint()
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
@cli.command()
@click.argument("app_flow_specifier", type=str)
@click.option(
    "-o",
    "--output-dir",
    type=str,
    required=False,
    help="The directory to dump the output to.",
)
@click.option(
    "--cache/--no-cache",
    is_flag=True,
    show_default=True,
    default=True,
    help="Use already-cached intermediate data if available.",
)
def evaluate(
    app_flow_specifier: str, output_dir: str | None, cache: bool = True
) -> None:
    """
    Evaluate the flow and dump flow outputs to files.

    Instead of updating the index, it dumps what should be indexed to files. Mainly used for evaluation purpose.

    \b
    `APP_FLOW_SPECIFIER`: Specifies the application and optionally the target flow. Can be one of the following formats:
    - `path/to/your_app.py`
    - `an_installed.module_name`
    - `path/to/your_app.py:SpecificFlowName`
    - `an_installed.module_name:SpecificFlowName`

    `:SpecificFlowName` can be omitted only if the application defines a single flow.
    """
    app_ref, flow_ref = _parse_app_flow_specifier(app_flow_specifier)
    _load_user_app(app_ref)
    fl = _flow_by_name(flow_ref)

    if output_dir is None:
        # Default directory name: "eval_" + namespace prefix + flow name + "_" + timestamp.
        namespace_prefix = setting.get_app_namespace(trailing_delimiter="_")
        timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M%S")
        output_dir = f"eval_{namespace_prefix}{fl.name}_{timestamp}"

    dump_options = flow.EvaluateAndDumpOptions(output_dir=output_dir, use_cache=cache)
    fl.evaluate_and_dump(dump_options)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
@cli.command()
@click.argument("app_target", type=str)
@click.option(
    "-a",
    "--address",
    type=str,
    help="The address to bind the server to, in the format of IP:PORT. "
    "If unspecified, the address specified in COCOINDEX_SERVER_ADDRESS will be used.",
)
@click.option(
    "-c",
    "--cors-origin",
    type=str,
    help="The origins of the clients (e.g. CocoInsight UI) to allow CORS from. "
    "Multiple origins can be specified as a comma-separated list. "
    "e.g. `https://cocoindex.io,http://localhost:3000`. "
    "Origins specified in COCOINDEX_SERVER_CORS_ORIGINS will also be included.",
)
@click.option(
    "-ci",
    "--cors-cocoindex",
    is_flag=True,
    show_default=True,
    default=False,
    help=f"Allow {COCOINDEX_HOST} to access the server.",
)
@click.option(
    "-cl",
    "--cors-local",
    type=int,
    help="Allow http://localhost:<port> to access the server.",
)
@click.option(
    "-L",
    "--live-update",
    is_flag=True,
    show_default=True,
    default=False,
    help="Continuously watch changes from data sources and apply to the target index.",
)
@click.option(
    "--setup",
    is_flag=True,
    show_default=True,
    default=True,
    callback=_deprecate_setup_flag,
    help="(DEPRECATED) Automatically setup backends for the flow if it's not setup yet. This is now the default behavior.",
)
@click.option(
    "--reset",
    is_flag=True,
    show_default=True,
    default=False,
    help="Drop existing setup before starting server (equivalent to running 'cocoindex drop' first). `--reset` implies `--setup`.",
)
@click.option(
    "--reexport",
    is_flag=True,
    show_default=True,
    default=False,
    help="Reexport to targets even if there's no change.",
)
@click.option(
    "-f",
    "--force",
    is_flag=True,
    show_default=True,
    default=False,
    help="Force setup without confirmation prompts.",
)
@click.option(
    "-q",
    "--quiet",
    is_flag=True,
    show_default=True,
    default=False,
    help="Avoid printing anything to the standard output, e.g. statistics.",
)
@click.option(
    "-r",
    "--reload",
    is_flag=True,
    show_default=True,
    default=False,
    help="Enable auto-reload on code changes.",
)
def server(
    app_target: str,
    address: str | None,
    live_update: bool,
    setup: bool,  # pylint: disable=redefined-outer-name
    reset: bool,
    reexport: bool,
    force: bool,
    quiet: bool,
    cors_origin: str | None,
    cors_cocoindex: bool,
    cors_local: int | None,
    reload: bool,
) -> None:
    """
    Start a HTTP server providing REST APIs.

    It will allow tools like CocoInsight to access the server.

    `APP_TARGET`: `path/to/app.py` or `installed_module`.
    """
    app_ref = _get_app_ref_from_specifier(app_target)

    # Positional and keyword arguments for `_run_server`, in its declared order.
    server_args = (
        app_ref,
        address,
        cors_origin,
        cors_cocoindex,
        cors_local,
        live_update,
        reexport,
        quiet,
    )
    server_kwargs = {"run_reset": reset, "run_setup": setup, "force": force}

    if not reload:
        click.secho(
            "NOTE: Flow code changes will NOT be reflected until you restart to load the new code. Use --reload to enable auto-reload.\n",
            fg="yellow",
        )
        _run_server(*server_args, **server_kwargs)
        return

    # Reload mode: watch the working directory plus the directory holding the
    # app file, or the module's origin when the target is an importable module.
    watch_paths = {os.getcwd()}
    if os.path.isfile(app_ref):
        watch_paths.add(os.path.dirname(os.path.abspath(app_ref)))
    else:
        try:
            module_spec = importlib.util.find_spec(app_ref)
        except ImportError:
            module_spec = None
        if module_spec and module_spec.origin:
            watch_paths.add(os.path.dirname(os.path.abspath(module_spec.origin)))

    def _announce_reload(changes: Any) -> None:
        click.secho(
            f"\nDetected changes in {len(changes)} file(s), reloading server...\n",
            fg="cyan",
        )

    watchfiles.run_process(
        *watch_paths,
        target=_reloadable_server_target,
        args=server_args,
        kwargs=server_kwargs,
        watch_filter=watchfiles.PythonFilter(),
        callback=_announce_reload,
    )
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
def _reloadable_server_target(*args: Any, **kwargs: Any) -> None:
    """Entry point run by `watchfiles.run_process`: initialize, adjust flags, start the server.

    `--reset` implies setup. When the `WATCHFILES_CHANGES` environment variable
    holds a non-empty list, the reset is turned off and `force` is turned on
    before `_run_server` is called.
    """
    _initialize_cocoindex_in_process()

    # Fold the reset request into run_setup before the reset flag may be cleared.
    wants_setup = kwargs["run_setup"] or kwargs["run_reset"]
    kwargs["run_setup"] = wants_setup

    raw_changes = os.environ.get("WATCHFILES_CHANGES", "[]")
    if json.loads(raw_changes):
        kwargs.update(run_reset=False, force=True)

    _run_server(*args, **kwargs)
|
|
696
|
+
|
|
697
|
+
|
|
698
|
+
def _run_server(
    app_ref: str,
    address: str | None = None,
    cors_origin: str | None = None,
    cors_cocoindex: bool = False,
    cors_local: int | None = None,
    live_update: bool = False,
    reexport: bool = False,
    quiet: bool = False,
    /,
    *,
    force: bool = False,
    run_reset: bool = False,
    run_setup: bool = False,
) -> None:
    """Helper function to run the server with specified settings.

    Loads the user app, optionally drops and/or sets up its flows, starts the
    server via `lib.start_server`, optionally schedules an update of all flows,
    and then blocks until SIGINT or SIGTERM is received.

    Args:
        app_ref: Application path or module to load.
        address: Overrides the address from the environment-derived settings.
        cors_origin: Extra CORS origins, parsed by `ServerSettings.parse_cors_origins`.
        cors_cocoindex: If True, add `COCOINDEX_HOST` to the allowed origins.
        cors_local: If set, add `http://localhost:<port>` to the allowed origins.
        live_update: Passed as `live_mode` to the flow updater options.
        reexport: Passed as `reexport_targets` to the flow updater options.
        quiet: Suppresses setup reporting and update statistics.
        force: Skip confirmation prompts in drop/setup.
        run_reset: Drop existing setup first (also implies setup).
        run_setup: Run setup before starting the server.

    Raises:
        click.Abort: If the loaded app registers no flows.
    """
    _load_user_app(app_ref)

    # Check if any flows are registered
    if not flow.flow_names():
        click.secho(
            f"\nError: No flows registered in '{app_ref}'.\n",
            fg="red",
            bold=True,
            err=True,
        )
        click.secho(
            "To use CocoIndex server, you need to define at least one flow.",
            err=True,
        )
        click.secho(
            "See https://cocoindex.io/docs for more information.\n",
            fg="cyan",
            err=True,
        )
        raise click.Abort()

    # If --reset is specified, drop existing setup first
    if run_reset:
        _drop_flows(flow.flows().values(), app_ref=app_ref, force=force)

    # Start from the environment-derived settings, then merge in the CORS
    # origins requested on the command line (a set removes duplicates).
    server_settings = setting.ServerSettings.from_env()
    cors_origins: set[str] = set(server_settings.cors_origins or [])
    if cors_origin is not None:
        cors_origins.update(setting.ServerSettings.parse_cors_origins(cors_origin))
    if cors_cocoindex:
        cors_origins.add(COCOINDEX_HOST)
    if cors_local is not None:
        cors_origins.add(f"http://localhost:{cors_local}")
    server_settings.cors_origins = list(cors_origins)

    # An explicit address takes precedence over the one from the environment.
    if address is not None:
        server_settings.address = address

    # A reset implies setup: whatever was dropped above has to be recreated.
    if run_reset or run_setup:
        _setup_flows(
            flow.flows().values(),
            force=force,
            quiet=quiet,
        )

    lib.start_server(server_settings)

    if COCOINDEX_HOST in cors_origins:
        click.echo(f"Open CocoInsight at: {COCOINDEX_HOST}/cocoinsight")

    click.secho("Press Ctrl+C to stop the server.", fg="yellow")

    if live_update or reexport:
        options = flow.FlowLiveUpdaterOptions(
            live_mode=live_update,
            reexport_targets=reexport,
            print_stats=not quiet,
        )
        # Schedule the update on the execution context's event loop without
        # waiting for it; the returned future is not kept.
        asyncio.run_coroutine_threadsafe(
            _update_all_flows_with_hint_async(options), execution_context.event_loop
        )

    # Block this thread until SIGINT or SIGTERM sets the event.
    shutdown_event = threading.Event()

    def handle_signal(signum: int, frame: FrameType | None) -> None:
        shutdown_event.set()

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)
    shutdown_event.wait()
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _flow_name(name: str | None) -> str:
    """
    Resolve the flow name to operate on.

    An explicit `name` is validated against the registered flow names. With no
    name, the only registered flow is returned; if several are registered, an
    interactive menu (arrow keys to move, Enter to confirm) is shown.

    Raises:
        click.BadParameter: If `name` is given but is not a registered flow.
        click.UsageError: If no name is given and no flows are registered.
    """
    names = flow.flow_names()
    available = ", ".join(sorted(names))

    if name is not None:
        if name in names:
            return name
        raise click.BadParameter(
            f"Flow '{name}' not found.\nAvailable: {available if names else 'None'}"
        )

    flow_count = len(names)
    if flow_count == 0:
        raise click.UsageError("No flows available in the loaded application.")
    if flow_count == 1:
        return names[0]

    # Several flows: redraw the menu after every key press until Enter.
    console = Console()
    selected = 0
    while True:
        console.clear()
        console.print(
            Panel.fit("Select a Flow", title_align="left", border_style="cyan")
        )
        for position, fname in enumerate(names):
            if position == selected:
                menu_line = f"> [bold green]{fname}[/bold green]"
            else:
                menu_line = f" {fname}"
            console.print(menu_line)

        key = click.getchar()
        if key in ("\r", "\n"):  # Enter
            console.clear()
            return names[selected]
        if key == "\x1b[A":  # Up arrow
            selected = (selected - 1) % flow_count
        elif key == "\x1b[B":  # Down arrow
            selected = (selected + 1) % flow_count
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
def _flow_by_name(name: str | None) -> flow.Flow:
    """Resolve `name` with `_flow_name` and return the matching flow object."""
    resolved_name = _flow_name(name)
    return flow.flow_by_name(resolved_name)
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
# Invoke the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
|