PyPI - litert-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

litert-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

examples/litert_cli.ipynb +313 -0
examples/models/presets/default.py +19 -0
examples/run_cli_demo.sh +38 -0
examples/run_cli_npu.sh +89 -0
examples/run_commands.sh +67 -0
examples/run_models.sh +63 -0
examples/run_smoke_tests.sh +58 -0
examples/utils.ps1 +163 -0
examples/utils.sh +184 -0
litert_cli/__init__.py +15 -0
litert_cli/commands/benchmark/__init__.py +16 -0
litert_cli/commands/benchmark/android.py +212 -0
litert_cli/commands/benchmark/cli.py +294 -0
litert_cli/commands/benchmark/desktop.py +228 -0
litert_cli/commands/benchmark/gcp.py +336 -0
litert_cli/commands/clean.py +73 -0
litert_cli/commands/compile.py +211 -0
litert_cli/commands/convert/__init__.py +20 -0
litert_cli/commands/convert/cli.py +255 -0
litert_cli/commands/convert/generic.py +211 -0
litert_cli/commands/convert/huggingface.py +175 -0
litert_cli/commands/delete.py +56 -0
litert_cli/commands/download.py +274 -0
litert_cli/commands/import.py +124 -0
litert_cli/commands/list.py +132 -0
litert_cli/commands/lm.py +74 -0
litert_cli/commands/quantize.py +193 -0
litert_cli/commands/run/__init__.py +16 -0
litert_cli/commands/run/android.py +394 -0
litert_cli/commands/run/cli.py +297 -0
litert_cli/commands/run/desktop.py +340 -0
litert_cli/commands/visualize.py +234 -0
litert_cli/core/android_utils.py +304 -0
litert_cli/core/android_utils_test.py +236 -0
litert_cli/core/constants.py +131 -0
litert_cli/core/deps.py +180 -0
litert_cli/core/deps_test.py +101 -0
litert_cli/core/inputs.py +203 -0
litert_cli/core/inputs_test.py +176 -0
litert_cli/core/log_filters.py +50 -0
litert_cli/core/models.py +96 -0
litert_cli/core/npu_utils.py +382 -0
litert_cli/core/targets_manager.py +192 -0
litert_cli/core/utils.py +58 -0
litert_cli/litert.py +119 -0
litert_cli/litert_help_test.py +51 -0
litert_cli/litert_test.py +88 -0
litert_cli/models/__init__.py +145 -0
litert_cli/models/asr/__init__.py +15 -0
litert_cli/models/asr/asr_model.py +108 -0
litert_cli/models/asr/parakeet_ctc.py +165 -0
litert_cli/models/asr/runner.py +482 -0
litert_cli/models/base.py +57 -0
litert_cli/test_data/dummy_calib_data.py +26 -0
litert_cli/test_data/dummy_cv_model.py +52 -0
litert_cli/test_data/dummy_cv_model.tflite +0 -0
litert_cli/test_data/generate_test_inputs.py +51 -0
litert_cli/test_data/mobilenet_v3_calib_data.py +25 -0
litert_cli/test_data/quantize_recipe.json +16 -0
litert_cli/test_data/resnet18.py +31 -0
litert_cli-0.1.0.dist-info/METADATA +38 -0
litert_cli-0.1.0.dist-info/RECORD +67 -0
litert_cli-0.1.0.dist-info/WHEEL +5 -0
litert_cli-0.1.0.dist-info/entry_points.txt +2 -0
litert_cli-0.1.0.dist-info/licenses/LICENSE +202 -0
litert_cli-0.1.0.dist-info/top_level.txt +3 -0
tools/build_wheels.py +122 -0

litert_cli/commands/run/cli.py ADDED Viewed

@@ -0,0 +1,297 @@
+# Copyright 2026 The LiteRT CLI Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Command Line Interface for executing LiteRT models.
+This module provides the `litert run` command, which allows users to
+execute TFLite models on either the local desktop or a connected Android device.
+Key Features:
+- Desktop Execution: Uses the LiteRT Python API (`CompiledModel`) to run
+  inference locally. It automatically inspects the model signature,
+  generates appropriate dummy input data, and prints the output tensor
+  statistics.
+- Android Execution: Seamlessly pushes the model and the compiled `run_model`
+  binary to an attached Android device via `adb`, and executes it remotely.
+"""
+from __future__ import annotations
+from collections.abc import Sequence
+import textwrap
+import click
+from litert_cli.core import constants
+from litert_cli.core import deps
+from litert_cli.core import utils
+@click.command(
+    "run",
+    help=textwrap.dedent("""\
+        Run LiteRT models locally or on device.
+        MODEL: Path to the LiteRT model (.tflite) or a Model Reference (e.g., nvidia/parakeet-ctc-0.6b).
+        Examples:
+          1. Run on desktop (CPU) with dummy inputs:
+            $ litert run model.tflite
+          2. Run on desktop with GPU acceleration:
+            $ litert run model.tflite --gpu
+          3. Run with custom inputs (path or literal):
+            $ litert run model.tflite --input image.jpg
+            OR with named inputs:
+            $ litert run model.tflite --input in1=1.0 --input in2=image.jpg
+          4. Run on an attached Android device:
+            $ litert run model.tflite --android
+          5. Run on Android with GPU acceleration:
+            $ litert run model.tflite --android --gpu
+          6. Benchmark execution with 10 iterations:
+            $ litert run model.tflite --iterations 10
+          7. Print detailed tensor outputs:
+            $ litert run model.tflite --print-tensors --sample-size 10
+          8. Run with multiple accelerators (npu -> gpu -> cpu fallback):
+            $ litert run model.tflite --npu --gpu --cpu
+            OR explicitly:
+            $ litert run model.tflite --accelerator npu,gpu,cpu
+        """),
+)
+@deps.require_extra("run")
+@click.argument("model", type=str)
+@click.option(
+    "--input",
+    "inputs",
+    multiple=True,
+    help=(
+        "Input data for the model. Can be a literal array (e.g. '[1,2]'), "
+        "a path to an image/npy/raw file. "
+        "You can specify multiple inputs using format: --input name=value "
+        "or just --input value if the model has only one input."
+    ),
+)
+@click.option(
+    "--model-params",
+    "model_params",
+    multiple=True,
+    help="Model specific parameters in format key=value.",
+)
+@click.option(
+    "--model-help",
+    is_flag=True,
+    default=False,
+    help="Show help specific to the matched model plugin.",
+)
+@click.option(
+    "--desktop",
+    "target",
+    flag_value="desktop",
+    default=True,
+    help="Target desktop platform to run (Default).",
+)
+@click.option(
+    "--android",
+    "target",
+    flag_value="android",
+    help="Target Android platform to run.",
+)
+@click.option(
+    "--accelerator",
+    type=str,
+    help="Comma-separated list of hardware accelerators (e.g. npu,gpu,cpu).",
+)
+@click.option(
+    "--cpu",
+    is_flag=True,
+    help="Use CPU accelerator.",
+)
+@click.option(
+    "--gpu",
+    is_flag=True,
+    help="Use GPU accelerator.",
+)
+@click.option(
+    "--npu",
+    is_flag=True,
+    help="Use NPU accelerator.",
+)
+@click.option(
+    "--signature-index",
+    type=int,
+    default=0,
+    help="Index of model signature to run. Default is 0.",
+)
+@click.option(
+    "--iterations",
+    type=int,
+    default=1,
+    help="Number of times to execute the model for benchmarking. Default is 1.",
+)
+@click.option(
+    "--print-tensors",
+    is_flag=True,
+    default=False,
+    help="Print output tensor values after execution.",
+)
+@click.option(
+    "--sample-size",
+    type=int,
+    default=5,
+    help="Number of sample elements to print from tensors. Default is 5.",
+)
+@click.pass_context
+def run_cmd(
+    unused_ctx: click.Context,
+    model: str,
+    inputs: Sequence[str],
+    model_params: Sequence[str],
+    model_help: bool,
+    target: str,
+    accelerator: str | None,
+    cpu: bool,
+    gpu: bool,
+    npu: bool,
+    signature_index: int,
+    iterations: int,
+    print_tensors: bool,
+    sample_size: int,
+) -> None:
+  r"""Runs LiteRT models locally or on device.
+  Args:
+    unused_ctx: Click context.
+    model: Path to the LiteRT model (.tflite).
+    inputs: Tuple of input assignments (e.g., 'name=value' or just 'value').
+    model_params: Model specific parameters.
+    model_help: Show help specific to the matched model plugin.
+    target: Execution target ('desktop' or 'android').
+    accelerator: Hardware accelerator ('cpu', 'gpu', or 'npu').
+    cpu: Use CPU accelerator.
+    gpu: Use GPU accelerator.
+    npu: Use NPU accelerator.
+    signature_index: Index of model signature to run.
+    iterations: Number of times to execute the model for benchmarking.
+    print_tensors: Whether to print output tensor elements.
+    sample_size: Number of sample elements to print from tensors.
+  """
+  # Resolve the order of accelerators
+  accelerator_list = []
+  if accelerator:
+    accelerator_list = [
+        a.strip().lower() for a in accelerator.split(",") if a.strip()
+    ]
+  else:
+    if npu:
+      accelerator_list.append("npu")
+    if gpu:
+      accelerator_list.append("gpu")
+    if cpu:
+      accelerator_list.append("cpu")
+    if not accelerator_list:
+      accelerator_list = ["cpu"]
+  accelerator = ",".join(accelerator_list)
+  # Quiet if default is true
+  if constants.DEFAULT_QUIET:
+    utils.enable_quiet_mode()
+  # --- Model Reference and Cache Resolution ---
+  from litert_cli.core import models as core_models  # pylint: disable=g-import-not-at-top
+  resolved_model_path, resolved_hf_id = core_models.resolve_model_reference(
+      model
+  )
+  if resolved_model_path != model:
+    click.echo(f"Resolved model '{model}' to '{resolved_model_path}'")
+  # --- Plugin Dispatch Mechanism ---
+  # Try to delegate to a model-specific plugin first.
+  from litert_cli import models  # pylint: disable=g-import-not-at-top
+  # Parse model-params into a dictionary
+  parsed_model_params = {}
+  if model_params:
+    for p in model_params:
+      if "=" in p:
+        k, v = p.split("=", 1)
+        parsed_model_params[k] = v
+  # Pass the resolved hf_id as model_id to dispatch, and the actual file path
+  # in kwargs
+  plugin_result = models.dispatch_model_intent(
+      intent="run",
+      model_id=resolved_hf_id or str(model),
+      inputs=inputs,
+      model_help=model_help,
+      model_params=parsed_model_params,
+      target=target,
+      accelerator=accelerator,
+      model_path=resolved_model_path,  # Pass the actual file path here!
+  )
+  if plugin_result is not None:
+    # If the plugin handled it or showed help, we exit
+    return
+  # ----------------------------------
+  if target == "desktop":
+    from litert_cli.commands.run import desktop  # pylint: disable=g-import-not-at-top
+    desktop.run_desktop(
+        model_path=str(resolved_model_path),
+        inputs=inputs,
+        accelerator=accelerator,
+        signature_index=signature_index,
+        iterations=iterations,
+        print_tensors=print_tensors,
+        sample_size=sample_size,
+    )
+  elif target == "android":
+    from litert_cli.commands.run import android  # pylint: disable=g-import-not-at-top
+    android.run_android(
+        model_path=str(resolved_model_path),
+        inputs=inputs,
+        accelerator=accelerator,
+        signature_index=signature_index,
+        iterations=iterations,
+        print_tensors=print_tensors,
+        sample_size=sample_size,
+    )
+  else:
+    click.secho(f"Target '{target}' is not yet supported.", fg="red")

litert_cli/commands/run/desktop.py ADDED Viewed

@@ -0,0 +1,340 @@
+# Copyright 2026 The LiteRT CLI Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Desktop execution engine for LiteRT models.
+Uses CompiledModel to load and run models on desktop (CPU/GPU).
+Usage Examples:
+  1. Run a model on desktop (CPU):
+     $ litert run /path/to/model.tflite --desktop
+  2. Run with GPU acceleration:
+     $ litert run /path/to/model.tflite --desktop --gpu
+     OR
+     $ litert run /path/to/model.tflite --desktop --accelerator gpu
+  3. Run with multiple accelerators (gpu -> cpu native fallback):
+     $ litert run /path/to/model.tflite --desktop --gpu --cpu
+     OR
+     $ litert run /path/to/model.tflite --desktop --accelerator gpu,cpu
+  4. Run with custom inputs:
+     $ litert run /path/to/model.tflite --desktop --input input_name=value
+  5. Run with multiple iterations (benchmark):
+     $ litert run /path/to/model.tflite --desktop --iterations 10
+  6. Print tensor details:
+     $ litert run /path/to/model.tflite --desktop --print-tensors
+"""
+from __future__ import annotations
+from collections.abc import Mapping, Sequence
+import contextlib
+import time
+from typing import Any, TYPE_CHECKING
+import click
+from litert_cli.core import constants
+from litert_cli.core import inputs as inputs_utils
+from litert_cli.core import utils
+import numpy as np
+if TYPE_CHECKING:
+  # Import heavy dependencies only for type hinting to improve CLI startup
+  # performance. These are not imported at runtime.
+  from ai_edge_litert.compiled_model import CompiledModel  # pylint: disable=g-import-not-at-top
+def _parse_inputs_dict(inputs: Sequence[str]) -> dict[str, str]:
+  """Parse a tuple of input assignments into a dictionary.
+  Args:
+    inputs: A tuple of input strings, e.g., ('name=value', 'value2').
+  Returns:
+    A dictionary mapping names to values. Unnamed inputs use '_default_'.
+  """
+  parsed_inputs = {}
+  if inputs:
+    for inp in inputs:
+      if "=" in inp:
+        k, v = inp.split("=", 1)
+        parsed_inputs[k] = v
+      else:
+        parsed_inputs["_default_"] = inp
+  return parsed_inputs
+def _prepare_inputs(
+    *,
+    cm: CompiledModel,
+    sig_key: str,
+    parsed_inputs: dict[str, str],
+) -> dict[str, Any]:
+  """Prepare CompiledModel input buffers.
+  Load parsed input assignments or generates random dummy data to load into the
+  CompiledModel TensorBuffers.
+  Args:
+    cm: The loaded CompiledModel structure to interact with.
+    sig_key: Signature key describing the input interface.
+    parsed_inputs: Dictionary mapping input names to file path/literal strings.
+  Returns:
+    A dictionary mapping tensor names to their populated TensorBuffers.
+  Raises:
+    click.ClickException: If input loading or parsing fails.
+  """
+  inputs_dict = {}
+  input_details = cm.get_input_tensor_details(sig_key)
+  for name, details in input_details.items():
+    shape = details.get("shape", [1])
+    tensor_type = details.get("dtype", "?")
+    np_dtype = inputs_utils.get_np_dtype(tensor_type)
+    input_data_str = parsed_inputs.get(name) or parsed_inputs.get("_default_")
+    if input_data_str:
+      click.echo(
+          f"Loading input {name!r} from {input_data_str!r} (shape:"
+          f" {shape}, dtype: {tensor_type})"
+      )
+      try:
+        input_data = inputs_utils.parse_input(input_data_str, shape, np_dtype)
+      except ImportError as ie:
+        click.secho(ie, fg="red")
+        raise click.ClickException("Failed to load input module.") from ie
+      except Exception as e:
+        click.secho(f"Failed to parse input: {e!r}", fg="red")
+        raise click.ClickException(
+            f"Failed to parse input {name!r}: {e!r}"
+        ) from e
+    else:
+      click.echo(
+          f"Generating random dummy input {name!r} with shape {shape} and"
+          f" dtype {tensor_type}"
+      )
+      rng = np.random.default_rng()
+      if np.issubdtype(np_dtype, np.integer):
+        input_data = rng.integers(0, 10, size=shape, dtype=np_dtype)
+      else:
+        input_data = np.asarray(
+            rng.uniform(low=-1.0, high=1.0, size=shape)
+        ).astype(np_dtype)
+    tb = cm.create_input_buffer_by_name(sig_key, name)
+    tb.write(input_data)
+    inputs_dict[name] = tb
+  return inputs_dict
+def _print_outputs(
+    outputs_by_name: Mapping[str, Any],
+    print_tensors: bool,
+    sample_size: int,
+    output_details: Mapping[str, Any],
+) -> None:
+  """Print inference outputs to stdout.
+  Iterate through absolute tensor results and applies heuristics for
+  classification formatting or raw values flattening details.
+  Args:
+    outputs_by_name: Dictionary mapping output names to read-ready
+      TensorBuffers.
+    print_tensors: Boolean flag to trigger full tensor stream printing.
+    sample_size: Constraint on how many elements to print for large arrays.
+    output_details: Dictionary mapping output names to their tensor details.
+  """
+  click.echo("Outputs:")
+  for out_name, out_tb in outputs_by_name.items():
+    try:
+      shape = out_tb.shape if hasattr(out_tb, "shape") else []
+      num_elements = np.prod(shape) if shape else 1
+      details = output_details.get(out_name, {})
+      tensor_type = details.get("dtype", "?")
+      np_dtype = inputs_utils.get_np_dtype(tensor_type)
+      out_np = out_tb.read(num_elements, np_dtype)
+      if shape:
+        out_np = out_np.reshape(shape)
+      if print_tensors:
+        flat_out = out_np.ravel()
+        n_elem = len(flat_out)
+        click.echo(f"  {out_name} (shape: {shape}):")
+        if n_elem <= sample_size * 2:
+          click.echo(f"    {flat_out}")
+        else:
+          p_start = flat_out[:sample_size]
+          p_end = flat_out[-sample_size:]
+          click.echo(
+              f"    [{' '.join(str(x) for x in p_start)} ..."
+              f" {' '.join(str(x) for x in p_end)}]"
+          )
+      else:
+        # Classification inference heuristics fallback
+        if (len(shape) == 1 and shape[0] > 1) or (
+            len(shape) == 2 and shape[0] == 1 and shape[1] > 1
+        ):
+          scores = out_np.flatten()
+          n_top = min(5, len(scores))
+          top_indices = np.argsort(scores)[-n_top:][::-1]
+          click.echo(f"  {out_name} (Top {n_top} Predictions):")
+          for i, idx in enumerate(top_indices):
+            click.echo(f"    {i+1}: index {idx} - score {scores[idx]:.4f}")
+        else:
+          click.echo(
+              f"  {out_name}: mean={np.mean(out_np):.4f},"
+              f" min={np.min(out_np):.4f}, max={np.max(out_np):.4f}"
+          )
+    except Exception as e:  # pylint: disable=broad-exception-caught
+      click.echo(
+          f"  {out_name}: [Unable to read data natively without specific"
+          f" dtype info] (Error: {e!r})"
+      )
+def run_desktop(
+    *,
+    model_path: str,
+    inputs: Sequence[str],
+    accelerator: str,
+    signature_index: int,
+    iterations: int,
+    print_tensors: bool,
+    sample_size: int,
+    quiet: bool = False,
+) -> None:
+  """Runs the model on the desktop target using CompiledModel.
+  Args:
+    model_path: Local path to the LiteRT model file (.tflite).
+    inputs: Tuple of input assignments (e.g., 'name=value').
+    accelerator: Hardware accelerator ('cpu', 'gpu', 'npu').
+    signature_index: Signature index to execute.
+    iterations: Number of execute loops for remote runner.
+    print_tensors: Whether to print absolute stats after execution completes.
+    sample_size: Limit execution sample stream print length per tensor.
+    quiet: Whether to silence stderr output.
+  Raises:
+    click.ClickException: On loading failure or inference execution errors.
+  """
+  accel_list = [a.strip().lower() for a in accelerator.split(",") if a.strip()]
+  # pylint: disable=g-import-not-at-top,reimported
+  from ai_edge_litert.compiled_model import CompiledModel
+  from ai_edge_litert.compiled_model import Environment
+  from ai_edge_litert.hardware_accelerator import HardwareAccelerator
+  hw_accel = HardwareAccelerator(0)
+  for accel in accel_list:
+    if accel == "cpu":
+      hw_accel |= HardwareAccelerator.CPU
+    elif accel == "gpu":
+      hw_accel |= HardwareAccelerator.GPU
+    elif accel == "npu":
+      hw_accel |= HardwareAccelerator.NPU
+    else:
+      raise click.ClickException(f"Unsupported hardware accelerator: {accel!r}")
+  if hw_accel == HardwareAccelerator(0):
+    hw_accel = HardwareAccelerator.CPU
+  click.echo(
+      f"Loading model on desktop: {model_path} with native hardware"
+      f" accelerators: {hw_accel}"
+  )
+  ctx = utils.silence_stderr() if quiet else contextlib.nullcontext()
+  with ctx:
+    try:
+      env = None
+      if constants.IS_INTERNAL_ENV:
+        # In internal environment, we need to fallback to LD_LIBRARY_PATH for
+        # loading GPU accelerators in hermetic .par file. Otherwise, use
+        # default path.
+        env = Environment.create(runtime_path="")
+      cm = CompiledModel.from_file(
+          model_path, hw_accel, environment=env
+      )
+      signatures = cm.get_signature_list()
+      if not signatures:
+        raise click.ClickException(
+            f"No signatures found in the model: {model_path!r}"
+        )
+      try:
+        sig_info = cm.get_signature_by_index(signature_index)
+        sig_key = sig_info["key"]
+      except Exception as e:  # pylint: disable=broad-exception-caught
+        raise click.ClickException(
+            f"Failed to get signature at index {signature_index}: {e!r}"
+        ) from e
+      click.echo(f"Using signature: {sig_key!r}")
+      parsed_inputs = _parse_inputs_dict(inputs)
+      inputs_dict = _prepare_inputs(
+          cm=cm, sig_key=sig_key, parsed_inputs=parsed_inputs
+      )
+      click.echo(f"Running inference {iterations} times...")
+      run_times = []
+      sig_idx = cm.get_signature_index(sig_key)
+      out_buffers = cm.create_output_buffers(sig_idx)
+      out_names = signatures[sig_key]["outputs"]
+      outputs_by_name = dict(zip(out_names, out_buffers))
+      for _ in range(iterations):
+        start_time = time.perf_counter()
+        cm.run_by_name(sig_key, inputs_dict, outputs_by_name)
+        end_time = time.perf_counter()
+        run_times.append((end_time - start_time) * 1000)
+      if iterations == 1:
+        click.echo(f"Inference complete in {run_times[0]:.2f} ms")
+      else:
+        click.echo(f"Benchmark results ({iterations} iterations):")
+        click.echo(f"  First run: {run_times[0]:.2f} ms")
+        click.echo(f"  Average: {np.mean(run_times):.2f} ms")
+        click.echo(f"  Min: {np.min(run_times):.2f} ms")
+        click.echo(f"  Max: {np.max(run_times):.2f} ms")
+        output_details = cm.get_output_tensor_details(sig_key)
+        _print_outputs(
+            outputs_by_name, print_tensors, sample_size, output_details
+        )
+    except Exception as e:  # pylint: disable=broad-exception-caught
+      raise click.ClickException(
+          f"Inference failed for model {model_path!r} with accelerator"
+          f" {accelerator!r}: {e!r}"
+      ) from e