orca_sdk-0.1.9-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. orca_sdk/__init__.py +30 -0
  2. orca_sdk/_shared/__init__.py +10 -0
  3. orca_sdk/_shared/metrics.py +634 -0
  4. orca_sdk/_shared/metrics_test.py +570 -0
  5. orca_sdk/_utils/__init__.py +0 -0
  6. orca_sdk/_utils/analysis_ui.py +196 -0
  7. orca_sdk/_utils/analysis_ui_style.css +51 -0
  8. orca_sdk/_utils/auth.py +65 -0
  9. orca_sdk/_utils/auth_test.py +31 -0
  10. orca_sdk/_utils/common.py +37 -0
  11. orca_sdk/_utils/data_parsing.py +129 -0
  12. orca_sdk/_utils/data_parsing_test.py +244 -0
  13. orca_sdk/_utils/pagination.py +126 -0
  14. orca_sdk/_utils/pagination_test.py +132 -0
  15. orca_sdk/_utils/prediction_result_ui.css +18 -0
  16. orca_sdk/_utils/prediction_result_ui.py +110 -0
  17. orca_sdk/_utils/tqdm_file_reader.py +12 -0
  18. orca_sdk/_utils/value_parser.py +45 -0
  19. orca_sdk/_utils/value_parser_test.py +39 -0
  20. orca_sdk/async_client.py +4104 -0
  21. orca_sdk/classification_model.py +1165 -0
  22. orca_sdk/classification_model_test.py +887 -0
  23. orca_sdk/client.py +4096 -0
  24. orca_sdk/conftest.py +382 -0
  25. orca_sdk/credentials.py +217 -0
  26. orca_sdk/credentials_test.py +121 -0
  27. orca_sdk/datasource.py +576 -0
  28. orca_sdk/datasource_test.py +463 -0
  29. orca_sdk/embedding_model.py +712 -0
  30. orca_sdk/embedding_model_test.py +206 -0
  31. orca_sdk/job.py +343 -0
  32. orca_sdk/job_test.py +108 -0
  33. orca_sdk/memoryset.py +3811 -0
  34. orca_sdk/memoryset_test.py +1150 -0
  35. orca_sdk/regression_model.py +841 -0
  36. orca_sdk/regression_model_test.py +595 -0
  37. orca_sdk/telemetry.py +742 -0
  38. orca_sdk/telemetry_test.py +119 -0
  39. orca_sdk-0.1.9.dist-info/METADATA +98 -0
  40. orca_sdk-0.1.9.dist-info/RECORD +41 -0
  41. orca_sdk-0.1.9.dist-info/WHEEL +4 -0
orca_sdk/_utils/data_parsing_test.py
@@ -0,0 +1,244 @@
+ import json
+ import pickle
+ import tempfile
+ from collections import namedtuple
+ from dataclasses import dataclass
+
+ import pandas as pd
+ import pytest
+ from datasets import Dataset
+ from datasets.exceptions import DatasetGenerationError
+ from torch.utils.data import DataLoader as TorchDataLoader
+ from torch.utils.data import Dataset as TorchDataset
+
+ from ..conftest import SAMPLE_DATA
+ from .data_parsing import hf_dataset_from_disk, hf_dataset_from_torch
+
+
+ class PytorchDictDataset(TorchDataset):
+     def __init__(self):
+         self.data = SAMPLE_DATA
+
+     def __getitem__(self, i):
+         return self.data[i]
+
+     def __len__(self):
+         return len(self.data)
+
+
+ def test_hf_dataset_from_torch_dict():
+     # Given a Pytorch dataset that returns a dictionary for each item
+     dataset = PytorchDictDataset()
+     hf_dataset = hf_dataset_from_torch(dataset)
+     # Then the HF dataset should be created successfully
+     assert isinstance(hf_dataset, Dataset)
+     assert len(hf_dataset) == len(dataset)
+     assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id", "partition_id"}
+
+
+ class PytorchTupleDataset(TorchDataset):
+     def __init__(self):
+         self.data = SAMPLE_DATA
+
+     def __getitem__(self, i):
+         return self.data[i]["value"], self.data[i]["label"]
+
+     def __len__(self):
+         return len(self.data)
+
+
+ def test_hf_dataset_from_torch_tuple():
+     # Given a Pytorch dataset that returns a tuple for each item
+     dataset = PytorchTupleDataset()
+     # And the correct number of column names passed in
+     hf_dataset = hf_dataset_from_torch(dataset, column_names=["value", "label"])
+     # Then the HF dataset should be created successfully
+     assert isinstance(hf_dataset, Dataset)
+     assert len(hf_dataset) == len(dataset)
+     assert hf_dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_torch_tuple_error():
+     # Given a Pytorch dataset that returns a tuple for each item
+     dataset = PytorchTupleDataset()
+     # Then the HF dataset should raise an error if no column names are passed in
+     with pytest.raises(DatasetGenerationError):
+         hf_dataset_from_torch(dataset)
+
+
+ def test_hf_dataset_from_torch_tuple_error_not_enough_columns():
+     # Given a Pytorch dataset that returns a tuple for each item
+     dataset = PytorchTupleDataset()
+     # Then the HF dataset should raise an error if not enough column names are passed in
+     with pytest.raises(DatasetGenerationError):
+         hf_dataset_from_torch(dataset, column_names=["value"])
+
+
+ DatasetTuple = namedtuple("DatasetTuple", ["value", "label"])
+
+
+ class PytorchNamedTupleDataset(TorchDataset):
+     def __init__(self):
+         self.data = SAMPLE_DATA
+
+     def __getitem__(self, i):
+         return DatasetTuple(self.data[i]["value"], self.data[i]["label"])
+
+     def __len__(self):
+         return len(self.data)
+
+
+ def test_hf_dataset_from_torch_named_tuple():
+     # Given a Pytorch dataset that returns a namedtuple for each item
+     dataset = PytorchNamedTupleDataset()
+     # And no column names are passed in
+     hf_dataset = hf_dataset_from_torch(dataset)
+     # Then the HF dataset should be created successfully
+     assert isinstance(hf_dataset, Dataset)
+     assert len(hf_dataset) == len(dataset)
+     assert hf_dataset.column_names == ["value", "label"]
+
+
+ @dataclass
+ class DatasetItem:
+     text: str
+     label: int
+
+
+ class PytorchDataclassDataset(TorchDataset):
+     def __init__(self):
+         self.data = SAMPLE_DATA
+
+     def __getitem__(self, i):
+         return DatasetItem(text=self.data[i]["value"], label=self.data[i]["label"])
+
+     def __len__(self):
+         return len(self.data)
+
+
+ def test_hf_dataset_from_torch_dataclass():
+     # Given a Pytorch dataset that returns a dataclass for each item
+     dataset = PytorchDataclassDataset()
+     hf_dataset = hf_dataset_from_torch(dataset)
+     # Then the HF dataset should be created successfully
+     assert isinstance(hf_dataset, Dataset)
+     assert len(hf_dataset) == len(dataset)
+     assert hf_dataset.column_names == ["text", "label"]
+
+
+ class PytorchInvalidDataset(TorchDataset):
+     def __init__(self):
+         self.data = SAMPLE_DATA
+
+     def __getitem__(self, i):
+         return [self.data[i]["value"], self.data[i]["label"]]
+
+     def __len__(self):
+         return len(self.data)
+
+
+ def test_hf_dataset_from_torch_invalid_dataset():
+     # Given a Pytorch dataset that returns a list for each item
+     dataset = PytorchInvalidDataset()
+     # Then the HF dataset should raise an error
+     with pytest.raises(DatasetGenerationError):
+         hf_dataset_from_torch(dataset)
+
+
+ def test_hf_dataset_from_torchdataloader():
+     # Given a Pytorch dataloader that returns a column-oriented batch of items
+     dataset = PytorchDictDataset()
+
+     def collate_fn(x: list[dict]):
+         return {"value": [item["value"] for item in x], "label": [item["label"] for item in x]}
+
+     dataloader = TorchDataLoader(dataset, batch_size=3, collate_fn=collate_fn)
+     hf_dataset = hf_dataset_from_torch(dataloader)
+     # Then the HF dataset should be created successfully
+     assert isinstance(hf_dataset, Dataset)
+     assert len(hf_dataset) == len(dataset)
+     assert hf_dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_disk_pickle_list():
+     with tempfile.NamedTemporaryFile(suffix=".pkl") as temp_file:
+         # Given a pickle file with test data that is a list
+         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
+         with open(temp_file.name, "wb") as f:
+             pickle.dump(test_data, f)
+         dataset = hf_dataset_from_disk(temp_file.name)
+         # Then the HF dataset should be created successfully
+         assert isinstance(dataset, Dataset)
+         assert len(dataset) == 30
+         assert dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_disk_pickle_dict():
+     with tempfile.NamedTemporaryFile(suffix=".pkl") as temp_file:
+         # Given a pickle file with test data that is a dict
+         test_data = {"value": [f"test_{i}" for i in range(30)], "label": [i % 2 for i in range(30)]}
+         with open(temp_file.name, "wb") as f:
+             pickle.dump(test_data, f)
+         dataset = hf_dataset_from_disk(temp_file.name)
+         # Then the HF dataset should be created successfully
+         assert isinstance(dataset, Dataset)
+         assert len(dataset) == 30
+         assert dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_disk_json():
+     with tempfile.NamedTemporaryFile(suffix=".json") as temp_file:
+         # Given a JSON file with test data
+         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
+         with open(temp_file.name, "w") as f:
+             json.dump(test_data, f)
+         dataset = hf_dataset_from_disk(temp_file.name)
+         # Then the HF dataset should be created successfully
+         assert isinstance(dataset, Dataset)
+         assert len(dataset) == 30
+         assert dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_disk_jsonl():
+     with tempfile.NamedTemporaryFile(suffix=".jsonl") as temp_file:
+         # Given a JSONL file with test data
+         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
+         with open(temp_file.name, "w") as f:
+             for item in test_data:
+                 f.write(json.dumps(item) + "\n")
+         dataset = hf_dataset_from_disk(temp_file.name)
+         # Then the HF dataset should be created successfully
+         assert isinstance(dataset, Dataset)
+         assert len(dataset) == 30
+         assert dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_disk_csv():
+     with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file:
+         # Given a CSV file with test data
+         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
+         with open(temp_file.name, "w") as f:
+             f.write("value,label\n")
+             for item in test_data:
+                 f.write(f"{item['value']},{item['label']}\n")
+         dataset = hf_dataset_from_disk(temp_file.name)
+         # Then the HF dataset should be created successfully
+         assert isinstance(dataset, Dataset)
+         assert len(dataset) == 30
+         assert dataset.column_names == ["value", "label"]
+
+
+ def test_hf_dataset_from_disk_parquet():
+     with tempfile.NamedTemporaryFile(suffix=".parquet") as temp_file:
+         # Given a Parquet file with test data
+         data = {
+             "value": [f"test_{i}" for i in range(30)],
+             "label": [i % 2 for i in range(30)],
+         }
+         df = pd.DataFrame(data)
+         df.to_parquet(temp_file.name)
+         dataset = hf_dataset_from_disk(temp_file.name)
+         # Then the HF dataset should be created successfully
+         assert isinstance(dataset, Dataset)
+         assert len(dataset) == 30
+         assert dataset.column_names == ["value", "label"]
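For reference, a minimal usage sketch of the two helpers these tests exercise, hf_dataset_from_torch and hf_dataset_from_disk from the private module orca_sdk/_utils/data_parsing.py. The dataset class, sample values, and file path below are illustrative, not part of the package.

# Illustrative sketch only; mirrors the behavior covered by the tests above.
from torch.utils.data import Dataset as TorchDataset

from orca_sdk._utils.data_parsing import hf_dataset_from_disk, hf_dataset_from_torch


class TupleDataset(TorchDataset):
    """Hypothetical dataset whose items are plain tuples, so column names must be supplied."""

    def __init__(self):
        self.samples = [(f"text_{i}", i % 2) for i in range(10)]

    def __getitem__(self, i):
        return self.samples[i]

    def __len__(self):
        return len(self.samples)


# tuple items require explicit column names; dict, namedtuple, and dataclass items infer them
hf_dataset = hf_dataset_from_torch(TupleDataset(), column_names=["value", "label"])
assert hf_dataset.column_names == ["value", "label"]

# files are loaded based on their extension: .pkl, .json, .jsonl, .csv, or .parquet
# hf_dataset = hf_dataset_from_disk("path/to/train.jsonl")  # hypothetical path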
orca_sdk/_utils/pagination.py
@@ -0,0 +1,126 @@
+ from __future__ import annotations
+
+ from typing import Callable, Generic, Iterator, TypedDict, TypeVar, cast, overload
+
+ T = TypeVar("T")
+ R = TypeVar("R")
+
+
+ class Page(TypedDict, Generic[T]):
+     items: list[T]
+     count: int
+
+
+ class _PagedIterable(Generic[T, R]):
+     def __init__(
+         self,
+         fetch: Callable[[int, int], Page[T]],
+         *,
+         transform: Callable[[T], R] | None = None,
+         page_size: int = 100,
+     ) -> None:
+         """
+         Iterate over a paginated endpoint.
+
+         Parameters:
+             fetch: function to fetch a page from the endpoint `(limit: int, offset: int) -> TypedDict[{items: list[T], count: int}]`
+             transform: optional function to transform item types `(item: T) -> R`, defaults to identity
+             page_size: maximum number of items to fetch per page
+         """
+         self.fetch = fetch
+         self.transform = transform or (lambda x: cast(R, x))
+         self.page_size = page_size
+         self.offset = 0  # tracks how much has been yielded, not fetched
+         self.page = fetch(self.page_size, self.offset)  # fetch first page to populate count
+         self.count = self.page["count"]
+
+     def __iter__(self) -> Iterator[R]:
+         if self.offset >= self.count:
+             self.offset = 0
+             if len(self.page["items"]) < self.count:
+                 # refetch first page unless we are still on the first page
+                 self.page = self.fetch(self.page_size, self.offset)
+
+         # yield prefetched first page
+         if self.offset == 0:
+             yield from map(self.transform, self.page["items"])
+             self.offset += len(self.page["items"])
+
+         # yield remaining pages one by one
+         while self.offset < self.count:
+             self.page = self.fetch(self.page_size, self.offset)
+             yield from map(self.transform, self.page["items"])
+             self.offset += len(self.page["items"])
+
+     @overload
+     def __getitem__(self, key: int) -> R:
+         pass
+
+     @overload
+     def __getitem__(self, key: slice) -> list[R]:
+         pass
+
+     def __getitem__(self, key: int | slice) -> R | list[R]:
+         if isinstance(key, int):
+             effective_key = key
+             if effective_key < 0:
+                 effective_key += self.count
+             if not 0 <= effective_key < self.count:
+                 raise IndexError(f"Index {key} out of range")
+             # if key is on current page, return item
+             if self.offset <= effective_key < self.offset + len(self.page["items"]):
+                 return self.transform(self.page["items"][effective_key - self.offset])
+             # otherwise, fetch and return the single item
+             return self.transform(self.fetch(1, effective_key)["items"][0])
+
+         elif isinstance(key, slice):
+             start, stop, step = key.indices(self.count)
+             if step != 1:
+                 raise ValueError("Stepped slicing is not supported")
+             start = start + self.count if start < 0 else start or 0
+             stop = stop + self.count if stop < 0 else stop or self.count
+             if start >= self.count or stop > self.count:
+                 raise IndexError(f"Slice {key} out of range")
+             limit = min(self.page_size, stop - start)
+             if limit <= 0:
+                 return []
+             items = []
+             for i in range(start, stop, limit):
+                 page = self.fetch(limit, i)
+                 items.extend(map(self.transform, page["items"]))
+             return items
+
+     def __len__(self) -> int:
+         return self.count
+
+
+ # type checking workaround until python 3.13 allows declaring the class as PagedIterable[T, R = T]
+
+
+ @overload
+ def PagedIterable(
+     fetch: Callable[[int, int], Page[T]],
+     *,
+     transform: None = None,
+     page_size: int = 100,
+ ) -> _PagedIterable[T, T]:
+     pass
+
+
+ @overload
+ def PagedIterable(
+     fetch: Callable[[int, int], Page[T]],
+     *,
+     transform: Callable[[T], R],
+     page_size: int = 100,
+ ) -> _PagedIterable[T, R]:
+     pass
+
+
+ def PagedIterable(
+     fetch: Callable[[int, int], Page[T]],
+     *,
+     transform: Callable[[T], R] | None = None,
+     page_size: int = 100,
+ ) -> _PagedIterable[T, R]:
+     return _PagedIterable(fetch, transform=transform, page_size=page_size)
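A minimal sketch of how a caller might consume this iterable, assuming a hypothetical in-memory endpoint in place of a real HTTP call; RECORDS and fetch_page below are illustrative and not part of the package.

from __future__ import annotations

from orca_sdk._utils.pagination import Page, PagedIterable

# hypothetical stand-in for a remote collection; a real fetch would issue an HTTP request
RECORDS = [{"id": i, "value": f"item_{i}"} for i in range(250)]


def fetch_page(limit: int, offset: int) -> Page[dict]:
    # the endpoint contract: (limit, offset) -> {"items": [...], "count": total}
    return {"items": RECORDS[offset : offset + limit], "count": len(RECORDS)}


records = PagedIterable(fetch_page, transform=lambda r: r["value"], page_size=100)
assert len(records) == 250          # count is known once the constructor fetches the first page
assert records[0] == "item_0"       # served from the prefetched first page
assert records[-1] == "item_249"    # an index off the current page triggers a single-item fetch
assert records[100:103] == ["item_100", "item_101", "item_102"]
for value in records:               # iterating fetches the remaining pages lazily
    pass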
orca_sdk/_utils/pagination_test.py
@@ -0,0 +1,132 @@
+ import pytest
+
+ from .pagination import Page, PagedIterable
+
+
+ class MockEndpoint:
+     """Mock paginated endpoint for testing"""
+
+     def __init__(self, total_items: int):
+         self.items = list(range(total_items))
+         self.fetch_count = 0
+
+     def fetch(self, limit: int, offset: int) -> Page[int]:
+         self.fetch_count += 1
+         end_index = min(offset + limit, len(self.items))
+         items = self.items[offset:end_index]
+         return {"items": items, "count": len(self.items)}
+
+
+ def test_basic_pagination():
+     # Given a mock endpoint with 5 items
+     endpoint = MockEndpoint(5)
+     # When doing a paginated iteration
+     paginated = PagedIterable(endpoint.fetch, page_size=2)
+     # Then we should be able to iterate through all items
+     assert list(paginated) == [0, 1, 2, 3, 4]
+     # And the length should be correct
+     assert len(paginated) == 5
+     # And 3 requests: [0,1], [2,3], [4] should have been made, one for each page
+     assert endpoint.fetch_count == 3
+
+
+ def test_empty_results():
+     # Given an empty mock endpoint
+     endpoint = MockEndpoint(0)
+     # When doing a paginated iteration
+     paginated = PagedIterable(endpoint.fetch, page_size=5)
+     # Then we should get an empty list
+     assert list(paginated) == []
+     # And the length should be 0
+     assert len(paginated) == 0
+     # And only one request should have been made, for the first page
+     assert endpoint.fetch_count == 1
+
+
+ def test_transform_function():
+     # Given a mock endpoint with 4 items
+     endpoint = MockEndpoint(4)
+     # And a transform function that doubles the items
+     transform = lambda x: f"2x={2*x}"
+     # When doing a paginated iteration with a transform function
+     paginated = PagedIterable(endpoint.fetch, transform=transform, page_size=2)
+     # Then we should get the transformed items
+     assert list(paginated) == ["2x=0", "2x=2", "2x=4", "2x=6"]
+
+
+ def test_multiple_iterations():
+     # Given a mock endpoint with 5 items
+     endpoint = MockEndpoint(5)
+     # When we do 2 paginated iterations
+     paginated = PagedIterable(endpoint.fetch, page_size=2)
+     result1 = list(paginated)
+     result2 = list(paginated)
+     # Then we should get the same items twice
+     assert result1 == result2 == [0, 1, 2, 3, 4]
+     # And 6 requests should have been made, 3 for each iteration
+     assert endpoint.fetch_count == 6
+
+
+ def test_single_page_optimization():
+     # Given a mock endpoint with 5 items
+     endpoint = MockEndpoint(5)
+     # When doing a paginated iteration with a limit that is greater than the number of items
+     paginated = PagedIterable(endpoint.fetch, page_size=10)
+     # Then we should get all items
+     assert list(paginated) == [0, 1, 2, 3, 4]
+     # And the length should be 5
+     assert len(paginated) == 5
+     # And only one request should have been made
+     assert endpoint.fetch_count == 1
+     # And a second iteration should not make any additional requests
+     assert list(paginated) == [0, 1, 2, 3, 4]
+     assert endpoint.fetch_count == 1
+
+
+ def test_indexing():
+     # Given a mock endpoint with 7 items
+     endpoint = MockEndpoint(7)
+     # When creating a paginated iterable with page size 3
+     paginated = PagedIterable(endpoint.fetch, page_size=3)
+     # Then we should be able to access items by index
+     assert paginated[0] == 0
+     assert paginated[2] == 2
+     assert paginated[6] == 6
+     # And negative indices should work
+     assert paginated[-1] == 6
+     # And accessing out of bounds should raise IndexError
+     with pytest.raises(IndexError):
+         paginated[7]
+     with pytest.raises(IndexError):
+         paginated[-8]
+     # And transforms are applied
+     assert PagedIterable(endpoint.fetch, transform=lambda x: x * 10, page_size=3)[1] == 10
+
+
+ def test_slicing():
+     # Given a mock endpoint with 10 items
+     endpoint = MockEndpoint(10)
+     # When creating a paginated iterable
+     paginated = PagedIterable(endpoint.fetch, page_size=3)
+     # Then we should be able to slice it
+     assert list(paginated[2:5]) == [2, 3, 4]
+     assert list(paginated[:3]) == [0, 1, 2]
+     assert list(paginated[7:]) == [7, 8, 9]
+     # And negative indices should work
+     assert list(paginated[:-5]) == [0, 1, 2, 3, 4, 5]
+     assert list(paginated[-3:]) == [7, 8, 9]
+     assert list(paginated[-5:-2]) == [5, 6, 7]
+     # And empty slices should work
+     assert list(paginated[5:5]) == []
+     # And slicing with a start and stop that are out of bounds should raise IndexError
+     with pytest.raises(IndexError):
+         list(paginated[20:25])
+     # And slicing with a step other than 1 should raise ValueError
+     with pytest.raises(ValueError):
+         list(paginated[::2])
+     with pytest.raises(ValueError):
+         list(paginated[1:8:3])
+     with pytest.raises(ValueError):
+         list(paginated[::-1])
+     # And transforms are applied
+     assert list(PagedIterable(endpoint.fetch, transform=lambda x: x * 10, page_size=3)[1:3]) == [10, 20]
orca_sdk/_utils/prediction_result_ui.css
@@ -0,0 +1,18 @@
+ .white {
+   background-color: white;
+ }
+ .success {
+   color: gray;
+   font-size: 12px;
+   height: 24px;
+ }
+ .html-container:has(.no-padding) {
+   padding: 0;
+   height: 24px;
+ }
+ .progress-bar {
+   background-color: #2b9a66;
+ }
+ .progress-level-inner {
+   display: none;
+ }
orca_sdk/_utils/prediction_result_ui.py
@@ -0,0 +1,110 @@
+ from __future__ import annotations
+
+ import logging
+ import re
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ import gradio as gr
+
+ from ..memoryset import LabeledMemoryLookup, LabeledMemoryset, ScoredMemoryLookup
+
+ if TYPE_CHECKING:
+     from ..telemetry import PredictionBase
+
+
+ def inspect_prediction_result(prediction_result: PredictionBase):
+
+     def update_label(val: str, memory: LabeledMemoryLookup, progress=gr.Progress(track_tqdm=True)):
+         progress(0)
+         match = re.search(r".*\((\d+)\)$", val)
+         if match:
+             progress(0.5)
+             new_label = int(match.group(1))
+             memory.update(label=new_label)
+             progress(1)
+             return "&#9989; Changes saved"
+         else:
+             logging.error(f"Invalid label format: {val}")
+
+     def update_score(val: float, memory: ScoredMemoryLookup, progress=gr.Progress(track_tqdm=True)):
+         progress(0)
+         memory.update(score=val)
+         progress(1)
+         return "&#9989; Changes saved"
+
+     with gr.Blocks(
+         fill_width=True,
+         title="Prediction Results",
+         css_paths=str(Path(__file__).parent / "prediction_result_ui.css"),
+     ) as prediction_result_ui:
+         gr.Markdown("# Prediction Results")
+         gr.Markdown(f"**Input:** {prediction_result.input_value}")
+
+         if isinstance(prediction_result.memoryset, LabeledMemoryset) and prediction_result.label is not None:
+             label_names = prediction_result.memoryset.label_names
+             gr.Markdown(f"**Prediction:** {label_names[prediction_result.label]} ({prediction_result.label})")
+         else:
+             gr.Markdown(f"**Prediction:** {prediction_result.score:.2f}")
+
+         gr.Markdown("### Memory Lookups")
+
+         with gr.Row(equal_height=True, variant="panel"):
+             with gr.Column(scale=7):
+                 gr.Markdown("**Value**")
+             with gr.Column(scale=3, min_width=150):
+                 gr.Markdown("**Label**" if prediction_result.label is not None else "**Score**")
+
+         for i, mem_lookup in enumerate(prediction_result.memory_lookups):
+             with gr.Row(equal_height=True, variant="panel", elem_classes="white" if i % 2 == 0 else None):
+                 with gr.Column(scale=7):
+                     gr.Markdown(
+                         (
+                             mem_lookup.value
+                             if isinstance(mem_lookup.value, str)
+                             else "Time series data" if isinstance(mem_lookup.value, list) else "Image data"
+                         ),
+                         label="Value",
+                         height=50,
+                     )
+                 with gr.Column(scale=3, min_width=150):
+                     if (
+                         isinstance(prediction_result.memoryset, LabeledMemoryset)
+                         and prediction_result.label is not None
+                         and isinstance(mem_lookup, LabeledMemoryLookup)
+                     ):
+                         label_names = prediction_result.memoryset.label_names
+                         dropdown = gr.Dropdown(
+                             choices=[f"{label_name} ({i})" for i, label_name in enumerate(label_names)],
+                             label="Label",
+                             value=(
+                                 f"{label_names[mem_lookup.label]} ({mem_lookup.label})"
+                                 if mem_lookup.label is not None
+                                 else "None"
+                             ),
+                             interactive=True,
+                             container=False,
+                         )
+                         changes_saved = gr.HTML(lambda: "", elem_classes="success no-padding", every=15)
+                         dropdown.change(
+                             lambda val, mem=mem_lookup: update_label(val, mem),
+                             inputs=[dropdown],
+                             outputs=[changes_saved],
+                             show_progress="full",
+                         )
+                     elif prediction_result.score is not None and isinstance(mem_lookup, ScoredMemoryLookup):
+                         input = gr.Number(
+                             value=mem_lookup.score,
+                             label="Score",
+                             interactive=True,
+                             container=False,
+                         )
+                         changes_saved = gr.HTML(lambda: "", elem_classes="success no-padding", every=15)
+                         input.change(
+                             lambda val, mem=mem_lookup: update_score(val, mem),
+                             inputs=[input],
+                             outputs=[changes_saved],
+                             show_progress="full",
+                         )
+
+     prediction_result_ui.launch()
orca_sdk/_utils/tqdm_file_reader.py
@@ -0,0 +1,12 @@
+ class TqdmFileReader:
+     def __init__(self, file_obj, pbar):
+         self.file_obj = file_obj
+         self.pbar = pbar
+
+     def read(self, size=-1):
+         data = self.file_obj.read(size)
+         self.pbar.update(len(data))
+         return data
+
+     def __getattr__(self, attr):
+         return getattr(self.file_obj, attr)
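A sketch of the intended use: wrap a file object so that read progress is reported through a tqdm bar, while every other attribute is forwarded to the underlying file. The file path below is hypothetical.

import os

from tqdm import tqdm

from orca_sdk._utils.tqdm_file_reader import TqdmFileReader

path = "payload.bin"  # hypothetical file to stream, e.g. for an upload
with open(path, "rb") as file, tqdm(total=os.path.getsize(path), unit="B", unit_scale=True) as pbar:
    reader = TqdmFileReader(file, pbar)
    while chunk := reader.read(1024 * 1024):
        pass  # each read() call advances the progress bar; __getattr__ proxies everything else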
orca_sdk/_utils/value_parser.py
@@ -0,0 +1,45 @@
+ import base64
+ import io
+ from typing import cast
+
+ import numpy as np
+ from numpy.typing import NDArray
+ from PIL import Image as pil
+
+ ValueType = str | pil.Image | NDArray[np.float32]
+ """
+ The type of a value in a memoryset
+
+ - `str`: string
+ - `pil.Image`: image
+ - `NDArray[np.float32]`: univariate or multivariate timeseries
+ """
+
+
+ def decode_value(value: str) -> ValueType:
+     if value.startswith("data:image"):
+         header, data = value.split(",", 1)
+         return pil.open(io.BytesIO(base64.b64decode(data)))
+
+     if value.startswith("data:numpy"):
+         header, data = value.split(",", 1)
+         return np.load(io.BytesIO(base64.b64decode(data)))
+
+     return value
+
+
+ def encode_value(value: ValueType) -> str:
+     if isinstance(value, pil.Image):
+         header = f"data:image/{value.format.lower()};base64," if value.format else "data:image;base64,"
+         buffer = io.BytesIO()
+         value.save(buffer, format=value.format)
+         bytes = buffer.getvalue()
+         return header + base64.b64encode(bytes).decode("utf-8")
+
+     if isinstance(value, np.ndarray):
+         header = f"data:numpy/{value.dtype.name};base64,"
+         buffer = io.BytesIO()
+         np.save(buffer, value)
+         return header + base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+     return value
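A round-trip sketch of the encoding scheme: non-string values are serialized to base64 data URIs by encode_value and restored by decode_value, while plain strings pass through unchanged. The array below is illustrative.

import numpy as np

from orca_sdk._utils.value_parser import decode_value, encode_value

series = np.arange(6, dtype=np.float32).reshape(2, 3)  # a small multivariate timeseries
encoded = encode_value(series)
assert encoded.startswith("data:numpy/float32;base64,")

decoded = decode_value(encoded)
assert isinstance(decoded, np.ndarray)
assert np.array_equal(decoded, series)

# plain strings are returned as-is by both functions
assert decode_value(encode_value("hello")) == "hello"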