PyPI - orca-sdk - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

orca-sdk 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186) hide show

orca_sdk/_utils/pagination.py ADDED Viewed

@@ -0,0 +1,126 @@
+from __future__ import annotations
+from typing import Callable, Generic, Iterator, TypedDict, TypeVar, cast, overload
+T = TypeVar("T")  # type of the raw items returned by the endpoint
+R = TypeVar("R")  # type of the items after the optional transform was applied
+class Page(TypedDict, Generic[T]):
+    """One page of results as returned by a paginated endpoint."""
+    items: list[T]  # items contained in this page
+    count: int  # total number of items the endpoint reports, used as the length of the iterable
+class _PagedIterable(Generic[T, R]):
+    """
+    Sequence-like view over a paginated endpoint that fetches pages on demand.
+    Supports iteration, `len()`, integer indexing and contiguous (step 1) slicing. The length is
+    taken from the first page, which is fetched eagerly by the constructor. Iteration state is
+    stored on the instance, so two iterators over the same object must not be interleaved.
+    """
+    def __init__(
+        self,
+        fetch: Callable[[int, int], Page[T]],
+        *,
+        transform: Callable[[T], R] | None = None,
+        page_size: int = 100,
+    ) -> None:
+        """
+        Iterate over a paginated endpoint.
+        Parameters:
+            fetch: function to fetch a page from the endpoint `(limit: int, offset: int) -> TypedDict[{items: list[T], count: int}]`
+            transform: Optional function to transform item types `(item: T) -> R`, defaults to identity
+            page_size: maximum number of items to fetch per page, must be at least 1
+        Raises:
+            ValueError: if `page_size` is smaller than 1
+        """
+        if page_size < 1:
+            # a page size of 0 could never make progress and would break slicing (`range` step of 0)
+            raise ValueError("page_size must be at least 1")
+        self.fetch = fetch
+        self.transform = transform or (lambda x: cast(R, x))
+        self.page_size = page_size
+        self.offset = 0  # tracks how much has been yielded, not fetched
+        self.page = fetch(self.page_size, self.offset)  # fetch first page to populate count
+        self.count = self.page["count"]
+    def __iter__(self) -> Iterator[R]:
+        """
+        Yield every (transformed) item from the beginning, fetching one page at a time.
+        The page prefetched by the constructor is reused for the first pass; later passes refetch
+        the first page unless the cached page already holds all items.
+        """
+        if self.offset != 0:
+            # a previous pass moved beyond the first page (it finished or was abandoned midway):
+            # always restart from the beginning instead of resuming where that pass stopped
+            self.offset = 0
+            if len(self.page["items"]) < self.count:
+                # refetch first page unless the cached page already holds every item
+                self.page = self.fetch(self.page_size, self.offset)
+        # yield the cached first page
+        yield from map(self.transform, self.page["items"])
+        self.offset += len(self.page["items"])
+        # yield remaining pages one by one
+        while self.offset < self.count:
+            self.page = self.fetch(self.page_size, self.offset)
+            if not self.page["items"]:
+                # the endpoint returned fewer items than it announced: stop instead of looping forever
+                break
+            yield from map(self.transform, self.page["items"])
+            self.offset += len(self.page["items"])
+    @overload
+    def __getitem__(self, key: int) -> R:
+        pass
+    @overload
+    def __getitem__(self, key: slice) -> list[R]:
+        pass
+    def __getitem__(self, key: int | slice) -> R | list[R]:
+        """
+        Return the item at an index or the list of items in a contiguous slice.
+        Slices follow list semantics for their bounds (negative and open-ended bounds are
+        resolved against the length and the result never extends past `stop`), except that an
+        explicit start at or beyond the end raises instead of returning an empty list.
+        Raises:
+            IndexError: if the index, or an explicitly given slice start, is out of range
+            ValueError: if the slice has a step other than 1
+            TypeError: if the key is neither an integer nor a slice
+        """
+        if isinstance(key, int):
+            index = key + self.count if key < 0 else key
+            if not 0 <= index < self.count:
+                raise IndexError(f"Index {key} out of range")
+            # if key is on current page, return item
+            if self.offset <= index < self.offset + len(self.page["items"]):
+                return self.transform(self.page["items"][index - self.offset])
+            # otherwise, fetch and return the single item
+            return self.transform(self.fetch(1, index)["items"][0])
+        if isinstance(key, slice):
+            # `indices` already resolves missing and negative bounds and clamps them to [0, count]
+            start, stop, step = key.indices(self.count)
+            if step != 1:
+                raise ValueError("Stepped slicing is not supported")
+            if key.start is not None and start >= self.count:
+                raise IndexError(f"Slice {key} out of range")
+            items: list[R] = []
+            for offset in range(start, stop, self.page_size):
+                # never request more than what is left of the slice so the last page cannot overshoot `stop`
+                page = self.fetch(min(self.page_size, stop - offset), offset)
+                items.extend(map(self.transform, page["items"]))
+            return items
+        raise TypeError(f"Indices must be integers or slices, not {type(key).__name__}")
+    def __len__(self) -> int:
+        """Return the total item count reported by the first fetched page."""
+        return self.count
+# type checking workaround until python 3.13 allows declaring the class as PagedIterable[T, R = T]
+@overload
+def PagedIterable(
+    fetch: Callable[[int, int], Page[T]],
+    *,
+    transform: None = None,
+    page_size: int = 100,
+) -> _PagedIterable[T, T]:
+    pass
+@overload
+def PagedIterable(
+    fetch: Callable[[int, int], Page[T]],
+    *,
+    transform: Callable[[T], R],
+    page_size: int = 100,
+) -> _PagedIterable[T, R]:
+    pass
+def PagedIterable(
+    fetch: Callable[[int, int], Page[T]],
+    *,
+    transform: Callable[[T], R] | None = None,
+    page_size: int = 100,
+) -> _PagedIterable[T, R]:
+    """
+    Create a `_PagedIterable`; the overloads let type checkers infer `R = T` when no transform is given.
+    Parameters:
+        fetch: function to fetch a page from the endpoint `(limit: int, offset: int) -> Page[T]`
+        transform: Optional function to transform item types `(item: T) -> R`, defaults to identity
+        page_size: maximum number of items to fetch per page
+    """
+    return _PagedIterable(fetch, transform=transform, page_size=page_size)

orca_sdk/_utils/pagination_test.py ADDED Viewed

@@ -0,0 +1,132 @@
+import pytest
+from .pagination import Page, PagedIterable
+class MockEndpoint:
+    """In-memory stand-in for a paginated endpoint serving the integers `0..total_items-1`."""
+    def __init__(self, total_items: int):
+        self.fetch_count = 0  # number of `fetch` calls made so far
+        self.items = [*range(total_items)]
+    def fetch(self, limit: int, offset: int) -> Page[int]:
+        """Return up to `limit` items starting at `offset`, together with the total item count."""
+        self.fetch_count += 1
+        total = len(self.items)
+        window = self.items[offset : min(offset + limit, total)]
+        return {"items": window, "count": total}
+def test_basic_pagination():
+    """Iterating yields every item in order and issues exactly one request per page."""
+    # Given a mock endpoint with 5 items
+    endpoint = MockEndpoint(5)
+    # When doing a paginated iteration
+    paginated = PagedIterable(endpoint.fetch, page_size=2)
+    # Then we should be able to iterate through all items
+    assert list(paginated) == [0, 1, 2, 3, 4]
+    # And the length should be correct
+    assert len(paginated) == 5
+    # And 3 requests: [0,1], [2,3], [4] should have been made, one for each page
+    # (the first of them is issued by the constructor, not by the iteration)
+    assert endpoint.fetch_count == 3
+def test_empty_results():
+    """An endpoint without items yields nothing, has length 0 and is only queried once."""
+    # Given an empty mock endpoint
+    endpoint = MockEndpoint(0)
+    # When doing a paginated iteration
+    paginated = PagedIterable(endpoint.fetch, page_size=5)
+    # Then we should get an empty list
+    assert list(paginated) == []
+    # And the length should be 0
+    assert len(paginated) == 0
+    # And only one request should have been made, for the first page
+    assert endpoint.fetch_count == 1
+def test_transform_function():
+    """Items are passed through the transform function before being yielded."""
+    # Given a mock endpoint with 4 items
+    endpoint = MockEndpoint(4)
+    # And a transform function that renders each item as the string "2x=<twice the item>"
+    transform = lambda x: f"2x={2*x}"
+    # When doing a paginated iteration with a transform function
+    paginated = PagedIterable(endpoint.fetch, transform=transform, page_size=2)
+    # Then we should get the transformed items
+    assert list(paginated) == ["2x=0", "2x=2", "2x=4", "2x=6"]
+def test_multiple_iterations():
+    """The same iterable can be consumed twice; the second pass fetches every page again."""
+    # Given a mock endpoint with 5 items
+    endpoint = MockEndpoint(5)
+    # When we do 2 paginated iterations
+    paginated = PagedIterable(endpoint.fetch, page_size=2)
+    result1 = list(paginated)
+    result2 = list(paginated)
+    # Then we should get the same items twice
+    assert result1 == result2 == [0, 1, 2, 3, 4]
+    # And 6 requests should have been made, 3 for each iteration
+    assert endpoint.fetch_count == 6
+def test_single_page_optimization():
+    """When all items fit on the first page, repeated iterations reuse it without new requests."""
+    # Given a mock endpoint with 5 items
+    endpoint = MockEndpoint(5)
+    # When doing a paginated iteration with a page size that is greater than the number of items
+    paginated = PagedIterable(endpoint.fetch, page_size=10)
+    # Then we should get all items
+    assert list(paginated) == [0, 1, 2, 3, 4]
+    # And the length should be 5
+    assert len(paginated) == 5
+    # And only one request should have been made
+    assert endpoint.fetch_count == 1
+    # And a second iteration should not make any additional requests
+    assert list(paginated) == [0, 1, 2, 3, 4]
+    assert endpoint.fetch_count == 1
+def test_indexing():
+    """Integer indexing supports positive and negative indices and rejects out-of-range ones."""
+    # Given a mock endpoint with 7 items
+    endpoint = MockEndpoint(7)
+    # When creating a paginated iterable with page size 3
+    paginated = PagedIterable(endpoint.fetch, page_size=3)
+    # Then we should be able to access items by index
+    assert paginated[0] == 0
+    assert paginated[2] == 2
+    assert paginated[6] == 6
+    # And negative indices should work
+    assert paginated[-1] == 6
+    # And accessing out of bounds should raise IndexError
+    with pytest.raises(IndexError):
+        paginated[7]
+    with pytest.raises(IndexError):
+        paginated[-8]
+    # And transforms are applied
+    assert PagedIterable(endpoint.fetch, transform=lambda x: x * 10, page_size=3)[1] == 10
+def test_slicing():
+    """Contiguous slices are supported, stepped slices and out-of-range starts are rejected."""
+    # Given a mock endpoint with 10 items
+    endpoint = MockEndpoint(10)
+    # When creating a paginated iterable
+    paginated = PagedIterable(endpoint.fetch, page_size=3)
+    # Then we should be able to slice it
+    assert list(paginated[2:5]) == [2, 3, 4]
+    assert list(paginated[:3]) == [0, 1, 2]
+    assert list(paginated[7:]) == [7, 8, 9]
+    # And negative indices should work
+    # NOTE(review): under list semantics `[:-5]` of 10 items is `[0, 1, 2, 3, 4]`. The 6-element
+    # expectation below only holds because the slice is served in requests of `page_size` items
+    # (the second request is `fetch(3, 3)`), so the last page overshoots `stop` — confirm whether
+    # this is intended; if slicing is made exact this expectation must drop the trailing 5.
+    assert list(paginated[:-5]) == [0, 1, 2, 3, 4, 5]
+    assert list(paginated[-3:]) == [7, 8, 9]
+    assert list(paginated[-5:-2]) == [5, 6, 7]
+    # And empty slices should work
+    assert list(paginated[5:5]) == []
+    # And slicing with a start and stop that are out of bounds should raise IndexError
+    with pytest.raises(IndexError):
+        list(paginated[20:25])
+    # And slicing with a step other than 1 should raise ValueError
+    with pytest.raises(ValueError):
+        list(paginated[::2])
+    with pytest.raises(ValueError):
+        list(paginated[1:8:3])
+    with pytest.raises(ValueError):
+        list(paginated[::-1])
+    # And transforms are applied
+    assert list(PagedIterable(endpoint.fetch, transform=lambda x: x * 10, page_size=3)[1:3]) == [10, 20]

orca_sdk/_utils/prediction_result_ui.py CHANGED Viewed

@@ -1,3 +1,5 @@
+from __future__ import annotations
 import logging
 import re
 from pathlib import Path
@@ -5,14 +7,13 @@ from typing import TYPE_CHECKING
 import gradio as gr
-from ..memoryset import LabeledMemoryLookup
+from ..memoryset import LabeledMemoryLookup, LabeledMemoryset, ScoredMemoryLookup
 if TYPE_CHECKING:
-    from ..telemetry import LabelPrediction
+    from ..telemetry import PredictionBase
-def inspect_prediction_result(prediction_result: "LabelPrediction"):
-    label_names = prediction_result.memoryset.label_names
+def inspect_prediction_result(prediction_result: PredictionBase):
     def update_label(val: str, memory: LabeledMemoryLookup, progress=gr.Progress(track_tqdm=True)):
         progress(0)
@@ -26,6 +27,12 @@ def inspect_prediction_result(prediction_result: "LabelPrediction"):
         else:
             logging.error(f"Invalid label format: {val}")
+    def update_score(val: float, memory: ScoredMemoryLookup, progress=gr.Progress(track_tqdm=True)):
+        progress(0)
+        memory.update(score=val)
+        progress(1)
+        return "&#9989; Changes saved"
     with gr.Blocks(
         fill_width=True,
         title="Prediction Results",
@@ -33,32 +40,71 @@ def inspect_prediction_result(prediction_result: "LabelPrediction"):
     ) as prediction_result_ui:
         gr.Markdown("# Prediction Results")
         gr.Markdown(f"**Input:** {prediction_result.input_value}")
-        gr.Markdown(f"**Prediction:** {label_names[prediction_result.label]} ({prediction_result.label})")
+        if isinstance(prediction_result.memoryset, LabeledMemoryset) and prediction_result.label is not None:
+            label_names = prediction_result.memoryset.label_names
+            gr.Markdown(f"**Prediction:** {label_names[prediction_result.label]} ({prediction_result.label})")
+        else:
+            gr.Markdown(f"**Prediction:** {prediction_result.score:.2f}")
         gr.Markdown("### Memory Lookups")
         with gr.Row(equal_height=True, variant="panel"):
             with gr.Column(scale=7):
                 gr.Markdown("**Value**")
             with gr.Column(scale=3, min_width=150):
-                gr.Markdown("**Label**")
+                gr.Markdown("**Label**" if prediction_result.label is not None else "**Score**")
         for i, mem_lookup in enumerate(prediction_result.memory_lookups):
             with gr.Row(equal_height=True, variant="panel", elem_classes="white" if i % 2 == 0 else None):
                 with gr.Column(scale=7):
-                    gr.Markdown(mem_lookup.value, label="Value", height=50)
-                with gr.Column(scale=3, min_width=150):
-                    dropdown = gr.Dropdown(
-                        choices=[f"{label_name} ({i})" for i, label_name in enumerate(label_names)],
-                        label="Label",
-                        value=f"{label_names[mem_lookup.label]} ({mem_lookup.label})",
-                        interactive=True,
-                        container=False,
-                    )
-                    changes_saved = gr.HTML(lambda: "", elem_classes="success no-padding", every=15)
-                    dropdown.change(
-                        lambda val, mem_lookup=mem_lookup: update_label(val, mem_lookup),
-                        inputs=[dropdown],
-                        outputs=[changes_saved],
-                        show_progress="full",
+                    gr.Markdown(
+                        (
+                            mem_lookup.value
+                            if isinstance(mem_lookup.value, str)
+                            else "Time series data" if isinstance(mem_lookup.value, list) else "Image data"
+                        ),
+                        label="Value",
+                        height=50,
                     )
+                with gr.Column(scale=3, min_width=150):
+                    if (
+                        isinstance(prediction_result.memoryset, LabeledMemoryset)
+                        and prediction_result.label is not None
+                        and isinstance(mem_lookup, LabeledMemoryLookup)
+                    ):
+                        label_names = prediction_result.memoryset.label_names
+                        dropdown = gr.Dropdown(
+                            choices=[f"{label_name} ({i})" for i, label_name in enumerate(label_names)],
+                            label="Label",
+                            value=(
+                                f"{label_names[mem_lookup.label]} ({mem_lookup.label})"
+                                if mem_lookup.label is not None
+                                else "None"
+                            ),
+                            interactive=True,
+                            container=False,
+                        )
+                        changes_saved = gr.HTML(lambda: "", elem_classes="success no-padding", every=15)
+                        dropdown.change(
+                            lambda val, mem=mem_lookup: update_label(val, mem),
+                            inputs=[dropdown],
+                            outputs=[changes_saved],
+                            show_progress="full",
+                        )
+                    elif prediction_result.score is not None and isinstance(mem_lookup, ScoredMemoryLookup):
+                        input = gr.Number(
+                            value=mem_lookup.score,
+                            label="Score",
+                            interactive=True,
+                            container=False,
+                        )
+                        changes_saved = gr.HTML(lambda: "", elem_classes="success no-padding", every=15)
+                        input.change(
+                            lambda val, mem=mem_lookup: update_score(val, mem),
+                            inputs=[input],
+                            outputs=[changes_saved],
+                            show_progress="full",
+                        )
     prediction_result_ui.launch()

orca_sdk/_utils/tqdm_file_reader.py ADDED Viewed

@@ -0,0 +1,12 @@
+class TqdmFileReader:
+    """
+    File-object proxy that reports the amount of data read to a progress bar.
+    Only `read` is intercepted; every other attribute is forwarded unchanged to the wrapped
+    file object, so reads done through other methods do not advance the progress bar.
+    """
+    def __init__(self, file_obj, pbar):
+        """
+        Parameters:
+            file_obj: object exposing `read(size)`; all other attribute access is forwarded to it
+            pbar: object exposing `update(n)`, called with the length of every chunk read
+        """
+        self.file_obj = file_obj
+        self.pbar = pbar
+    def read(self, size=-1):
+        """Read from the wrapped file and advance the progress bar by the length of the data read."""
+        data = self.file_obj.read(size)
+        self.pbar.update(len(data))
+        return data
+    def __getattr__(self, attr):
+        """Forward attributes that are not defined on the wrapper to the wrapped file object."""
+        # `__getattr__` only runs when normal lookup fails, so getting here for one of our own
+        # attributes means the instance is not initialized yet (e.g. it was created without
+        # running `__init__`). Fail with AttributeError instead of letting `self.file_obj`
+        # below re-enter `__getattr__` until the recursion limit is hit.
+        if attr in ("file_obj", "pbar"):
+            raise AttributeError(attr)
+        return getattr(self.file_obj, attr)

orca_sdk/_utils/value_parser.py ADDED Viewed

@@ -0,0 +1,45 @@
+import base64
+import io
+from typing import cast
+import numpy as np
+from numpy.typing import NDArray
+from PIL import Image as pil
+# Union of the Python types a memoryset value can take; the string below describes each member.
+# NOTE(review): this `|` between types is evaluated when the module is imported, which
+# presumably requires Python 3.10+ — confirm against the supported interpreter versions.
+ValueType = str | pil.Image | NDArray[np.float32]
+"""
+The type of a value in a memoryset
+- `str`: string
+- `pil.Image`: image
+- `NDArray[np.float32]`: univariate or multivariate timeseries
+"""
+def decode_value(value: str) -> ValueType:
+    """
+    Decode a serialized value into its Python representation.
+    Strings of the form `data:image...,<base64>` are decoded into a PIL image, strings of the
+    form `data:numpy...,<base64>` into a numpy array; any other string is returned unchanged.
+    Parameters:
+        value: plain string or data-URI style string as produced by `encode_value`
+    Returns:
+        The decoded image or array, or `value` itself when it is not an encoded payload
+    """
+    header, separator, payload = value.partition(",")
+    if not separator:
+        # without a payload delimiter this cannot be an encoded value, even if the text happens
+        # to start with "data:image" or "data:numpy": treat it as a plain string
+        return value
+    if header.startswith("data:image"):
+        return pil.open(io.BytesIO(base64.b64decode(payload)))
+    if header.startswith("data:numpy"):
+        # explicitly refuse pickled object arrays: loading them can execute arbitrary code
+        return np.load(io.BytesIO(base64.b64decode(payload)), allow_pickle=False)
+    return value
+def encode_value(value: ValueType) -> str:
+    """
+    Serialize a value into a string that `decode_value` can turn back into the original type.
+    Images become `data:image/<format>;base64,<payload>`, numpy arrays become
+    `data:numpy/<dtype>;base64,<payload>` (in `.npy` format); strings are returned unchanged.
+    Parameters:
+        value: string, PIL image or numpy array to serialize
+    Returns:
+        The plain string or the data-URI style encoding of the value
+    """
+    if isinstance(value, pil.Image):
+        # images that were created in memory instead of being read from a file have no format,
+        # and saving to a file object requires an explicit one, so fall back to lossless PNG
+        image_format = value.format or "PNG"
+        buffer = io.BytesIO()
+        value.save(buffer, format=image_format)
+        header = f"data:image/{image_format.lower()};base64,"
+        return header + base64.b64encode(buffer.getvalue()).decode("utf-8")
+    if isinstance(value, np.ndarray):
+        header = f"data:numpy/{value.dtype.name};base64,"
+        buffer = io.BytesIO()
+        np.save(buffer, value)
+        return header + base64.b64encode(buffer.getvalue()).decode("utf-8")
+    return value

orca_sdk/_utils/value_parser_test.py ADDED Viewed

@@ -0,0 +1,39 @@
+import numpy as np
+from PIL import Image as pil
+from .value_parser import decode_value, encode_value
+def test_string_parsing():
+    """Plain strings pass through encoding and decoding unchanged."""
+    encoded = encode_value("hello world")
+    assert encoded == "hello world"
+    decoded = decode_value(encoded)
+    assert decoded == "hello world"
+def test_image_parsing():
+    """Images are encoded as a base64 data URI and decode back to an image of the same size."""
+    img = pil.new("RGB", (10, 10), color="red")
+    # set the format explicitly: `encode_value` uses `value.format` both for saving the image
+    # and for the header (presumably images created with `pil.new` carry no format — confirm)
+    img.format = "PNG"
+    encoded = encode_value(img)
+    assert isinstance(encoded, str)
+    assert encoded.startswith("data:image/png;base64,")
+    decoded = decode_value(encoded)
+    assert isinstance(decoded, pil.Image)
+    assert decoded.size == img.size
+def test_timeseries_parsing():
+    """Numpy arrays round-trip through encoding with shape, dtype and values preserved."""
+    # random (20, 3) float32 array standing in for a multivariate timeseries
+    timeseries = np.random.rand(20, 3).astype(np.float32)
+    encoded = encode_value(timeseries)
+    assert isinstance(encoded, str)
+    assert encoded.startswith(f"data:numpy/{timeseries.dtype.name};base64,")
+    decoded = decode_value(encoded)
+    assert isinstance(decoded, np.ndarray)
+    assert decoded.shape == timeseries.shape
+    assert decoded.dtype == timeseries.dtype
+    assert np.allclose(decoded, timeseries)

orca-sdk 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

orca-sdk 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl