orca-sdk 0.1.10__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- orca_sdk/_utils/data_parsing_test.py
+++ /dev/null
@@ -1,244 +0,0 @@
- import json
- import pickle
- import tempfile
- from collections import namedtuple
- from dataclasses import dataclass
-
- import pandas as pd
- import pytest
- from datasets import Dataset
- from datasets.exceptions import DatasetGenerationError
- from torch.utils.data import DataLoader as TorchDataLoader
- from torch.utils.data import Dataset as TorchDataset
-
- from ..conftest import SAMPLE_DATA
- from .data_parsing import hf_dataset_from_disk, hf_dataset_from_torch
-
-
- class PytorchDictDataset(TorchDataset):
-     def __init__(self):
-         self.data = SAMPLE_DATA
-
-     def __getitem__(self, i):
-         return self.data[i]
-
-     def __len__(self):
-         return len(self.data)
-
-
- def test_hf_dataset_from_torch_dict():
-     # Given a Pytorch dataset that returns a dictionary for each item
-     dataset = PytorchDictDataset()
-     hf_dataset = hf_dataset_from_torch(dataset)
-     # Then the HF dataset should be created successfully
-     assert isinstance(hf_dataset, Dataset)
-     assert len(hf_dataset) == len(dataset)
-     assert set(hf_dataset.column_names) == {"value", "label", "key", "score", "source_id", "partition_id"}
-
-
- class PytorchTupleDataset(TorchDataset):
-     def __init__(self):
-         self.data = SAMPLE_DATA
-
-     def __getitem__(self, i):
-         return self.data[i]["value"], self.data[i]["label"]
-
-     def __len__(self):
-         return len(self.data)
-
-
- def test_hf_dataset_from_torch_tuple():
-     # Given a Pytorch dataset that returns a tuple for each item
-     dataset = PytorchTupleDataset()
-     # And the correct number of column names passed in
-     hf_dataset = hf_dataset_from_torch(dataset, column_names=["value", "label"])
-     # Then the HF dataset should be created successfully
-     assert isinstance(hf_dataset, Dataset)
-     assert len(hf_dataset) == len(dataset)
-     assert hf_dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_torch_tuple_error():
-     # Given a Pytorch dataset that returns a tuple for each item
-     dataset = PytorchTupleDataset()
-     # Then the HF dataset should raise an error if no column names are passed in
-     with pytest.raises(DatasetGenerationError):
-         hf_dataset_from_torch(dataset)
-
-
- def test_hf_dataset_from_torch_tuple_error_not_enough_columns():
-     # Given a Pytorch dataset that returns a tuple for each item
-     dataset = PytorchTupleDataset()
-     # Then the HF dataset should raise an error if not enough column names are passed in
-     with pytest.raises(DatasetGenerationError):
-         hf_dataset_from_torch(dataset, column_names=["value"])
-
-
- DatasetTuple = namedtuple("DatasetTuple", ["value", "label"])
-
-
- class PytorchNamedTupleDataset(TorchDataset):
-     def __init__(self):
-         self.data = SAMPLE_DATA
-
-     def __getitem__(self, i):
-         return DatasetTuple(self.data[i]["value"], self.data[i]["label"])
-
-     def __len__(self):
-         return len(self.data)
-
-
- def test_hf_dataset_from_torch_named_tuple():
-     # Given a Pytorch dataset that returns a namedtuple for each item
-     dataset = PytorchNamedTupleDataset()
-     # And no column names are passed in
-     hf_dataset = hf_dataset_from_torch(dataset)
-     # Then the HF dataset should be created successfully
-     assert isinstance(hf_dataset, Dataset)
-     assert len(hf_dataset) == len(dataset)
-     assert hf_dataset.column_names == ["value", "label"]
-
-
- @dataclass
- class DatasetItem:
-     text: str
-     label: int
-
-
- class PytorchDataclassDataset(TorchDataset):
-     def __init__(self):
-         self.data = SAMPLE_DATA
-
-     def __getitem__(self, i):
-         return DatasetItem(text=self.data[i]["value"], label=self.data[i]["label"])
-
-     def __len__(self):
-         return len(self.data)
-
-
- def test_hf_dataset_from_torch_dataclass():
-     # Given a Pytorch dataset that returns a dataclass for each item
-     dataset = PytorchDataclassDataset()
-     hf_dataset = hf_dataset_from_torch(dataset)
-     # Then the HF dataset should be created successfully
-     assert isinstance(hf_dataset, Dataset)
-     assert len(hf_dataset) == len(dataset)
-     assert hf_dataset.column_names == ["text", "label"]
-
-
- class PytorchInvalidDataset(TorchDataset):
-     def __init__(self):
-         self.data = SAMPLE_DATA
-
-     def __getitem__(self, i):
-         return [self.data[i]["value"], self.data[i]["label"]]
-
-     def __len__(self):
-         return len(self.data)
-
-
- def test_hf_dataset_from_torch_invalid_dataset():
-     # Given a Pytorch dataset that returns a list for each item
-     dataset = PytorchInvalidDataset()
-     # Then the HF dataset should raise an error
-     with pytest.raises(DatasetGenerationError):
-         hf_dataset_from_torch(dataset)
-
-
- def test_hf_dataset_from_torchdataloader():
-     # Given a Pytorch dataloader that returns a column-oriented batch of items
-     dataset = PytorchDictDataset()
-
-     def collate_fn(x: list[dict]):
-         return {"value": [item["value"] for item in x], "label": [item["label"] for item in x]}
-
-     dataloader = TorchDataLoader(dataset, batch_size=3, collate_fn=collate_fn)
-     hf_dataset = hf_dataset_from_torch(dataloader)
-     # Then the HF dataset should be created successfully
-     assert isinstance(hf_dataset, Dataset)
-     assert len(hf_dataset) == len(dataset)
-     assert hf_dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_disk_pickle_list():
-     with tempfile.NamedTemporaryFile(suffix=".pkl") as temp_file:
-         # Given a pickle file with test data that is a list
-         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
-         with open(temp_file.name, "wb") as f:
-             pickle.dump(test_data, f)
-         dataset = hf_dataset_from_disk(temp_file.name)
-         # Then the HF dataset should be created successfully
-         assert isinstance(dataset, Dataset)
-         assert len(dataset) == 30
-         assert dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_disk_pickle_dict():
-     with tempfile.NamedTemporaryFile(suffix=".pkl") as temp_file:
-         # Given a pickle file with test data that is a dict
-         test_data = {"value": [f"test_{i}" for i in range(30)], "label": [i % 2 for i in range(30)]}
-         with open(temp_file.name, "wb") as f:
-             pickle.dump(test_data, f)
-         dataset = hf_dataset_from_disk(temp_file.name)
-         # Then the HF dataset should be created successfully
-         assert isinstance(dataset, Dataset)
-         assert len(dataset) == 30
-         assert dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_disk_json():
-     with tempfile.NamedTemporaryFile(suffix=".json") as temp_file:
-         # Given a JSON file with test data
-         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
-         with open(temp_file.name, "w") as f:
-             json.dump(test_data, f)
-         dataset = hf_dataset_from_disk(temp_file.name)
-         # Then the HF dataset should be created successfully
-         assert isinstance(dataset, Dataset)
-         assert len(dataset) == 30
-         assert dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_disk_jsonl():
-     with tempfile.NamedTemporaryFile(suffix=".jsonl") as temp_file:
-         # Given a JSONL file with test data
-         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
-         with open(temp_file.name, "w") as f:
-             for item in test_data:
-                 f.write(json.dumps(item) + "\n")
-         dataset = hf_dataset_from_disk(temp_file.name)
-         # Then the HF dataset should be created successfully
-         assert isinstance(dataset, Dataset)
-         assert len(dataset) == 30
-         assert dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_disk_csv():
-     with tempfile.NamedTemporaryFile(suffix=".csv") as temp_file:
-         # Given a CSV file with test data
-         test_data = [{"value": f"test_{i}", "label": i % 2} for i in range(30)]
-         with open(temp_file.name, "w") as f:
-             f.write("value,label\n")
-             for item in test_data:
-                 f.write(f"{item['value']},{item['label']}\n")
-         dataset = hf_dataset_from_disk(temp_file.name)
-         # Then the HF dataset should be created successfully
-         assert isinstance(dataset, Dataset)
-         assert len(dataset) == 30
-         assert dataset.column_names == ["value", "label"]
-
-
- def test_hf_dataset_from_disk_parquet():
-     with tempfile.NamedTemporaryFile(suffix=".parquet") as temp_file:
-         # Given a Parquet file with test data
-         data = {
-             "value": [f"test_{i}" for i in range(30)],
-             "label": [i % 2 for i in range(30)],
-         }
-         df = pd.DataFrame(data)
-         df.to_parquet(temp_file.name)
-         dataset = hf_dataset_from_disk(temp_file.name)
-         # Then the HF dataset should be created successfully
-         assert isinstance(dataset, Dataset)
-         assert len(dataset) == 30
-         assert dataset.column_names == ["value", "label"]
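The removed module tested `hf_dataset_from_torch`, which converts a PyTorch `Dataset` or `DataLoader` into a Hugging Face `datasets.Dataset`, inferring column names from dict, namedtuple, and dataclass items and requiring an explicit `column_names` argument for plain tuples. Below is a minimal sketch of that usage, assuming the function remains importable from `orca_sdk._utils.data_parsing` in 0.1.12; the `ToyDataset` class is a hypothetical stand-in, not part of the package.

```python
# Sketch of the usage exercised by the removed tests; ToyDataset is a
# hypothetical stand-in, not an orca_sdk class.
from torch.utils.data import Dataset as TorchDataset

from orca_sdk._utils.data_parsing import hf_dataset_from_torch


class ToyDataset(TorchDataset):
    def __init__(self):
        # Dict items let hf_dataset_from_torch infer the column names.
        self.data = [{"value": f"test_{i}", "label": i % 2} for i in range(10)]

    def __getitem__(self, i):
        return self.data[i]

    def __len__(self):
        return len(self.data)


hf_dataset = hf_dataset_from_torch(ToyDataset())
assert set(hf_dataset.column_names) == {"value", "label"}

# Tuple items carry no field names, so the removed tests pass them explicitly:
# hf_dataset_from_torch(tuple_dataset, column_names=["value", "label"])
```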
--- orca_sdk-0.1.10.dist-info/RECORD
+++ /dev/null
@@ -1,41 +0,0 @@
- orca_sdk/__init__.py,sha256=xyjNwkLQXaX8A-UYgGwYDjv2btOXArT_yiMTfmW7KA8,1003
- orca_sdk/_shared/__init__.py,sha256=3Kt0Hu3QLI5FEp9nqGTxqAm3hAoBJKcagfaGQZ-lbJQ,223
- orca_sdk/_shared/metrics.py,sha256=faeL1B1ftmns1ikfKrIlU3xOn6j0iAGLNUupxvAFza8,24968
- orca_sdk/_shared/metrics_test.py,sha256=vDIXoj8EuuLcdPJz_7EiVPgQ-FXiVT81JG30jxsg9HM,20752
- orca_sdk/_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- orca_sdk/_utils/analysis_ui.py,sha256=R0xc4RyJKyBHJEEF_ztI4Dm5w8Y1uF0Wpkn4LQgXqBE,9258
- orca_sdk/_utils/analysis_ui_style.css,sha256=q_ba_-_KtgztepHg829zLzypaxKayl7ySC1-oYDzV3k,836
- orca_sdk/_utils/auth.py,sha256=nC252O171_3_wn4KBAN7kg8GNvoZFiQ5Xtzkrm5dWDo,2645
- orca_sdk/_utils/auth_test.py,sha256=ygVWv1Ex53LaxIP7p2hzPHl8l9qYyBD5IGmEFJMps6s,1056
- orca_sdk/_utils/common.py,sha256=wUm2pNDWytEecC5WiDWd02-yCZw3Akx0bIutG4lHsFA,805
- orca_sdk/_utils/data_parsing.py,sha256=5vaTpvUOS-ldlcgnSARYw7s9mce-imzkU7kA48-pdIM,5396
- orca_sdk/_utils/data_parsing_test.py,sha256=u7BEjxtsU9gMs3tAZI0lJ--vOLlwKwH3hemdCedzxA0,8826
- orca_sdk/_utils/pagination.py,sha256=986z0QPZixrZeurJWorF6eMgnTRdDF84AagEA6qNbMw,4245
- orca_sdk/_utils/pagination_test.py,sha256=BUylCrcHnwoKEBmMUzVr0lwLpA35ivcCwdBK4rMw9y8,4887
- orca_sdk/_utils/prediction_result_ui.css,sha256=sqBlkRLnovb5X5EcUDdB6iGpH63nVRlTW4uAmXuD0WM,258
- orca_sdk/_utils/prediction_result_ui.py,sha256=Ur_FY7dz3oWNmtPiP3Wl3yRlEMgK8q9UfT-SDu9UPxA,4805
- orca_sdk/_utils/tqdm_file_reader.py,sha256=Lw7Cg1UgNuRUoN6jjqZb-IlV00H-kbRcrZLdudr1GxE,324
- orca_sdk/_utils/value_parser.py,sha256=c3qMABCCDQcIjn9N1orYYnlRwDW9JWdGwW_2TDZPLdI,1286
- orca_sdk/_utils/value_parser_test.py,sha256=OybsiC-Obi32RRi9NIuwrVBRAnlyPMV1xVAaevSrb7M,1079
- orca_sdk/async_client.py,sha256=PM7N-ggmtucfcUF1vQGtTZOCJpSNTOgd7l3LDNF5kP4,137192
- orca_sdk/classification_model.py,sha256=C58euWnNvwXnthR9RtVVCOcgPEbxCjjp3sHMb86V6YA,50197
- orca_sdk/classification_model_test.py,sha256=ElqxtR6gNwwk8dNXwfwAhpT7l0ZIP3H4pHmOyFXyTWk,37370
- orca_sdk/client.py,sha256=SKZv3zGG6OwLe_FlB5wL2cxltOLPCcHvoo2CbMwyKgA,136241
- orca_sdk/conftest.py,sha256=0O1VY-SPKNAvi9fBLdY1RMnYVgZvMjP92y99bNAqqiw,12461
- orca_sdk/credentials.py,sha256=2SwC3tq5akP-F_u2s4xMZDp8mlsKMUT1T5T9Z99-eSY,6588
- orca_sdk/credentials_test.py,sha256=TLbXJMz3IlThvtSrHeLM7jRsKnrncA_ahOTpHg15Ei4,4089
- orca_sdk/datasource.py,sha256=Qn5QloE84UXeyPk2wcy1lWe5wmh1iDBS044eWnxck_E,22371
- orca_sdk/datasource_test.py,sha256=sCk3IcQJbDut5oN4Wf7PXhTxyMwalxMuCXJekSxy9wk,16665
- orca_sdk/embedding_model.py,sha256=vLGnlO9I-cN1lklNBl_LxZ8m9oK3vkegFOpvYYw8u8g,28038
- orca_sdk/embedding_model_test.py,sha256=Lc6fZ0ifT0hh6ldkUfjwMPcP6OgN0Umlzu8XDLs7UO4,8144
- orca_sdk/job.py,sha256=wHwVt-s7i-v8udhLGybB-90Kp4dwOLrY806bE4Tam5Q,13092
- orca_sdk/job_test.py,sha256=nRSWxd_1UIfrj9oMVvrXjt6OBkBpddYAjb2y6P-DTUg,4327
- orca_sdk/memoryset.py,sha256=06v34fHabpkEaOv9VCKc0NhpMi_mNZGbQP_9GiW_nuE,157157
- orca_sdk/memoryset_test.py,sha256=O2o42XETtffXtZy0kbLk2b8cUDXU-w2ZAzXLi5-vDPQ,51278
- orca_sdk/regression_model.py,sha256=AXRzJG15sDJQSiDCDfRdcLnZDNkJWORYjhHqKyyL-Fc,33960
- orca_sdk/regression_model_test.py,sha256=90EyrhaMk1kTf87RFkMNz1PTItmeUISs6AvHmyp08DU,25447
- orca_sdk/telemetry.py,sha256=ZyCMiyyo_SchjadWZH55TlLrC4Ucq5S316NbW26LL4Y,27834
- orca_sdk/telemetry_test.py,sha256=eT66C5lFdNg-pQdo2I__BP7Tn5fTc9aTkVo9ZhWwhU0,5519
- orca_sdk-0.1.10.dist-info/METADATA,sha256=j_TIalbL8oztP39lnXjyAI6Aosvb6rnJKUc3gcuxD0k,3710
- orca_sdk-0.1.10.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
- orca_sdk-0.1.10.dist-info/RECORD,,
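The removed tests also covered `hf_dataset_from_disk`, which loads a Hugging Face `Dataset` from a pickle, JSON, JSONL, CSV, or Parquet file, apparently dispatching on the file extension. A minimal sketch under the same import assumption, mirroring the JSONL case from the removed file:

```python
# Sketch based on the removed tests: hf_dataset_from_disk accepted paths
# ending in .pkl, .json, .jsonl, .csv, and .parquet.
import json
import tempfile

from orca_sdk._utils.data_parsing import hf_dataset_from_disk

with tempfile.NamedTemporaryFile(suffix=".jsonl", mode="w") as temp_file:
    # Write one JSON object per line, then flush so the loader sees the data.
    for i in range(30):
        temp_file.write(json.dumps({"value": f"test_{i}", "label": i % 2}) + "\n")
    temp_file.flush()

    dataset = hf_dataset_from_disk(temp_file.name)
    assert len(dataset) == 30
    assert dataset.column_names == ["value", "label"]
```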