PyPI - rapidata - Versions diffs - 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl - Mend

rapidata 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rapidata might be problematic. Click here for more details.

Files changed (117) hide show

rapidata/rapidata_client/order/rapidata_results.py CHANGED Viewed

@@ -2,56 +2,63 @@ import pandas as pd
 from typing import Any
 from pandas.core.indexes.base import Index
 import json
-from rapidata.rapidata_client.logging import managed_print
+from rapidata.rapidata_client.config import managed_print
 class RapidataResults(dict):
     """
     A specialized dictionary class for handling Rapidata API results.
     Extends the built-in dict class with specialized methods.
     """
     def to_pandas(self, split_details: bool = False) -> pd.DataFrame:
         """
         Warning:
             This method is currently under development. The structure of the results may change in the future.
         Converts the results to a pandas DataFrame.
         For Compare results, creates standardized A/B columns for metrics.
         For regular results, flattens nested dictionaries into columns with underscore-separated names.
         Args:
             split_details: If True, splits each datapoint by its detailed results,
                           creating a row for each response with global metrics copied.
         Returns:
             pd.DataFrame: A DataFrame containing the processed results
         Raises:
             ValueError: If split_details is True but no detailed results are found
         """
         if "results" not in self or not self["results"]:
             return pd.DataFrame()
         if self["info"].get("orderType") is None:
-            managed_print("Warning: Results are old and Order type is not specified. Dataframe might be wrong.")
+            managed_print(
+                "Warning: Results are old and Order type is not specified. Dataframe might be wrong."
+            )
         # Check for detailed results if split_details is True
         if split_details:
             if not self._has_detailed_results():
                 raise ValueError("No detailed results found in the data")
             return self._to_pandas_with_detailed_results()
-        if self["info"].get("orderType") == "Compare" or self["info"].get("orderType") == "Ranking":
+        if (
+            self["info"].get("orderType") == "Compare"
+            or self["info"].get("orderType") == "Ranking"
+        ):
             return self._compare_to_pandas()
         # Get the structure from first item
         first_item = self["results"][0]
         columns = []
         path_map = {}  # Maps flattened column names to paths to reach the values
         # Build the column structure once
         self._build_column_structure(first_item, columns, path_map)
         # Extract data using the known structure
         data = []
         for item in self["results"]:
@@ -60,82 +67,84 @@ class RapidataResults(dict):
                 value = self._get_value_from_path(item, path)
                 row.append(value)
             data.append(row)
         return pd.DataFrame(data, columns=Index(columns))
     def _has_detailed_results(self) -> bool:
         """
         Checks if the results contain detailed results.
         Returns:
             bool: True if detailed results exist, False otherwise
         """
         if not self.get("results"):
             return False
         first_result = self["results"][0]
-        return "detailedResults" in first_result and isinstance(first_result["detailedResults"], list)
+        return "detailedResults" in first_result and isinstance(
+            first_result["detailedResults"], list
+        )
     def _to_pandas_with_detailed_results(self) -> pd.DataFrame:
         """
         Converts results to a pandas DataFrame with detailed results split into separate rows.
         Returns:
             pd.DataFrame: A DataFrame with one row per detailed result
         """
         rows = []
         for result in self["results"]:
             # Get all non-detailed results fields
             base_data = {k: v for k, v in result.items() if k != "detailedResults"}
             # Process each detailed result
             for detailed_result in result["detailedResults"]:
                 row = base_data.copy()  # Copy base data for each detailed result
                 # Add flattened detailed result data
                 flattened = self._flatten_dict(detailed_result)
                 for key, value in flattened.items():
                     row[key] = value
                 rows.append(row)
         return pd.DataFrame(rows)
-    def _flatten_dict(self, d: dict[str, Any], parent_key: str = '') -> dict[str, Any]:
+    def _flatten_dict(self, d: dict[str, Any], parent_key: str = "") -> dict[str, Any]:
         """
         Flattens a nested dictionary into a single-level dictionary with underscore-separated keys.
         Args:
             d: The dictionary to flatten
             parent_key: The parent key for nested dictionaries
         Returns:
             dict: A flattened dictionary
         """
         items: list[tuple[str, Any]] = []
         for key, value in d.items():
             new_key = f"{parent_key}_{key}" if parent_key else key
             if isinstance(value, dict):
                 items.extend(self._flatten_dict(value, new_key).items())
             else:
                 items.append((new_key, value))
         return dict(items)
     def _build_column_structure(
-        self,
-        d: dict[str, Any],
-        columns: list[str],
-        path_map: dict[str, list[str]],
-        parent_key: str = '',
-        current_path: list[str] | None = None
+        self,
+        d: dict[str, Any],
+        columns: list[str],
+        path_map: dict[str, list[str]],
+        parent_key: str = "",
+        current_path: list[str] | None = None,
     ) -> None:
         """
         Builds the column structure and paths to reach values in nested dictionaries.
         Args:
             d: The dictionary to analyze
             columns: List to store column names
@@ -145,25 +154,27 @@ class RapidataResults(dict):
         """
         if current_path is None:
             current_path = []
         for key, value in d.items():
             new_key = f"{parent_key}_{key}" if parent_key else key
             new_path: list[str] = current_path + [key]
             if isinstance(value, dict):
-                self._build_column_structure(value, columns, path_map, new_key, new_path)
+                self._build_column_structure(
+                    value, columns, path_map, new_key, new_path
+                )
             else:
                 columns.append(new_key)
                 path_map[new_key] = new_path
     def _get_value_from_path(self, d: dict[str, Any], path: list[str]) -> Any:
         """
         Retrieves a value from a nested dictionary using a path list.
         Args:
             d: The dictionary to retrieve the value from
             path: List of keys forming the path to the desired value
         Returns:
             The value at the specified path, or None if the path doesn't exist
         """
@@ -190,10 +201,11 @@ class RapidataResults(dict):
                 continue
             assets = [asset for asset in assets if asset not in ["Both", "Neither"]]
             # Initialize row with non-comparative fields
             row = {
-                key: value for key, value in result.items()
+                key: value
+                for key, value in result.items()
                 if not isinstance(value, dict)
             }
             row["assetA"] = assets[0]
@@ -203,26 +215,28 @@ class RapidataResults(dict):
             for key, values in result.items():
                 if isinstance(values, dict) and len(values) >= 2:
                     # Add main asset columns
-                    for i, asset in enumerate(assets[:2]):  # Limit to first 2 main assets
+                    for i, asset in enumerate(
+                        assets[:2]
+                    ):  # Limit to first 2 main assets
                         column_prefix = "A_" if i == 0 else "B_"
-                        row[f'{column_prefix}{key}'] = values.get(asset, 0)
+                        row[f"{column_prefix}{key}"] = values.get(asset, 0)
                     # Add special option columns if they exist
                     if "Both" in values:
-                        row[f'Both_{key}'] = values.get("Both", 0)
+                        row[f"Both_{key}"] = values.get("Both", 0)
                     if "Neither" in values:
-                        row[f'Neither_{key}'] = values.get("Neither", 0)
+                        row[f"Neither_{key}"] = values.get("Neither", 0)
             rows.append(row)
         return pd.DataFrame(rows)
-    def to_json(self, path: str="./results.json") -> None:
+    def to_json(self, path: str = "./results.json") -> None:
         """
         Saves the results to a JSON file.
         Args:
             path: The file path where the JSON should be saved. Defaults to "./results.json".
         """
-        with open(path, 'w') as f:
+        with open(path, "w") as f:
             json.dump(self, f)

rapidata/rapidata_client/rapidata_client.py CHANGED Viewed

@@ -15,8 +15,12 @@ from rapidata.rapidata_client.validation.validation_set_manager import (
 from rapidata.rapidata_client.demographic.demographic_manager import DemographicManager
-from rapidata.rapidata_client.logging import logger, managed_print
-from rapidata.rapidata_client.config.rapidata_config import rapidata_config
+from rapidata.rapidata_client.config import (
+    logger,
+    tracer,
+    managed_print,
+    rapidata_config,
+)
 class RapidataClient:
@@ -48,31 +52,36 @@ class RapidataClient:
             order (RapidataOrderManager): The RapidataOrderManager instance.
             validation (ValidationSetManager): The ValidationSetManager instance.
         """
-        logger.debug("Checking version")
-        self._check_version()
-        logger.debug("Initializing OpenAPIService")
-        self._openapi_service = OpenAPIService(
-            client_id=client_id,
-            client_secret=client_secret,
-            environment=environment,
-            oauth_scope=oauth_scope,
-            cert_path=cert_path,
-            token=token,
-            leeway=leeway,
-        )
+        with tracer.start_as_current_span("RapidataClient.__init__"):
+            logger.debug("Checking version")
+            self._check_version()
+            logger.debug("Initializing OpenAPIService")
+            self._openapi_service = OpenAPIService(
+                client_id=client_id,
+                client_secret=client_secret,
+                environment=environment,
+                oauth_scope=oauth_scope,
+                cert_path=cert_path,
+                token=token,
+                leeway=leeway,
+            )
-        logger.debug("Initializing RapidataOrderManager")
-        self.order = RapidataOrderManager(openapi_service=self._openapi_service)
+            logger.debug("Initializing RapidataOrderManager")
+            self.order = RapidataOrderManager(openapi_service=self._openapi_service)
-        logger.debug("Initializing ValidationSetManager")
-        self.validation = ValidationSetManager(openapi_service=self._openapi_service)
+            logger.debug("Initializing ValidationSetManager")
+            self.validation = ValidationSetManager(
+                openapi_service=self._openapi_service
+            )
-        logger.debug("Initializing DemographicManager")
-        self._demographic = DemographicManager(openapi_service=self._openapi_service)
+            logger.debug("Initializing DemographicManager")
+            self._demographic = DemographicManager(
+                openapi_service=self._openapi_service
+            )
-        logger.debug("Initializing RapidataBenchmarkManager")
-        self.mri = RapidataBenchmarkManager(openapi_service=self._openapi_service)
+            logger.debug("Initializing RapidataBenchmarkManager")
+            self.mri = RapidataBenchmarkManager(openapi_service=self._openapi_service)
     def reset_credentials(self):
         """Reset the credentials saved in the configuration file for the current environment."""
@@ -97,5 +106,9 @@ class RapidataClient:
                         f"""A new version of the Rapidata SDK is available: {latest_version}
 Your current version is: {__version__}"""
                     )
+                else:
+                    logger.debug(
+                        "Current version is up to date. Version: %s", __version__
+                    )
         except Exception as e:
             logger.debug("Failed to check for updates: %s", e)

rapidata/rapidata_client/referee/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 from ._base_referee import Referee
-from ._naive_referee import NaiveReferee #as MaxVoteReferee
+from ._naive_referee import NaiveReferee  # as MaxVoteReferee
 from ._early_stopping_referee import EarlyStoppingReferee

rapidata/rapidata_client/referee/_base_referee.py CHANGED Viewed

@@ -1,10 +1,12 @@
 from abc import ABC, abstractmethod
 from typing import Any, Mapping
 class Referee(ABC):
     """
-    The referee defines when a rapid is considered complete.
+    The referee defines when a rapid is considered complete.
     """
     @abstractmethod
     def _to_dict(self) -> Mapping[str, str | int | float]:
         """

rapidata/rapidata_client/referee/_early_stopping_referee.py CHANGED Viewed

@@ -15,7 +15,7 @@ class EarlyStoppingReferee(Referee):
     The threshold behaves logarithmically, meaning small increments (e.g., from 0.99
     to 0.999) can significantly impact the stopping criteria.
-    This referee is supported for the classification and compare tasks (in compare,
+    This referee is supported for the classification and compare tasks (in compare,
     the two options are treated as the categories).
     Args:
@@ -34,7 +34,7 @@ class EarlyStoppingReferee(Referee):
             raise ValueError("The threshold must be between 0 and 1.")
         if max_vote_count < 1:
             raise ValueError("The number of responses must be greater than 0.")
         self.threshold = threshold
         self.max_vote_count = max_vote_count

rapidata/rapidata_client/selection/_base_selection.py CHANGED Viewed

@@ -7,3 +7,9 @@ class RapidataSelection:
     @abstractmethod
     def _to_model(self) -> Any:
         pass
+    def __str__(self) -> str:
+        return f"{self.__class__.__name__}()"
+    def __repr__(self) -> str:
+        return f"{self.__class__.__name__}()"

rapidata/rapidata_client/selection/ab_test_selection.py CHANGED Viewed

@@ -10,17 +10,21 @@ from typing import Sequence
 class AbTestSelection(RapidataSelection):
     """AbTestSelection Class
     Splits the userbase into two segments and serves them a different collection of rapids.
     Useful for A/B Test.
     Args:
         a_selections (Sequence[RapidataSelection]): List of selections for group A.
         b_selections (Sequence[RapidataSelection]): List of selections for group B.
     """
-    def __init__(self, a_selections: Sequence[RapidataSelection], b_selections: Sequence[RapidataSelection]):
+    def __init__(
+        self,
+        a_selections: Sequence[RapidataSelection],
+        b_selections: Sequence[RapidataSelection],
+    ):
         self.a_selections = a_selections
         self.b_selections = b_selections

rapidata/rapidata_client/selection/capped_selection.py CHANGED Viewed

@@ -12,9 +12,9 @@ class CappedSelection(RapidataSelection):
     """CappedSelection Class
     Takes in different selections and caps the amount of rapids that can be shown.
     Useful for demographic and conditional validation selections.
     Args:
         selections (Sequence[RapidataSelection]): List of selections to cap.
         max_rapids (int): The maximum amount of rapids that can be shown for this selection.

rapidata/rapidata_client/selection/conditional_validation_selection.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from rapidata.rapidata_client.config.logger import logger
 from rapidata.rapidata_client.selection._base_selection import RapidataSelection
 from rapidata.api_client.models.conditional_validation_rapid_selection_config import (
     ValidationChance,
@@ -12,13 +13,13 @@ class ConditionalValidationSelection(RapidataSelection):
     """Conditional validation selection class.
     Probabilistically decides how many validation rapids you want to show per session based on the user score.
     Args:
         validation_set_id (str): The id of the validation set to be used.
         thresholds (list[float]): The thresholds to use for the user score.
         chances (list[float]): The chances of showing a validation rapid for each threshold.
         rapid_counts (list[int]): The amount of validation rapids that will be shown per session of this validation set for each threshold if selected by probability. (all or nothing)
-        dimension (Optional[str], optional): The dimension of the userScore that will be used in the thresholds. Defaults to None.
+        dimensions (Optional[list[str]], optional): The dimensions of the userScore that will be used in the thresholds. Defaults to None.
     Example:
         ```python
@@ -29,7 +30,7 @@ class ConditionalValidationSelection(RapidataSelection):
             rapid_counts=[1, 1]
         )
         ```
-        This means that there's a 100% chance of showing a validation rapid if the user score is between 0 and 0.7,
+        This means that there's a 100% chance of showing a validation rapid if the user score is between 0 and 0.7,
         and a 20% chance of showing a validation rapid if the user score is between 0.7 and 1.
     """
@@ -40,17 +41,22 @@ class ConditionalValidationSelection(RapidataSelection):
         chances: list[float],
         rapid_counts: list[int],
         dimension: Optional[str] = None,
+        dimensions: Optional[list[str]] = None,
     ):
         if len(thresholds) != len(chances) or len(thresholds) != len(rapid_counts):
             raise ValueError(
                 "The lengths of thresholds, chances and rapid_counts must be equal."
             )
+        if dimension:
+            logger.warning("dimension is deprecated, use dimensions instead")
+            dimensions = (dimensions or []) + [dimension]
         self.validation_set_id = validation_set_id
         self.thresholds = thresholds
         self.chances = chances
         self.rapid_counts = rapid_counts
-        self.dimension = dimension
+        self.dimensions = dimensions
     def _to_model(self):
         return ConditionalValidationSelectionModel(
@@ -64,5 +70,5 @@ class ConditionalValidationSelection(RapidataSelection):
                     self.thresholds, self.chances, self.rapid_counts
                 )
             ],
-            dimension=self.dimension,
+            dimensions=self.dimensions,
         )

rapidata/rapidata_client/selection/demographic_selection.py CHANGED Viewed

@@ -1,11 +1,13 @@
 from rapidata.rapidata_client.selection._base_selection import RapidataSelection
-from rapidata.api_client.models.demographic_selection import DemographicSelection as DemographicSelectionModel
+from rapidata.api_client.models.demographic_selection import (
+    DemographicSelection as DemographicSelectionModel,
+)
 class DemographicSelection(RapidataSelection):
     """Demographic selection class.
-    This is used to ask demographic questions in an order.
+    This is used to ask demographic questions in an order.
     The keys will select the rapids based on the confidence we already saved for each user.
@@ -15,7 +17,7 @@ class DemographicSelection(RapidataSelection):
         keys (list[str]): List of keys for the demographic rapids to be shown. As an example: "age"
         max_rapids (int): The maximum number of rapids to run.\n
             Allows to provide more keys, in case some of the earlier ones are not selected because of high confidence.
     Example:
         ```python
         DemographicSelection(["age", "gender"], 1)
@@ -23,11 +25,12 @@ class DemographicSelection(RapidataSelection):
         This will try to ask the user about their age, if that is not selected due to an already high confidence, it will try asking about their gender.
         The gender question may also be skipped if the confidence is high enough.
     """
     def __init__(self, keys: list[str], max_rapids: int):
         self.keys = keys
         self.max_rapids = max_rapids
     def _to_model(self):
-        return DemographicSelectionModel(_t="DemographicSelection", keys=self.keys, maxRapids=self.max_rapids)
+        return DemographicSelectionModel(
+            _t="DemographicSelection", keys=self.keys, maxRapids=self.max_rapids
+        )

rapidata/rapidata_client/selection/rapidata_selections.py CHANGED Viewed

@@ -1,15 +1,17 @@
 from rapidata.rapidata_client.selection import (
-    DemographicSelection,
-    LabelingSelection,
-    ValidationSelection,
-    ConditionalValidationSelection,
+    DemographicSelection,
+    LabelingSelection,
+    ValidationSelection,
+    ConditionalValidationSelection,
     CappedSelection,
-    ShufflingSelection)
+    ShufflingSelection,
+)
 class RapidataSelections:
     """RapidataSelections Classes
-    Selections are used to define what type of tasks and in what order they are shown to the user.
+    Selections are used to define what type of tasks and in what order they are shown to the user.
     All selections combined are called a "Session". A session can contain multiple tasks of different types of tasks.
     As an example, a session might be 1 validation task, 2 labeling tasks.
@@ -20,16 +22,17 @@ class RapidataSelections:
         demographic (DemographicSelection): Decides if and how many demographic questions you want to show per session.
         capped (CappedSelection): Takes in different selections and caps the amount of rapids that can be shown.
         shuffling (ShufflingSelection): Shuffles the selections provided in the list.
     Example:
         ```python
         from rapidata import LabelingSelection, ValidationSelection
-        selections=[ValidationSelection("your-validation-set-id", 1),
+        selections=[ValidationSelection("your-validation-set-id", 1),
                     LabelingSelection(2)]
         ```
         This will require annotators to complete one validation task followed by two labeling tasks.
     """
     labeling = LabelingSelection
     validation = ValidationSelection
     conditional_validation = ConditionalValidationSelection

rapidata/rapidata_client/selection/shuffling_selection.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from rapidata.api_client.models.ab_test_selection_a_inner import AbTestSelectionAInner
-from rapidata.api_client.models.shuffling_selection import ShufflingSelection as ShufflingSelectionModel
+from rapidata.api_client.models.shuffling_selection import (
+    ShufflingSelection as ShufflingSelectionModel,
+)
 from rapidata.rapidata_client.selection._base_selection import RapidataSelection
 from typing import Sequence
@@ -10,7 +11,7 @@ class ShufflingSelection(RapidataSelection):
     """ShufflingSelection Class
     Shuffles the selections provided in the list.
     Args:
         selections (Sequence[RapidataSelection]): List of selections to shuffle.
@@ -31,6 +32,5 @@ class ShufflingSelection(RapidataSelection):
             selections=[
                 AbTestSelectionAInner(selection._to_model())
                 for selection in self.selections
-            ]
+            ],
         )

rapidata/rapidata_client/selection/static_selection.py CHANGED Viewed

@@ -1,22 +1,17 @@
-from rapidata.api_client.models.static_selection import StaticSelection as StaticSelectionModel
+from rapidata.api_client.models.static_selection import (
+    StaticSelection as StaticSelectionModel,
+)
 from rapidata.rapidata_client.selection._base_selection import RapidataSelection
 class StaticSelection(RapidataSelection):
     """StaticSelection Class
     Given a list of RapidIds, theses specific rapids will be shown in order for every session.
     Args:
         rapid_ids (list[str]): List of rapid ids to show.
     """
     def __init__(self, rapid_ids: list[str]):
         self.rapid_ids = rapid_ids
-    def _to_model(self) -> StaticSelectionModel:
-        return StaticSelectionModel(
-            _t="StaticSelection",
-            rapidIds=self.rapid_ids
-        )

rapidata 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl

Potentially problematic release.

rapidata 2.37.0__py3-none-any.whl → 2.39.0__py3-none-any.whl