PyPI - aiverify-moonshot - Versions diffs - 0.5.1__tar.gz → 0.6.1__tar.gz - Mend

aiverify-moonshot 0.5.1__tar.gz → 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (209) hide show

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/integration-test.yaml RENAMED Viewed

@@ -36,7 +36,7 @@ jobs:
   integration-test:
     runs-on: ubuntu-latest
-    timeout-minutes: 120
+    timeout-minutes: 300
     steps:
@@ -53,7 +53,7 @@ jobs:
       with:
           repository: aiverify-foundation/moonshot
           ref: ${{ inputs.moonshot_branch }}
     - name: Setup Python 3.11
       uses: actions/setup-python@v4
       with:
@@ -110,24 +110,24 @@ jobs:
       run: |
         source venv/bin/activate
         pip install nltk
-        python -c "import nltk; nltk.download('stopwords');nltk.download('averaged_perceptron_tagger'); nltk.download('omw');nltk.download('universal_tagset'); nltk.download('wordnet');nltk.download('punkt')"
+        python -c "import nltk; nltk.download('stopwords');nltk.download('punkt');nltk.download('punkt_tab');nltk.download('averaged_perceptron_tagger_eng')"
     - name: Setup Moonshot UI
       run: |
+        source venv/bin/activate
         cd moonshot-ui
-        npm ci
+        npm install
         npm run build
         cd ../
-        source venv/bin/activate
-        python -m moonshot web &
+        nohup python -m moonshot web &
     - name: Checkout Integration Test
       uses: actions/checkout@v4
       with:
           repository: aiverify-foundation/moonshot-integration-testing
           path: moonshot-integration-testing
-    - name: Run Integration Test
+    - name: Run Integration UI Test
       env:
         URI: ${{ secrets.URI }}
         TOKEN: ${{ secrets.TOKEN }}
@@ -139,13 +139,52 @@ jobs:
         AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
         AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
         GOOGLE_TOKEN: ${{ secrets.GOOGLE_TOKEN }}
+      id: integrationuitest
       run: |
+         source venv/bin/activate
          cd moonshot-integration-testing/ui-integration-testing
          npm ci
-         npm install dotenv
-         npx playwright install --with-deps
-         URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" npx playwright test
+         npx playwright install
+         npx playwright install-deps
+         npm install dotenv --save
+         echo "Running Home Page Test Cases"
+         URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/homepage.spec.ts
+         echo "Running Endpoint Test Cases"
+         URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/endpoint.spec.ts
+         echo "Running Red Teaming Test Cases"
+         URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/red_teaming.spec.ts
+         echo "Running Benchmarking Test Cases"
+         URI="$URI" TOKEN="$TOKEN" URI2="$URI2" TOKEN2="$TOKEN2" ADDITIONAL_PARAMETERS="$ADDITIONAL_PARAMETERS" TOGETHER_TOKEN="$TOGETHER_TOKEN" OPENAI_TOKEN="$OPENAI_TOKEN" AWS_ACCESS_KEY_ID="$AWS_ACCESS_KEY_ID" AWS_SECRET_ACCESS_KEY="$AWS_SECRET_ACCESS_KEY" GOOGLE_TOKEN="$GOOGLE_TOKEN" DEBUG=pw:api npx playwright test tests/benchmarking.spec.ts
+    - name: Print Environment Variables
+      run: env
+    - name: Run Integration CLI Test
+      env:
+        AZURE_OPENAI_URI: ${{ secrets.AZURE_OPENAI_URI }}
+        AZURE_OPENAI_TOKEN: ${{ secrets.AZURE_OPENAI_TOKEN }}
+        ADDITIONAL_PARAMETERS: ${{ secrets.ADDITIONAL_PARAMETERS }}
+        MOONSHOT_URL: ${{ secrets.MOONSHOT_URL }}
+        MOONSHOT_PORT_NUMBER: ${{ secrets.MOONSHOT_PORT_NUMBER }}
+        CLI_DIR: ${{ secrets.CLI_DIR }}
+        ACTIONS_STEP_DEBUG: true
+        ACTIONS_RUNNER_DEBUG: true
+      run: |
+         source venv/bin/activate
+         cd moonshot-integration-testing/cli-integration-testing
+         echo "Current Directory: $(pwd)"
+         pip install python-dotenv
+         pip install pytest
+         pytest
+    - name: Upload Playwright Traces
+      if: always()
+      uses: actions/upload-artifact@v4
+      with:
+          name: playwright-trace
+          path: |
+            /home/runner/work/moonshot/moonshot/moonshot-integration-testing/ui-integration-testing/test-results
     - name: TestRail CLI upload results
       env:
         TESTRAIL_USERNAME: ${{ secrets.TESTRAIL_USERNAME }}

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/pypi-deployment.yaml RENAMED Viewed

@@ -76,7 +76,7 @@ jobs:
         name: python-package-distributions
         path: dist/
     - name: Sign the dists with Sigstore
-      uses: sigstore/gh-action-sigstore-python@v2.1.1
+      uses: sigstore/gh-action-sigstore-python@v3.6.0
       with:
         inputs: >-
           ./dist/*.tar.gz

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/.github/workflows/smoke-test-cli.yaml RENAMED Viewed

@@ -37,7 +37,7 @@ jobs:
   smoke-test:
     if: (github.event_name == 'pull_request' && github.event.pull_request.assignee != null) || github.event_name == 'workflow_dispatch'
     runs-on: ubuntu-latest
-    timeout-minutes: 20
+    timeout-minutes: 100
     steps:

aiverify_moonshot-0.5.1/.github/workflows/uat-deploy.yaml → aiverify_moonshot-0.6.1/.github/workflows/uat-build.yaml RENAMED Viewed

@@ -1,5 +1,6 @@
-name: UAT Deploy
+name: UAT Build
+# Trigger when the PR to merge to main is merged
 on:
   pull_request:
     branches:
@@ -30,13 +31,17 @@ jobs:
       - name: Bump version
         run: |
-          echo "Bumping version..."
+          echo "Bump version..."
           pip install bump2version
           bump2version patch
+      - name: Generate notices file
+        run: |
+          echo "Generate notice file..."
       - name: Package test PyPI
         run: |
-          echo "Packaging test PyPI..."
+          echo "Package test PyPI..."
           pip install build
           python3 -m build
@@ -70,7 +75,17 @@ jobs:
         with:
           repository-url: https://test.pypi.org/legacy/
+# Deploy moonshot to UAT by installing moonshot package from test pypi
+  # deploy-to-uat:
+  #   needs:
+  #     - publish-to-testpypi
+  #   runs-on: ubuntu-latest
+# Run integration test
+  # integration-test:
+  #   needs:
+  #     - publish-to-testpypi:
+  #     runs-on: ubuntu-latest

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiverify-moonshot
-Version: 0.5.1
+Version: 0.6.1
 Summary: AI Verify advances Gen AI testing with Project Moonshot.
 Project-URL: Repository, https://github.com/aiverify-foundation/moonshot
 Project-URL: Documentation, https://aiverify-foundation.github.io/moonshot/
@@ -47,7 +47,7 @@ Description-Content-Type: text/markdown
 ![Moonshot Logo](https://github.com/aiverify-foundation/moonshot/raw/main/misc/aiverify-moonshot-logo.png)
-**Version 0.5.1**
+**Version 0.6.1**
 A simple and modular tool to evaluate any LLM application.

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/README.md RENAMED Viewed

@@ -2,7 +2,7 @@
 ![Moonshot Logo](https://github.com/aiverify-foundation/moonshot/raw/main/misc/aiverify-moonshot-logo.png)
-**Version 0.5.1**
+**Version 0.6.1**
 A simple and modular tool to evaluate any LLM application.

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/cookbook.py RENAMED Viewed

@@ -37,7 +37,8 @@ from moonshot.integrations.cli.cli_errors import (
     ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1,
     ERROR_BENCHMARK_RUN_COOKBOOK_NAME_VALIDATION,
     ERROR_BENCHMARK_RUN_COOKBOOK_NO_RESULT,
-    ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION,
+    ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
+    ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
     ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION,
     ERROR_BENCHMARK_RUN_COOKBOOK_RESULT_PROC_MOD_VALIDATION,
     ERROR_BENCHMARK_RUN_COOKBOOK_RUNNER_PROC_MOD_VALIDATION,
@@ -212,11 +213,12 @@ def run_cookbook(args) -> None:
     The cookbooks are run against the specified endpoints, and the results are processed and displayed.
     Args:
-        args: A namespace object from argparse. It should have the following attributes:
+        args (argparse.Namespace): The arguments provided to the command line interface.
+        Expected keys are:
             name (str): The name of the cookbook runner.
             cookbooks (str): A string representation of a list of cookbooks to run.
             endpoints (str): A string representation of a list of endpoints to run.
-            num_of_prompts (int): The number of prompts to run.
+            prompt_selection_percentage (int): The percentage of prompts to run.
             random_seed (int): The random seed number for reproducibility.
             system_prompt (str): The system prompt to use.
             runner_proc_module (str): The runner processing module to use.
@@ -248,10 +250,19 @@ def run_cookbook(args) -> None:
         ):
             raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION)
-        if isinstance(args.num_of_prompts, bool) or not isinstance(
-            args.num_of_prompts, int
+        if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
+            args.prompt_selection_percentage, int
+        ):
+            raise TypeError(
+                ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION
+            )
+        elif (
+            args.prompt_selection_percentage < 1
+            or args.prompt_selection_percentage > 100
         ):
-            raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION)
+            raise ValueError(
+                ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
+            )
         if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
             raise TypeError(ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION)
@@ -297,7 +308,7 @@ def run_cookbook(args) -> None:
         async def run():
             await cb_runner.run_cookbooks(
                 cookbooks,
-                args.num_of_prompts,
+                args.prompt_selection_percentage,
                 args.random_seed,
                 args.system_prompt,
                 args.runner_proc_module,
@@ -436,9 +447,20 @@ def _display_cookbooks(cookbooks_list):
     table.add_column("Cookbook", justify="left", width=78)
     table.add_column("Contains", justify="left", width=20, overflow="fold")
     for idx, cookbook in enumerate(cookbooks_list, 1):
-        id, name, description, recipes, *other_args = cookbook.values()
+        (
+            id,
+            name,
+            tags,
+            categories,
+            description,
+            recipes,
+            *other_args,
+        ) = cookbook.values()
         idx = cookbook.get("idx", idx)
-        cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n{description}"
+        cookbook_info = f"[red]ID: {id}[/red]\n\n[blue]{name}[/blue]\n\n{description}"
+        cookbook_info += (
+            f"\n\n[blue]Tags: {tags}[/blue]\n[blue]Categories: {categories}[/blue]\n"
+        )
         recipes_info = display_view_list_format("Recipes", recipes)
         table.add_section()
         table.add_row(str(idx), cookbook_info, recipes_info)
@@ -459,11 +481,11 @@ def _display_view_cookbook(cookbook_info):
     Returns:
         None
     """
-    id, name, description, recipes = cookbook_info.values()
+    id, name, tags, categories, description, recipes = cookbook_info.values()
     recipes_list = api_read_recipes(recipes)
     if recipes_list:
         table = Table(
-            title=f'Cookbook "{name}"',
+            title=f'Cookbook: "{name}"\n Tags: {tags}\n Categories: {categories}\n',
             show_lines=True,
             expand=True,
             header_style="bold",
@@ -471,6 +493,7 @@ def _display_view_cookbook(cookbook_info):
         table.add_column("No.", width=2)
         table.add_column("Recipe", justify="left", width=78)
         table.add_column("Contains", justify="left", width=20, overflow="fold")
         for recipe_id, recipe in enumerate(recipes_list, 1):
             (
                 id,
@@ -718,7 +741,11 @@ run_cookbook_args.add_argument("name", type=str, help="Name of cookbook runner")
 run_cookbook_args.add_argument("cookbooks", type=str, help="List of cookbooks to run")
 run_cookbook_args.add_argument("endpoints", type=str, help="List of endpoints to run")
 run_cookbook_args.add_argument(
-    "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
+    "-n",
+    "--prompt_selection_percentage",
+    type=int,
+    default=100,
+    help="Percentage of prompts to run",
 )
 run_cookbook_args.add_argument(
     "-r", "--random_seed", type=int, default=0, help="Random seed number"

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/recipe.py RENAMED Viewed

@@ -40,7 +40,8 @@ from moonshot.integrations.cli.cli_errors import (
     ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1,
     ERROR_BENCHMARK_RUN_RECIPE_NAME_VALIDATION,
     ERROR_BENCHMARK_RUN_RECIPE_NO_RESULT,
-    ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION,
+    ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION,
     ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION,
     ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION,
     ERROR_BENCHMARK_RUN_RECIPE_RECIPES_VALIDATION_1,
@@ -293,11 +294,12 @@ def run_recipe(args) -> None:
     The recipes are run against the specified endpoints, and the results are processed and displayed.
     Args:
-        args: A namespace object from argparse. It should have the following attributes:
+        args (argparse.Namespace): The arguments provided to the command line interface.
+        Expected keys are:
             name (str): The name of the recipe runner.
             recipes (str): A string representation of a list of recipes to run.
             endpoints (str): A string representation of a list of endpoints to run.
-            num_of_prompts (int): The number of prompts to run.
+            prompt_selection_percentage (int): The percentage of prompts to run.
             random_seed (int): The random seed number for reproducibility.
             system_prompt (str): The system prompt to use.
             runner_proc_module (str): The runner processing module to use.
@@ -329,10 +331,19 @@ def run_recipe(args) -> None:
         ):
             raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION)
-        if isinstance(args.num_of_prompts, bool) or not isinstance(
-            args.num_of_prompts, int
+        if isinstance(args.prompt_selection_percentage, bool) or not isinstance(
+            args.prompt_selection_percentage, int
+        ):
+            raise TypeError(
+                ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION
+            )
+        elif (
+            args.prompt_selection_percentage < 1
+            or args.prompt_selection_percentage > 100
         ):
-            raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION)
+            raise ValueError(
+                ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION
+            )
         if isinstance(args.random_seed, bool) or not isinstance(args.random_seed, int):
             raise TypeError(ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION)
@@ -377,7 +388,7 @@ def run_recipe(args) -> None:
         async def run():
             await rec_runner.run_recipes(
                 recipes,
-                args.num_of_prompts,
+                args.prompt_selection_percentage,
                 args.random_seed,
                 args.system_prompt,
                 args.runner_proc_module,
@@ -809,7 +820,11 @@ run_recipe_args.add_argument("name", type=str, help="Name of recipe runner")
 run_recipe_args.add_argument("recipes", type=str, help="List of recipes to run")
 run_recipe_args.add_argument("endpoints", type=str, help="List of endpoints to run")
 run_recipe_args.add_argument(
-    "-n", "--num_of_prompts", type=int, default=0, help="Number of prompts to run"
+    "-n",
+    "--prompt_selection_percentage",
+    type=int,
+    default=100,
+    help="Percentage of prompts to run",
 )
 run_recipe_args.add_argument(
     "-r", "--random_seed", type=int, default=0, help="Random seed number"

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/benchmark/result.py RENAMED Viewed

@@ -190,7 +190,7 @@ def _display_results(results_list):
         recipes = metadata["recipes"]
         cookbooks = metadata["cookbooks"]
         endpoints = metadata["endpoints"]
-        num_of_prompts = metadata["num_of_prompts"]
+        prompt_selection_percentage = metadata["prompt_selection_percentage"]
         random_seed = metadata["random_seed"]
         system_prompt = metadata["system_prompt"]
         idx = result.get("idx", idx)
@@ -200,7 +200,9 @@ def _display_results(results_list):
         recipes_info = display_view_list_format("Recipes", recipes)
         cookbooks_info = display_view_list_format("Cookbooks", cookbooks)
         endpoints_info = display_view_list_format("Endpoints", endpoints)
-        prompts_info = display_view_str_format("Number of Prompts", num_of_prompts)
+        prompts_info = display_view_str_format(
+            "Prompt Selection Percentage", prompt_selection_percentage
+        )
         seed_info = display_view_str_format("Seed", random_seed)
         system_prompt_info = display_view_str_format("System Prompt", system_prompt)

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/cli/cli_errors.py RENAMED Viewed

@@ -52,8 +52,11 @@ ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION = (
 ERROR_BENCHMARK_RUN_COOKBOOK_ENDPOINTS_VALIDATION_1 = (
     "The 'endpoints' argument must evaluate to a list of strings."
 )
-ERROR_BENCHMARK_RUN_COOKBOOK_NUM_OF_PROMPTS_VALIDATION = (
-    "The 'num_of_prompts' argument must be an integer."
+ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
+    "The 'prompt_selection_percentage' argument must be an integer."
+)
+ERROR_BENCHMARK_RUN_COOKBOOK_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
+    "The 'prompt_selection_percentage' argument must be between 1 - 100."
 )
 ERROR_BENCHMARK_RUN_COOKBOOK_RANDOM_SEED_VALIDATION = (
     "The 'random_seed' argument must be an integer."
@@ -278,8 +281,11 @@ ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION = (
 ERROR_BENCHMARK_RUN_RECIPE_ENDPOINTS_VALIDATION_1 = (
     "The 'endpoints' argument must evaluate to a list of strings."
 )
-ERROR_BENCHMARK_RUN_RECIPE_NUM_OF_PROMPTS_VALIDATION = (
-    "The 'num_of_prompts' argument must be an integer."
+ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_VALIDATION = (
+    "The 'prompt_selection_percentage' argument must be an integer."
+)
+ERROR_BENCHMARK_RUN_RECIPE_PROMPT_SELECTION_PERCENTAGE_RANGE_VALIDATION = (
+    "The 'prompt_selection_percentage' argument must be between 1 - 100."
 )
 ERROR_BENCHMARK_RUN_RECIPE_RANDOM_SEED_VALIDATION = (
     "The 'random_seed' argument must be an integer."

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/app.py RENAMED Viewed

@@ -71,7 +71,7 @@ def create_app(cfg: providers.Configuration) -> CustomFastAPI:
     }
     app: CustomFastAPI = CustomFastAPI(
-        title="Project Moonshot", version="0.5.1", **app_kwargs
+        title="Project Moonshot", version="0.6.1", **app_kwargs
     )
     if cfg.cors.enabled():

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/benchmark_runner_dto.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 class BenchmarkRunnerDTO(BaseModel):
@@ -7,7 +7,7 @@ class BenchmarkRunnerDTO(BaseModel):
     description: str
     endpoints: list[str]
     inputs: list[str]
-    num_of_prompts: int
+    prompt_selection_percentage: int = Field(..., ge=1, le=100)
     random_seed: int
     system_prompt: str
     runner_processing_module: str

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/cookbook_create_dto.py RENAMED Viewed

@@ -9,6 +9,8 @@ class CookbookCreateDTO(CookbookPydanticModel):
     id: Optional[str] = None
     name: str = Field(..., min_length=1)
     description: Optional[str] = Field(default="", min_length=1)
+    tags: Optional[list[str]] = []
+    categories: Optional[list[str]] = []
     recipes: list[str] = Field(..., min_length=1)
@@ -16,4 +18,6 @@ class CookbookUpdateDTO(CookbookPydanticModel):
     id: Optional[str] = None
     name: Optional[str] = Field(default=None, min_length=1)
     description: Optional[str] = Field(default=None, min_length=1)
+    tags: Optional[list[str]] = None
+    categories: Optional[list[str]] = None
     recipes: Optional[list[str]] = Field(default=None, min_length=1)

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/schemas/dataset_create_dto.py RENAMED Viewed

@@ -1,7 +1,6 @@
-from typing import Optional
+from typing import Any, Optional
 from pydantic import Field
-from pyparsing import Iterator
 from moonshot.src.datasets.dataset_arguments import (
     DatasetArguments as DatasetPydanticModel,
@@ -10,7 +9,7 @@ from moonshot.src.datasets.dataset_arguments import (
 class CSV_Dataset_DTO(DatasetPydanticModel):
     id: Optional[str] = None  # Not a required from user
-    examples: Optional[Iterator[dict]] = None  # Not a required from user
+    examples: Optional[Any] = None  # Not a required from user
     name: str = Field(..., min_length=1)
     description: str = Field(default="", min_length=1)
     license: Optional[str] = ""
@@ -20,7 +19,7 @@ class CSV_Dataset_DTO(DatasetPydanticModel):
 class HF_Dataset_DTO(DatasetPydanticModel):
     id: Optional[str] = None  # Not a required from user
-    examples: Optional[Iterator[dict]] = None  # Not a required from user
+    examples: Optional[Any] = None  # Not a required from user
     name: str = Field(..., min_length=1)
     description: str = Field(default="", min_length=1)
     license: Optional[str] = ""

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/benchmark_test_manager.py RENAMED Viewed

@@ -60,14 +60,14 @@ class BenchmarkTestManager(BaseService):
             if benchmark_type == BenchmarkCollectionType.COOKBOOK:
                 async_run = moonshot_runner.run_cookbooks(
                     cookbooks=benchmark_input_data.inputs,
-                    num_of_prompts=benchmark_input_data.num_of_prompts,
+                    prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
                     random_seed=benchmark_input_data.random_seed,
                     system_prompt=benchmark_input_data.system_prompt,
                 )
             else:
                 async_run = moonshot_runner.run_recipes(
                     recipes=benchmark_input_data.inputs,
-                    num_of_prompts=benchmark_input_data.num_of_prompts,
+                    prompt_selection_percentage=benchmark_input_data.prompt_selection_percentage,
                     random_seed=benchmark_input_data.random_seed,
                     system_prompt=benchmark_input_data.system_prompt,
                 )

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/services/cookbook_service.py RENAMED Viewed

@@ -71,7 +71,7 @@ class CookbookService(BaseService):
                             cookbook.total_dataset_in_cookbook,
                         ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
-            if tags and cookbooks_recipe_has_tags(tags, cookbook):
+            if tags and cookbook_has_tags(tags, cookbook):
                 if cookbook not in cookbooks_list:
                     cookbooks_list.append(cookbook)
                     if count:
@@ -80,7 +80,7 @@ class CookbookService(BaseService):
                             cookbook.total_dataset_in_cookbook,
                         ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
-            if categories and cookbooks_recipe_has_categories(categories, cookbook):
+            if categories and cookbook_has_categories(categories, cookbook):
                 if cookbook not in cookbooks_list:
                     cookbooks_list.append(cookbook)
                     if count:
@@ -89,10 +89,16 @@ class CookbookService(BaseService):
                             cookbook.total_dataset_in_cookbook,
                         ) = get_total_prompt_and_dataset_in_cookbook(cookbook)
-            if categories_excluded and cookbooks_recipe_has_categories(
-                categories_excluded, cookbook
-            ):
-                cookbooks_list.remove(cookbook)
+            if categories_excluded:
+                excluded_categories_set = set(
+                    category.lower() for category in categories_excluded.split(",")
+                )
+                cookbook_categories_set = set(
+                    category.lower() for category in cookbook.categories
+                )
+                # Exclude only if all categories in the cookbook are in the excluded list
+                if cookbook_categories_set.issubset(excluded_categories_set):
+                    cookbooks_list.remove(cookbook)
         for cookbook in cookbooks_list:
             cookbook.required_config = cookbook_metrics_dependency(cookbook)
@@ -160,50 +166,40 @@ def get_total_prompt_and_dataset_in_cookbook(cookbook: Cookbook) -> tuple[int, i
 @staticmethod
-def cookbooks_recipe_has_tags(tags: str, cookbook: Cookbook) -> bool:
+def cookbook_has_tags(tags: str, cookbook: Cookbook) -> bool:
     """
-    Check if any recipe in a cookbook has the specified tags.
+    Check if a cookbook has the specified tags.
     Args:
-        tags (str): The tags to check for in the cookbook's recipes.
-        cookbook (Cookbook): The cookbook object containing the recipe IDs.
+        tags (str): The tags to check for in the cookbook.
+        cookbook (Cookbook): The cookbook object.
     Returns:
-        bool: True if any recipe in the cookbook has the specified tags, False otherwise.
+        bool: True if the cookbook has the specified tags, False otherwise.
     """
-    recipe_ids = cookbook.recipes
-    recipes = moonshot_api.api_read_recipes(recipe_ids)
-    for recipe in recipes:
-        recipe = Recipe(**recipe)
-        if tags in recipe.tags:
-            return True
-    return False
+    tags_list = [tag.lower() for tag in tags.split(",")]
+    return any(tag in [ctag.lower() for ctag in cookbook.tags] for tag in tags_list)
 @staticmethod
-def cookbooks_recipe_has_categories(categories: str, cookbook: Cookbook) -> bool:
+def cookbook_has_categories(categories: str, cookbook: Cookbook) -> bool:
     """
-    Check if any recipe in a cookbook has the specified categories.
+    Check if a cookbook has the specified categories.
     Args:
-        categories (str): The categories to check for in the cookbook's recipes.
-        cookbook (Cookbook): The cookbook object containing the recipe IDs.
-        exclude_categories (str): The categories to exclude
+        categories (str): The categories to check for in the cookbook.
+        cookbook (Cookbook): The cookbook object.
     Returns:
-        bool: True if any recipe in the cookbook has the specified categories, False otherwise.
+        bool: True if the cookbook has the specified categories, False otherwise.
     """
-    recipe_ids = cookbook.recipes
     categories_list = [category.lower() for category in categories.split(",")]
-    recipes = moonshot_api.api_read_recipes(recipe_ids)
-    for recipe in recipes:
-        recipe = Recipe(**recipe)
-        if any(
-            category in [rcat.lower() for rcat in recipe.categories]
-            for category in categories_list
-        ):
-            return True
-    return False
+    return any(
+        category in [ccat.lower() for ccat in cookbook.categories]
+        for category in categories_list
+    )
 @staticmethod

{aiverify_moonshot-0.5.1 → aiverify_moonshot-0.6.1}/moonshot/integrations/web_api/types/types.py RENAMED Viewed

@@ -85,7 +85,7 @@ class ResultMetadata(TypedDict):
     recipes: List[str]
     cookbooks: List[str]
     endpoints: List[str]
-    num_of_prompts: int
+    prompt_selection_percentage: int
     status: str

aiverify-moonshot 0.5.1__tar.gz → 0.6.1__tar.gz

aiverify-moonshot 0.5.1__tar.gz → 0.6.1__tar.gz