PyPI - parsagon - Versions diffs - 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl - Mend

parsagon 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

parsagon/api.py +12 -4
parsagon/exceptions.py +5 -0
parsagon/main.py +111 -29
parsagon/secrets.py +15 -0
parsagon/tests/test_secrets.py +53 -0
{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/METADATA +1 -1
{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/RECORD +10 -8
{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/WHEEL +1 -1
{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/entry_points.txt +0 -0
{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/top_level.txt +0 -0

parsagon/api.py CHANGED Viewed

@@ -135,11 +135,11 @@ def get_bool_about_data(data, question):
     return data["result"]
-def create_pipeline(name, description, program_sketch, pseudocode):
+def create_pipeline(name, description, program_sketch, pseudocode, secrets):
     return _api_call(
         httpx.post,
         "/pipelines/",
-        json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode},
+        json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode, "secrets": secrets},
     )
@@ -192,11 +192,19 @@ def get_pipeline_code(pipeline_name, variables, headless):
         )
-def create_pipeline_run(pipeline_id, variables):
+def create_pipeline_run(pipeline_id, variables, is_local):
     return _api_call(
         httpx.post,
         f"/pipelines/{pipeline_id}/runs/",
-        json={"variables": variables},
+        json={"variables": variables, "is_local": is_local},
+    )
+def update_pipeline_run(run_id, data):
+    return _api_call(
+        httpx.patch,
+        f"/pipelines/runs/{run_id}/",
+        json=data,
     )

parsagon/exceptions.py CHANGED Viewed

@@ -24,3 +24,8 @@ class ProgramNotFoundException(ParsagonException):
     def to_string(self, verbose):
         return f"A program with name {self.program} does not exist."
+class RunFailedException(ParsagonException):
+    """Raised when a run fails."""
+    pass

parsagon/main.py CHANGED Viewed

@@ -1,9 +1,11 @@
 import argparse
+import datetime
 import json
 import logging
 import logging.config
 import psutil
 import time
+import traceback
 from halo import Halo
 from tqdm import tqdm
@@ -15,15 +17,16 @@ from parsagon.api import (
     create_custom_function,
     add_examples_to_custom_function,
     create_pipeline_run,
+    update_pipeline_run,
     get_pipeline,
     get_pipelines,
     get_pipeline_code,
     get_run,
     poll_data,
-    APIException,
 )
-from parsagon.exceptions import ParsagonException
+from parsagon.exceptions import ParsagonException, APIException, RunFailedException
 from parsagon.executor import Executor, custom_functions_to_descriptions
+from parsagon.secrets import extract_secrets
 from parsagon.settings import get_api_key, get_settings, clear_settings, save_setting, get_logging_config
 logger = logging.getLogger(__name__)
@@ -139,6 +142,11 @@ def get_args():
         action="store_true",
         help="run the program in the cloud",
     )
+    parser_run.add_argument(
+        "--output_log",
+        action="store_true",
+        help="output log data from the run",
+    )
     parser_run.set_defaults(func=run)
     # Delete
@@ -185,12 +193,15 @@ def main():
 def create(task=None, program_name=None, headless=False, infer=False, verbose=False):
+    configure_logging(verbose)
     if task:
         logger.info("Launched with task description:\n%s", task)
     else:
         task = input("Type what you want to do: ")
     logger.info("Analyzing task description...")
+    task, secrets = extract_secrets(task)
     program_sketches = get_program_sketches(task)
     full_program = program_sketches["full"]
@@ -198,7 +209,8 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
     pseudocode = program_sketches["pseudocode"]
     logger.info(f"Created a program based on task description. Program does the following:\n\n{pseudocode}\n\nNow executing the program to identify web elements to be scraped:\n")
     logger.debug("Program:\n%s", abridged_program)
-    abridged_program += "\n\noutput = func()\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n"  # Make the program runnable
+    args = ", ".join(f"{k}={repr(v)}" for k, v in secrets.items())
+    abridged_program += f"\n\noutput = func({args})" + "\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n"  # Make the program runnable
     # Execute the abridged program to gather examples
     executor = Executor(headless=headless, infer=infer)
@@ -211,7 +223,7 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
         if program_name:
             logger.info(f"Saving program as {program_name}")
             try:
-                pipeline = create_pipeline(program_name, task, full_program, pseudocode)
+                pipeline = create_pipeline(program_name, task, full_program, pseudocode, secrets)
             except APIException as e:
                 if isinstance(e.value, list) and "Pipeline with name already exists" in e.value:
                     logger.info("A program with this name already exists. Please choose another name.")
@@ -243,6 +255,8 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
 def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
+    configure_logging(verbose)
     pipeline = get_pipeline(program_name)
     abridged_program = pipeline["abridged_sketch"]
     # Make the program runnable
@@ -274,8 +288,7 @@ def update(program_name, variables={}, headless=False, infer=False, replace=Fals
             add_examples_to_custom_function(pipeline_id, call_id, custom_function, replace)
         logger.info(f"Saved.")
     except Exception as e:
-        print(e)
-        logger.info(f"An error occurred while saving the program. The program was not updated.")
+        logger.error(f"An error occurred while saving the program. The program was not updated.")
 def detail(program_name=None, verbose=False):
@@ -289,96 +302,165 @@ def detail(program_name=None, verbose=False):
         )
-def run(program_name, variables={}, headless=False, remote=False, verbose=False):
+def run(program_name, variables={}, headless=False, remote=False, output_log=False, verbose=False):
     """
     Executes pipeline code
     """
+    configure_logging(verbose)
     if headless and remote:
         raise ParsagonException("Cannot run a program remotely in headless mode")
+    logger.info("Preparing to run program %s", program_name)
+    pipeline_id = get_pipeline(program_name)["id"]
     if remote:
-        pipeline_id = get_pipeline(program_name)["id"]
-        result = create_pipeline_run(pipeline_id, variables)
+        result = create_pipeline_run(pipeline_id, variables, False)
         with Halo(text="Program running remotely...", spinner="dots"):
             while True:
                 run = get_run(result["id"])
                 status = run["status"]
+                if output_log and status in ("FINISHED", "ERROR"):
+                    return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
                 if status == "FINISHED":
+                    if verbose:
+                        logger.info(run["log"])
+                        for warning in run["warnings"]:
+                            logger.warning(warning)
                     logger.info("Program finished running.")
                     return run["output"]
                 elif status == "ERROR":
                     raise ParsagonException(f"Program failed to run: {run['error']}")
                 elif status == "CANCELED":
                     raise ParsagonException("Program execution was canceled")
                 time.sleep(5)
-    logger.info("Preparing to run program %s", program_name)
+    run = create_pipeline_run(pipeline_id, variables, True)
     code = get_pipeline_code(program_name, variables, headless)["code"]
+    start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
+    run_data = {"start_time": start_time}
     logger.info("Running program...")
     globals_locals = {"PARSAGON_API_KEY": get_api_key()}
     try:
         exec(code, globals_locals, globals_locals)
+        run_data["status"] = "FINISHED"
+    except:
+        run_data["status"] = "ERROR"
+        run_data["error"] = str(traceback.format_exc())
+        if not output_log:
+            raise
     finally:
+        end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
+        run_data["end_time"] = end_time
         if "driver" in globals_locals:
             globals_locals["driver"].quit()
         if "display" in globals_locals:
             globals_locals["display"].stop()
+        if "parsagon_log" in globals_locals:
+            run_data["log"] = "\n".join(globals_locals["parsagon_log"])
+            logger.info(run_data["log"])
+        if "parsagon_warnings" in globals_locals:
+            run_data["warnings"] = globals_locals["parsagon_warnings"]
         for proc in psutil.process_iter():
             try:
                 if proc.name() == "chromedriver":
                     proc.kill()
             except psutil.NoSuchProcess:
                 continue
+        run = update_pipeline_run(run["id"], run_data)
     logger.info("Done.")
+    if output_log:
+        if "error" not in run_data:
+            run["output"] = globals_locals["output"]
+        return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
     return globals_locals["output"]
-def batch_runs(batch_name, program_name, runs=[], headless=False, ignore_errors=False, error_value=None):
+def batch_runs(batch_name, program_name, runs=[], headless=False, ignore_errors=False, error_value=None, rerun_warnings=False, rerun_warning_types=[], rerun_errors=False, verbose=False):
+    configure_logging(verbose)
     save_file = f"{batch_name}.json"
     try:
         with open(save_file) as f:
-            results = json.load(f)
+            outputs = json.load(f)
     except FileNotFoundError:
-        results = []
-    num_initial_results = len(results)
+        outputs = []
+    metadata_file = f"{batch_name}_metadata.json"
+    try:
+        with open(metadata_file) as f:
+            metadata = json.load(f)
+    except FileNotFoundError:
+        metadata = []
+    num_initial_results = len(outputs)
     pbar = tqdm(runs)
     default_desc = f'Running program "{program_name}"'
     pbar.set_description(default_desc)
     error = None
-    error_variables = None
+    variables = None
     try:
         for i, variables in enumerate(pbar):
             if i < num_initial_results:
-                continue
+                if rerun_errors and metadata[i]["status"] == "ERROR":
+                    pass
+                elif rerun_warnings and metadata[i]["warnings"]:
+                    if not rerun_warning_types or any(warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]):
+                        pass
+                    else:
+                        continue
+                else:
+                    continue
             for j in range(3):
-                try:
-                    results.append(run(program_name, variables, headless))
+                result = run(program_name, variables, headless, output_log=True)
+                if result["status"] != "ERROR":
+                    output = result.pop("output")
+                    if i < num_initial_results:
+                        outputs[i] = output
+                        metadata[i] = result
+                    else:
+                        outputs.append(output)
+                        metadata.append(result)
                     break
-                except Exception as e:
-                    error = e
-                    error_variables = variables
+                else:
+                    error = result["error"].strip().split("\n")[-1]
                     if j < 2:
-                        pbar.set_description(f"An error occurred: {e} - Waiting 60s before retrying (Attempt {j+2}/3)")
+                        pbar.set_description(f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)")
                         time.sleep(60)
                         pbar.set_description(default_desc)
                         error = None
-                        error_variables = None
                         continue
                     else:
                         if ignore_errors:
                             error = None
-                            error_variables = None
-                            results.append(error_value)
+                            if i < num_initial_results:
+                                outputs[i] = error_value
+                            else:
+                                outputs.append(error_value)
                             break
                         else:
-                            raise
+                            raise RunFailedException
+    except RunFailedException:
+        logger.error(f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
     except Exception as e:
-        logger.error(f"Unresolvable error occurred on run with variables {error_variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
+        error = str(e)
+        logger.error(f"Unresolvable error occurred while looping over runs: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
     finally:
         with open(save_file, "w") as f:
-            json.dump(results, f)
-    return None if error else results
+            json.dump(outputs, f)
+        with open(metadata_file, "w") as f:
+            json.dump(metadata, f)
+    num_warnings = 0
+    num_runs_with_warnings = 0
+    for m in metadata:
+        if m["warnings"]:
+            num_warnings += len(m["warnings"])
+            num_runs_with_warnings += 1
+    logger.info(f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n")
+    return None if error else outputs
 def delete(program_name, verbose=False, confirm_with_user=False):

parsagon/secrets.py ADDED Viewed

@@ -0,0 +1,15 @@
+import ast
+import re
+def extract_secrets(task):
+    secrets = {}
+    matches = list(re.finditer(r'\{\s*(?P<var>[A-Za-z_]+)\s*:\s*(?P<value>"([^"]|\\")*")\}', task))
+    for match in matches:
+        var_name = match.group("var")
+        if not var_name.startswith("SECRET"):
+            continue
+        new_match = re.sub(r'\{([A-Za-z_]+):\s*"([^"]|\\")*"\}', '{\\1: "******"}', match.group(0))
+        task = task.replace(match.group(0), new_match)
+        secrets[match.group(1)] = ast.literal_eval(match.group(2))
+    return task, secrets

parsagon/tests/test_secrets.py ADDED Viewed

@@ -0,0 +1,53 @@
+import pytest
+from parsagon.secrets import extract_secrets
+def test_non_secrets_are_not_extracted():
+    """
+    Non-secrets should not be extracted from task descriptions.
+    """
+    task = 'Go to https://example.com. Type {username: "myusername"} in the username field'
+    task, secrets = extract_secrets(task)
+    assert secrets == {}
+    assert task == 'Go to https://example.com. Type {username: "myusername"} in the username field'
+def test_secret_is_extracted():
+    """
+    A secret should be extracted and replaced in a task description.
+    """
+    task = 'Go to https://example.com. Type {SECRET_PASSWORD: "mypassword"} in the password field'
+    task, secrets = extract_secrets(task)
+    assert secrets == {"SECRET_PASSWORD": "mypassword"}
+    assert task == 'Go to https://example.com. Type {SECRET_PASSWORD: "******"} in the password field'
+def test_secret_with_quotes_is_extracted():
+    """
+    A secret with quotes in its value should be extracted and replaced in a task description.
+    """
+    task = 'Go to https://example.com. Type {SECRET_PASSWORD: "mypassword\\"?!1"} in the password field'
+    task, secrets = extract_secrets(task)
+    assert secrets == {"SECRET_PASSWORD": 'mypassword"?!1'}
+    assert task == 'Go to https://example.com. Type {SECRET_PASSWORD: "******"} in the password field'
+def test_multiple_secrets_are_extracted():
+    """
+    Multiple secrets should be extracted and replaced in the same task description.
+    """
+    task = 'Go to https://example.com. Type {SECRET_PASSWORD: "mypassword"} in the password field. Type {SECRET_ADDRESS: "myaddress"} in the address field'
+    task, secrets = extract_secrets(task)
+    assert secrets == {"SECRET_PASSWORD": "mypassword", "SECRET_ADDRESS": "myaddress"}
+    assert task == 'Go to https://example.com. Type {SECRET_PASSWORD: "******"} in the password field. Type {SECRET_ADDRESS: "******"} in the address field'
+def test_secrets_mixed_with_non_secrets_are_extracted():
+    """
+    Multiple secrets should be extracted and replaced in the same task description, and non-secrets should remain the same.
+    """
+    task = 'Go to https://example.com. Type {USERNAME: "myusername"} in the username field. Type {SECRET_PASSWORD: "mypassword"} in the password field. Type {SECRET_ADDRESS: "myaddress"} in the address field'
+    task, secrets = extract_secrets(task)
+    assert secrets == {"SECRET_PASSWORD": "mypassword", "SECRET_ADDRESS": "myaddress"}
+    assert task == 'Go to https://example.com. Type {USERNAME: "myusername"} in the username field. Type {SECRET_PASSWORD: "******"} in the password field. Type {SECRET_ADDRESS: "******"} in the address field'

{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: parsagon
-Version: 0.10.20
+Version: 0.10.22
 Summary: Allows you to create browser automations with natural language
 Author-email: Sandy Suh <sandy@parsagon.io>
 Project-URL: Homepage, https://parsagon.io

{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,11 @@
 parsagon/__init__.py,sha256=n4-wiFVVuyW_KOJeNiycggAg9BTa5bbBIVpD_DkdOO4,125
-parsagon/api.py,sha256=eJULOzTyWqA4Mio7tH9PszwTrZyxRBI0uO9t1h3R7rw,6634
+parsagon/api.py,sha256=nDTDe0LdDTn1hSXbgqd8j1qxe_3xWm3wZXhrTsmbwOE,6842
 parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
-parsagon/exceptions.py,sha256=NYpFaSLZplBTv9fov_1LKPzDPIqb7Ffe7IunnjntxvA,819
+parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
 parsagon/executor.py,sha256=e_e9p5eLvf7wYHk1BNJf0j_qt0H17BfivPb8CoOKMHE,22791
 parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
-parsagon/main.py,sha256=yQbIzqJ7Ea6XZJ3Eolx2lTh7Di87qPJvDI0WRNeoX14,14736
+parsagon/main.py,sha256=mHmeXPUskTXyxJvuDnmOKF_MXkaOXB2oYYu5VOAE8s4,18344
+parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
 parsagon/settings.py,sha256=s5_MsDMFM5tB8U8tfHaFnKibCoEqPnAu8b_ueg07Ftw,2947
 parsagon/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 parsagon/tests/api_mocks.py,sha256=M8xhiyPa1dI8Vx-odDk7ETopfFAfcjfAf-ApmSqgvfw,3127
@@ -13,8 +14,9 @@ parsagon/tests/conftest.py,sha256=KMlHohc0QT77HzumraIojzKeqroyxarnaT6naJDNvEc,42
 parsagon/tests/test_executor.py,sha256=n3cmh84r74siSeJqUeAIwjjnNzDVPEdxcvYAeJ4hNX8,645
 parsagon/tests/test_invalid_args.py,sha256=kOjMpbZvviR1CwvXReteZMxBvuhq_rOv5Tm1muBSzNk,676
 parsagon/tests/test_pipeline_operations.py,sha256=TpBKCuRA8LHYWx3PD_k9mYCSsA_9SZjrOX-rS4mE8XE,1089
-parsagon-0.10.20.dist-info/METADATA,sha256=vA0gDtSRv8FeLScDZJHTNQjSgBw2MaAetLcrDJ9bIaw,2410
-parsagon-0.10.20.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
-parsagon-0.10.20.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
-parsagon-0.10.20.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
-parsagon-0.10.20.dist-info/RECORD,,
+parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
+parsagon-0.10.22.dist-info/METADATA,sha256=WMKXNXXNse8ftQwxVvwnV9LTuur6NmF2KCOJr9C8yZI,2410
+parsagon-0.10.22.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
+parsagon-0.10.22.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
+parsagon-0.10.22.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
+parsagon-0.10.22.dist-info/RECORD,,

{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.2)
+Generator: bdist_wheel (0.41.3)
 Root-Is-Purelib: true
 Tag: py3-none-any

{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{parsagon-0.10.20.dist-info → parsagon-0.10.22.dist-info}/top_level.txt RENAMED Viewed

File without changes

parsagon 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl

parsagon 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl