PyPI - parsagon - Versions diffs - 0.12.4__tar.gz → 0.14.0__tar.gz - Mend

parsagon 0.12.4 (tar.gz) → 0.14.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

{parsagon-0.12.4 → parsagon-0.14.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: parsagon
-Version: 0.12.4
+Version: 0.14.0
 Summary: Allows you to create browser automations with natural language
 Author-email: Sandy Suh <sandy@parsagon.io>
 Project-URL: Homepage, https://parsagon.io

{parsagon-0.12.4 → parsagon-0.14.0}/pyproject.toml RENAMED Viewed

@@ -16,7 +16,7 @@ line-length = 120
 [project]
 name = "parsagon"
-version = "0.12.4"
+version = "0.14.0"
 description = "Allows you to create browser automations with natural language"
 readme = "README.md"
 requires-python = ">=3.8"

parsagon-0.14.0/src/parsagon/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from parsagon.main import update, detail, delete, get_product, get_review_article, get_article_list
+from parsagon.create import create_program as create
+from parsagon.runs import run, batch_runs

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/api.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from json import JSONDecodeError
 import time
+from urllib.parse import quote
 import httpx
@@ -166,7 +167,13 @@ def create_pipeline(name, description, program_sketch, pseudocode, secrets):
     return _api_call(
         httpx.post,
         "/pipelines/",
-        json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode, "secrets": secrets},
+        json={
+            "name": name,
+            "description": description,
+            "program_sketch": program_sketch,
+            "pseudocode": pseudocode,
+            "secrets": secrets,
+        },
     )
@@ -195,11 +202,21 @@ def add_examples_to_custom_function(pipeline_id, call_id, custom_function, remov
     )
+def double_quote(string):
+    """
+    A bug in Django disallows URLs even with quoted slashes as in:
+    /api/pipelines/name/stripe%2Fstuff/code/ HTTP/1.1" 405
+    Therefore we must double quote
+    """
+    return quote(quote(string, safe=""), safe="")
 def get_pipeline(pipeline_name):
+    escaped_pipeline_name = double_quote(pipeline_name)
     with RaiseProgramNotFound(pipeline_name):
         return _api_call(
             httpx.get,
-            f"/pipelines/name/{pipeline_name}/",
+            f"/pipelines/name/{escaped_pipeline_name}/?double_escaped=True",
         )
@@ -207,14 +224,16 @@ def get_pipelines():
     return _api_call(httpx.get, f"/pipelines/")
-def get_pipeline_code(pipeline_name, variables, headless):
+def get_pipeline_code(pipeline_name, variables, headless, use_uc):
+    escaped_pipeline_name = double_quote(pipeline_name)
     with RaiseProgramNotFound(pipeline_name):
         return _api_call(
             httpx.post,
-            f"/pipelines/name/{pipeline_name}/code/",
+            f"/pipelines/name/{escaped_pipeline_name}/code/?double_escaped=True",
             json={
                 "variables": variables,
                 "headless": headless,
+                "use_uc": use_uc,
             },
         )
@@ -246,11 +265,17 @@ def get_run(run_id):
 def send_assistant_message(message, thread_id=None):
-    return _api_call(httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id})
+    return _api_call(
+        httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id}
+    )
 def send_assistant_function_outputs(outputs, thread_id, run_id):
-    return _api_call(httpx.post, "/transformers/send-assistant-function-outputs/", json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id})
+    return _api_call(
+        httpx.post,
+        "/transformers/send-assistant-function-outputs/",
+        json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id},
+    )
 def poll_extract(url, page_type):

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/assistant.py RENAMED Viewed

@@ -4,9 +4,11 @@ from parsagon.create import create_program
 from parsagon.executor import Executor
 from parsagon.print import assistant_print, assistant_spinner, browser_print, error_print
 from rich.prompt import Prompt
+from parsagon.runs import run, batch_runs
-def assist(task, headless, infer):
+def assist(verbose=False):
+    task = Prompt.ask("Type what do you want to do")
     with assistant_spinner():
         response = send_assistant_message(task)
     while True:
@@ -35,7 +37,16 @@ def assist(task, headless, infer):
                     output["output"] = html
                     outputs.append(output)
                 elif name == "create_program":
-                    result = create_program(args["description"], headless=headless, infer=infer)
+                    result = create_program(args["description"])
+                    output["output"] = json.dumps(result)
+                    outputs.append(output)
+                elif name == "run_program":
+                    result = run(**args)
+                    output["output"] = json.dumps(result)
+                    outputs.append(output)
+                elif name == "batch_runs":
+                    batch_name = input("Please enter a name for the batch run (for saving of intermediate results): ")
+                    result = batch_runs(batch_name, **args)
                     output["output"] = json.dumps(result)
                     outputs.append(output)
             with assistant_spinner():

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/create.py RENAMED Viewed

@@ -6,7 +6,7 @@ from parsagon.secrets import extract_secrets
 from rich.prompt import Prompt
-def create_program(task, headless=False, infer=False):
+def create_program(task, headless=False, infer=False, undetected=False):
     assistant_print("Creating a program based on your specifications...")
     task, secrets = extract_secrets(task)
     program_sketches = get_program_sketches(task)
@@ -28,7 +28,7 @@ def create_program(task, headless=False, infer=False):
     abridged_program += f"\n\noutput = func({args})\n"  # Make the program runnable
     # Execute the abridged program to gather examples
-    executor = Executor(headless=headless, infer=infer)
+    executor = Executor(headless=headless, infer=infer, use_uc=undetected)
     executor.execute(abridged_program)
     # The user must select a name

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/executor.py RENAMED Viewed

@@ -54,6 +54,7 @@ ELEMENT_TYPES = {
     "html": "HTML",
     "element": "ACTION",
     "textarea": "TEXT",
+    "text_markdown": "TEXT",
     "markdown": "TEXT",
     "elem_id": "ACTION",
 }

parsagon-0.14.0/src/parsagon/main.py ADDED Viewed

@@ -0,0 +1,300 @@
+import argparse
+import json
+import logging.config
+import time
+from rich.console import Console
+from rich.prompt import Prompt
+from parsagon.api import (
+    delete_pipeline,
+    add_examples_to_custom_function,
+    get_pipeline,
+    get_pipelines,
+    poll_extract
+)
+from parsagon.assistant import assist
+from parsagon.create import create_program
+from parsagon.exceptions import ParsagonException
+from parsagon.executor import Executor, custom_functions_to_descriptions
+from parsagon.runs import run
+from parsagon.settings import get_api_key, save_setting, configure_logging
+console = Console()
+logger = logging.getLogger(__name__)
+def get_args(argv):
+    parser = argparse.ArgumentParser(
+        prog="parsagon", description="Scrapes and interacts with web pages based on natural language.", add_help=False
+    )
+    parser.add_argument("-v", "--verbose", action="store_true", help="run the task in verbose mode")
+    subparsers = parser.add_subparsers()
+    # Create
+    parser_create = subparsers.add_parser("create", description="Creates a program.")
+    parser_create.add_argument(
+        "--headless",
+        action="store_true",
+        help="run the browser in headless mode",
+    )
+    parser_create.add_argument(
+        "--infer",
+        action="store_true",
+        help="let Parsagon infer all elements to be scraped",
+    )
+    parser_create.add_argument(
+        "--undetected",
+        action="store_true",
+        help="run in undetected mode",
+    )
+    parser_create.set_defaults(func=create_cli)
+    # Detail
+    parser_detail = subparsers.add_parser(
+        "detail",
+        description="Outputs details of a created program.",
+    )
+    parser_detail.add_argument(
+        "-p",
+        "--program",
+        dest="program_name",
+        type=str,
+        help="the name of the program",
+    )
+    parser_detail.set_defaults(func=detail)
+    # Update
+    parser_update = subparsers.add_parser(
+        "update",
+        description="Updates a created program.",
+    )
+    parser_update.add_argument(
+        "program_name",
+        type=str,
+        help="the name of the program to update",
+    )
+    parser_update.add_argument(
+        "--variables",
+        type=json.loads,
+        default="{}",
+        help="a JSON object mapping variables to values",
+    )
+    parser_update.add_argument(
+        "--headless",
+        action="store_true",
+        help="run the browser in headless mode",
+    )
+    parser_update.add_argument(
+        "--infer",
+        action="store_true",
+        help="let Parsagon infer all elements to be scraped",
+    )
+    parser_update.add_argument(
+        "--replace",
+        action="store_true",
+        help="remove old example data while updating the program",
+    )
+    parser_update.set_defaults(func=update)
+    # Run
+    parser_run = subparsers.add_parser(
+        "run",
+        description="Runs a created program.",
+    )
+    parser_run.add_argument(
+        "program_name",
+        type=str,
+        help="the name of the program to run",
+    )
+    parser_run.add_argument(
+        "--variables",
+        type=json.loads,
+        default="{}",
+        help="a JSON object mapping variables to values",
+    )
+    parser_run.add_argument(
+        "--headless",
+        action="store_true",
+        help="run the browser in headless mode",
+    )
+    parser_run.add_argument(
+        "--remote",
+        action="store_true",
+        help="run the program in the cloud",
+    )
+    parser_run.add_argument(
+        "--output_log",
+        action="store_true",
+        help="output log data from the run",
+    )
+    parser_run.add_argument(
+        "--output_file",
+        type=str,
+        help="write the data to the given file path",
+    )
+    parser_run.add_argument(
+        "--undetected",
+        action="store_true",
+        help="run in undetected mode",
+    )
+    parser_run.set_defaults(func=run)
+    # Delete
+    parser_delete = subparsers.add_parser(
+        "delete",
+        description="Deletes a program.",
+    )
+    parser_delete.add_argument(
+        "program_name",
+        type=str,
+        help="the name of the program to run",
+    )
+    parser_delete.add_argument(
+        "-y", "--yes", dest="confirm_with_user", action="store_false", help="auto-confirm option"
+    )
+    parser_delete.set_defaults(func=delete)
+    # Setup
+    parser_setup = subparsers.add_parser(
+        "setup",
+        description="Interactively sets up Parsagon with an API key.",
+    )
+    parser_setup.set_defaults(func=setup)
+    # Help
+    parser_help = subparsers.add_parser(
+        "help",
+        description="Shows help.",
+    )
+    parser_help.set_defaults(func=help, parser=parser)
+    args = parser.parse_args(argv)
+    kwargs = vars(args)
+    return kwargs, parser
+def main(argv=None):
+    kwargs, parser = get_args(argv)
+    func = kwargs.pop("func", None)
+    if func is None:
+        func = assist
+    verbose = kwargs["verbose"]
+    configure_logging(verbose)
+    try:
+        return func(**kwargs)
+    except ParsagonException as e:
+        error_message = "Error:\n" + e.to_string(verbose)
+        logger.error(error_message)
+def create_cli(headless=False, infer=False, undetected=False, verbose=False):
+    task = Prompt.ask("Enter a detailed scraping task")
+    create_program(task, headless=headless, infer=infer, undetected=undetected)
+def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
+    configure_logging(verbose)
+    pipeline = get_pipeline(program_name)
+    abridged_program = pipeline["abridged_sketch"]
+    # Make the program runnable
+    variables_str = ", ".join(f"{k}={repr(v)}" for k, v in variables.items())
+    abridged_program += f"\n\noutput = func({variables_str})\n"
+    # Execute the abridged program to gather examples
+    executor = Executor(headless=headless, infer=infer)
+    executor.execute(abridged_program)
+    while True:
+        program_name_input = input(
+            f'Type "{program_name}" to update this program, or press enter without typing a name to CANCEL: '
+        )
+        if not program_name_input:
+            logger.info("Canceled update.")
+            return
+        if program_name_input == program_name:
+            break
+    pipeline_id = pipeline["id"]
+    try:
+        for call_id, custom_function in executor.custom_functions.items():
+            debug_suffix = f" ({custom_function.name})"
+            description = custom_functions_to_descriptions.get(custom_function.name)
+            description = " to " + description if description else ""
+            if verbose:
+                description += debug_suffix
+            logger.info(f"  Saving function{description}...")
+            add_examples_to_custom_function(pipeline_id, call_id, custom_function, replace)
+        logger.info(f"Saved.")
+    except Exception as e:
+        logger.error(f"An error occurred while saving the program. The program was not updated.")
+def detail(program_name=None, verbose=False):
+    if program_name:
+        data = [get_pipeline(program_name)]
+    else:
+        data = get_pipelines()
+    for pipeline in data:
+        print(
+            f"Program: {pipeline['name']}\nDescription: {pipeline['description']}\nVariables: {pipeline['variables']}\n"
+        )
+def delete(program_name, verbose=False, confirm_with_user=False):
+    if (
+        confirm_with_user
+        and input(f"Are you sure you want to delete program with name {program_name}? (y/N) ").lower().strip() != "y"
+    ):
+        logger.error("Cancelled operation.")
+        return
+    logger.info("Preparing to delete program %s", program_name)
+    pipeline_id = get_pipeline(program_name)["id"]
+    logger.info("Deleting program...")
+    delete_pipeline(pipeline_id)
+    logger.info("Done.")
+def setup(verbose=False):
+    try:
+        old_api_key = get_api_key()
+    except ParsagonException:
+        old_api_key = None
+    try:
+        save_setting("api_key", None)
+        get_api_key(interactive=True)
+    except KeyboardInterrupt:
+        save_setting("api_key", old_api_key)
+        logger.error("\nCancelled operation.")
+        return
+    logger.info("Setup complete.")
+def help(parser, verbose):
+    parser.print_help()
+def _get_data(url, page_type, timeout):
+    start_time = time.time()
+    with console.status("Extracting data...") as status:
+        while time.time() - start_time <= timeout:
+            result = poll_extract(url, page_type)
+            if result["done"]:
+                return result["result"]
+            time.sleep(15)
+    logger.info("No data found")
+    return None
+def get_product(url, timeout=300):
+    return _get_data(url, "PRODUCT_DETAIL", timeout)
+def get_review_article(url, timeout=300):
+    return _get_data(url, "REVIEW_ARTICLE_DETAIL", timeout)
+def get_article_list(url, timeout=300):
+    return _get_data(url, "ARTICLE_LIST", timeout)

parsagon-0.14.0/src/parsagon/runs.py ADDED Viewed

@@ -0,0 +1,227 @@
+import datetime
+import json
+import logging.config
+import time
+import traceback
+import psutil
+from rich.console import Console
+from rich.progress import Progress
+from rich.prompt import Prompt
+from parsagon.api import (
+    create_pipeline_run,
+    update_pipeline_run,
+    get_pipeline,
+    get_pipeline_code,
+    get_run,
+)
+from parsagon.exceptions import ParsagonException, RunFailedException
+from parsagon.settings import get_api_key
+console = Console()
+logger = logging.getLogger(__name__)
+def run(program_name, variables={}, headless=False, remote=False, output_log=False, output_file=None, undetected=False, verbose=False):
+    """
+    Executes pipeline code
+    """
+    if headless and remote:
+        raise ParsagonException("Cannot run a program remotely in headless mode")
+    if not isinstance(variables, dict):
+        raise ParsagonException("Variables must be a dictionary")
+    logger.info("Preparing to run program %s", program_name)
+    pipeline_id = get_pipeline(program_name)["id"]
+    if remote:
+        result = create_pipeline_run(pipeline_id, variables, False)
+        with console.status("Program running remotely...") as status:
+            while True:
+                run = get_run(result["id"])
+                status = run["status"]
+                if output_log and status in ("FINISHED", "ERROR"):
+                    result = {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
+                    if output_file:
+                        with open(output_file, "w") as f:
+                            json.dump(result, f, indent=4)
+                        return
+                    else:
+                        return result
+                if status == "FINISHED":
+                    if verbose:
+                        logger.info(run["log"])
+                        for warning in run["warnings"]:
+                            logger.warning(warning)
+                    logger.info("Program finished running.")
+                    result = run["output"]
+                    if output_file:
+                        with open(output_file, "w") as f:
+                            json.dump(result, f, indent=4)
+                        return
+                    else:
+                        return result
+                elif status == "ERROR":
+                    raise ParsagonException(f"Program failed to run: {run['error']}")
+                elif status == "CANCELED":
+                    raise ParsagonException("Program execution was canceled")
+                time.sleep(5)
+    run = create_pipeline_run(pipeline_id, variables, True)
+    code = get_pipeline_code(program_name, variables, headless, undetected)["code"]
+    start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
+    run_data = {"start_time": start_time}
+    logger.info("Running program...")
+    globals_locals = {"PARSAGON_API_KEY": get_api_key()}
+    try:
+        exec(code, globals_locals, globals_locals)
+        run_data["status"] = "FINISHED"
+    except:
+        run_data["status"] = "ERROR"
+        run_data["error"] = str(traceback.format_exc())
+        if not output_log:
+            raise
+    finally:
+        end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
+        run_data["end_time"] = end_time
+        if "driver" in globals_locals:
+            globals_locals["driver"].quit()
+        if "display" in globals_locals:
+            globals_locals["display"].stop()
+        if "parsagon_log" in globals_locals:
+            run_data["log"] = "\n".join(globals_locals["parsagon_log"])
+            logger.info(run_data["log"])
+        if "parsagon_warnings" in globals_locals:
+            run_data["warnings"] = globals_locals["parsagon_warnings"]
+        for proc in psutil.process_iter():
+            try:
+                if proc.name() == "chromedriver":
+                    proc.kill()
+            except psutil.NoSuchProcess:
+                continue
+        run = update_pipeline_run(run["id"], run_data)
+    logger.info("Done.")
+    result = globals_locals["output"]
+    if output_log:
+        if "error" not in run_data:
+            run["output"] = globals_locals["output"]
+        result = {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
+    if output_file:
+        with open(output_file, "w") as f:
+            json.dump(result, f, indent=4)
+        return
+    else:
+        return result
+def batch_runs(
+    batch_name,
+    program_name,
+    runs,
+    headless=False,
+    ignore_errors=False,
+    error_value=None,
+    rerun_warnings=False,
+    rerun_warning_types=[],
+    rerun_errors=False,
+    verbose=False,
+):
+    # Validate runs
+    if not all(isinstance(run_, dict) for run_ in runs):
+        raise ParsagonException("Runs must be a list of dictionaries")
+    save_file = f"{batch_name}.json"
+    try:
+        with open(save_file) as f:
+            outputs = json.load(f)
+    except FileNotFoundError:
+        outputs = []
+    metadata_file = f"{batch_name}_metadata.json"
+    try:
+        with open(metadata_file) as f:
+            metadata = json.load(f)
+    except FileNotFoundError:
+        metadata = []
+    num_initial_results = len(outputs)
+    error = None
+    variables = None
+    try:
+        default_desc = f'Running program "{program_name}"'
+        with Progress() as progress:
+            task = progress.add_task(default_desc, total=len(runs))
+            for i, variables in progress.track(enumerate(runs), task_id=task):
+                if i < num_initial_results:
+                    if rerun_errors and metadata[i]["status"] == "ERROR":
+                        pass
+                    elif rerun_warnings and metadata[i]["warnings"]:
+                        if not rerun_warning_types or any(
+                            warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]
+                        ):
+                            pass
+                        else:
+                            continue
+                    else:
+                        continue
+                for j in range(3):
+                    result = run(program_name, variables, headless, output_log=True)
+                    if result["status"] != "ERROR":
+                        output = result.pop("output")
+                        if i < num_initial_results:
+                            outputs[i] = output
+                            metadata[i] = result
+                        else:
+                            outputs.append(output)
+                            metadata.append(result)
+                        break
+                    else:
+                        error = result["error"].strip().split("\n")[-1]
+                        if j < 2:
+                            progress.update(
+                                task,
+                                description=f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)",
+                            )
+                            time.sleep(60)
+                            progress.update(task, description=default_desc)
+                            error = None
+                            continue
+                        else:
+                            if ignore_errors:
+                                error = None
+                                if i < num_initial_results:
+                                    outputs[i] = error_value
+                                else:
+                                    outputs.append(error_value)
+                                break
+                            else:
+                                raise RunFailedException
+    except RunFailedException:
+        pass
+    except Exception as e:
+        error = repr(e)
+    finally:
+        if error:
+            logger.error(
+                f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume."
+            )
+        with open(save_file, "w") as f:
+            json.dump(outputs, f)
+        with open(metadata_file, "w") as f:
+            json.dump(metadata, f)
+    num_warnings = 0
+    num_runs_with_warnings = 0
+    for m in metadata:
+        if m["warnings"]:
+            num_warnings += len(m["warnings"])
+            num_runs_with_warnings += 1
+    logger.info(
+        f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n"
+    )
+    return None if error else outputs

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/settings.py RENAMED Viewed

@@ -3,6 +3,7 @@ import logging
 import sys
 from os import environ
 from pathlib import Path
+import logging.config
 from parsagon.exceptions import ParsagonException
@@ -114,3 +115,7 @@ def get_logging_config(log_level="INFO"):
             },
         },
     }
+def configure_logging(verbose):
+    logging.config.dictConfig(get_logging_config("DEBUG" if verbose else "INFO"))

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/api_mocks.py RENAMED Viewed

@@ -53,7 +53,7 @@ def mock_httpx_method_func(*args, **kwargs):
             },
         )
-    if match := re.search(r"/pipelines/name/(.+)/$", url):
+    if match := re.search(r"/pipelines/name/(.+)/", url):
         assert method == "get"
         pipeline_name = match.group(1)
         if pipeline_name == not_found_pipeline_name:

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/test_invalid_args.py RENAMED Viewed

@@ -1,23 +1,13 @@
 import pytest
-from parsagon import run
-from parsagon.tests.cli_mocks import call_cli
+from parsagon.main import main
 def test_headless_remote_run_invalid(mocker, debug_logs):
     """
     Tests that we are unable to run a program in headless mode when the environment is remote, and that this is logged to the user.
     """
-    call_cli(
-        mocker,
-        {
-            "func": run,
-            "program_name": "test_program",
-            "headless": True,
-            "remote": True,
-            "verbose": False,
-        },
-    )
+    main(["run", "test_program", "--headless", "--remote"])
     debug_logs_lower = debug_logs.text.lower()
     assert "error" in debug_logs_lower
     assert "headless" in debug_logs_lower

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/test_pipeline_operations.py RENAMED Viewed

@@ -6,7 +6,6 @@ import pytest
 from parsagon import delete, run
 from parsagon.main import main
 from parsagon.tests.api_mocks import install_api_mocks, not_found_pipeline_name
-from parsagon.tests.cli_mocks import call_cli
 def test_pipeline_delete(mocker):
@@ -18,24 +17,10 @@ def test_pipeline_not_found(mocker, debug_logs):
     install_api_mocks(mocker, {"code_to_return": 'raise Exception("Should not exec this code if pipeline not found.")'})
     # On delete
-    call_cli(
-        mocker,
-        {
-            "func": delete,
-            "program_name": not_found_pipeline_name,
-            "verbose": False,
-        },
-    )
+    main(["delete", not_found_pipeline_name, "-y"])
     assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
     debug_logs.clear()
     # On attempted run
-    call_cli(
-        mocker,
-        {
-            "func": run,
-            "program_name": not_found_pipeline_name,
-            "verbose": False,
-        },
-    )
+    main(["run", not_found_pipeline_name])
     assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: parsagon
-Version: 0.12.4
+Version: 0.14.0
 Summary: Allows you to create browser automations with natural language
 Author-email: Sandy Suh <sandy@parsagon.io>
 Project-URL: Homepage, https://parsagon.io

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon.egg-info/SOURCES.txt RENAMED Viewed

@@ -11,6 +11,7 @@ src/parsagon/executor.py
 src/parsagon/highlights.js
 src/parsagon/main.py
 src/parsagon/print.py
+src/parsagon/runs.py
 src/parsagon/secrets.py
 src/parsagon/settings.py
 src/parsagon.egg-info/PKG-INFO
@@ -21,7 +22,6 @@ src/parsagon.egg-info/requires.txt
 src/parsagon.egg-info/top_level.txt
 src/parsagon/tests/__init__.py
 src/parsagon/tests/api_mocks.py
-src/parsagon/tests/cli_mocks.py
 src/parsagon/tests/conftest.py
 src/parsagon/tests/test_executor.py
 src/parsagon/tests/test_invalid_args.py

parsagon-0.12.4/src/parsagon/__init__.py DELETED Viewed

@@ -1 +0,0 @@
-from parsagon.main import create, update, detail, run, batch_runs, delete, get_product, get_review_article, get_article_list

parsagon-0.12.4/src/parsagon/main.py DELETED Viewed

@@ -1,454 +0,0 @@
-import argparse
-import datetime
-import json
-import logging
-import logging.config
-import psutil
-import time
-import traceback
-from rich.console import Console
-from rich.progress import Progress
-from rich.prompt import Prompt
-from parsagon.api import (
-    get_program_sketches,
-    create_pipeline,
-    delete_pipeline,
-    add_examples_to_custom_function,
-    create_pipeline_run,
-    update_pipeline_run,
-    get_pipeline,
-    get_pipelines,
-    get_pipeline_code,
-    get_run,
-    poll_extract,
-)
-from parsagon.assistant import assist
-from parsagon.create import create_program
-from parsagon.exceptions import ParsagonException, RunFailedException
-from parsagon.executor import Executor, custom_functions_to_descriptions
-from parsagon.settings import get_api_key, get_settings, clear_settings, save_setting, get_logging_config
-console = Console()
-logger = logging.getLogger(__name__)
-def configure_logging(verbose):
-    logging.config.dictConfig(get_logging_config("DEBUG" if verbose else "INFO"))
-def get_args():
-    parser = argparse.ArgumentParser(
-        prog="parsagon", description="Scrapes and interacts with web pages based on natural language.", add_help=False
-    )
-    parser.add_argument("-v", "--verbose", action="store_true", help="run the task in verbose mode")
-    subparsers = parser.add_subparsers()
-    # Create
-    parser_create = subparsers.add_parser("create", description="Creates a program.")
-    parser_create.add_argument(
-        "--headless",
-        action="store_true",
-        help="run the browser in headless mode",
-    )
-    parser_create.add_argument(
-        "--infer",
-        action="store_true",
-        help="let Parsagon infer all elements to be scraped",
-    )
-    parser_create.add_argument(
-        "--no_assistant",
-        action="store_true",
-        help="disable the Parsagon assistant",
-    )
-    parser_create.set_defaults(func=create)
-    # Detail
-    parser_detail = subparsers.add_parser(
-        "detail",
-        description="Outputs details of a created program.",
-    )
-    parser_detail.add_argument(
-        "-p",
-        "--program",
-        dest="program_name",
-        type=str,
-        help="the name of the program",
-    )
-    parser_detail.set_defaults(func=detail)
-    # Update
-    parser_update = subparsers.add_parser(
-        "update",
-        description="Updates a created program.",
-    )
-    parser_update.add_argument(
-        "program_name",
-        type=str,
-        help="the name of the program to update",
-    )
-    parser_update.add_argument(
-        "--variables",
-        type=json.loads,
-        default="{}",
-        help="a JSON object mapping variables to values",
-    )
-    parser_update.add_argument(
-        "--headless",
-        action="store_true",
-        help="run the browser in headless mode",
-    )
-    parser_update.add_argument(
-        "--infer",
-        action="store_true",
-        help="let Parsagon infer all elements to be scraped",
-    )
-    parser_update.add_argument(
-        "--replace",
-        action="store_true",
-        help="remove old example data while updating the program",
-    )
-    parser_update.set_defaults(func=update)
-    # Run
-    parser_run = subparsers.add_parser(
-        "run",
-        description="Runs a created program.",
-    )
-    parser_run.add_argument(
-        "program_name",
-        type=str,
-        help="the name of the program to run",
-    )
-    parser_run.add_argument(
-        "--variables",
-        type=json.loads,
-        default="{}",
-        help="a JSON object mapping variables to values",
-    )
-    parser_run.add_argument(
-        "--headless",
-        action="store_true",
-        help="run the browser in headless mode",
-    )
-    parser_run.add_argument(
-        "--remote",
-        action="store_true",
-        help="run the program in the cloud",
-    )
-    parser_run.add_argument(
-        "--output_log",
-        action="store_true",
-        help="output log data from the run",
-    )
-    parser_run.set_defaults(func=run)
-    # Delete
-    parser_delete = subparsers.add_parser(
-        "delete",
-        description="Deletes a program.",
-    )
-    parser_delete.add_argument(
-        "program_name",
-        type=str,
-        help="the name of the program to run",
-    )
-    parser_delete.add_argument(
-        "-y", "--yes", dest="confirm_with_user", action="store_false", help="auto-confirm option"
-    )
-    parser_delete.set_defaults(func=delete)
-    # Setup
-    parser_setup = subparsers.add_parser(
-        "setup",
-        description="Interactively sets up Parsagon with an API key.",
-    )
-    parser_setup.set_defaults(func=setup)
-    args = parser.parse_args()
-    kwargs = vars(args)
-    return kwargs, parser
-def main():
-    kwargs, parser = get_args()
-    func = kwargs.pop("func")
-    verbose = kwargs["verbose"]
-    configure_logging(verbose)
-    if func:
-        try:
-            return func(**kwargs)
-        except ParsagonException as e:
-            error_message = "Error:\n" + e.to_string(verbose)
-            logger.error(error_message)
-    else:
-        parser.print_help()
-def create(headless=False, infer=False, no_assistant=False, verbose=False):
-    task = Prompt.ask("Type what do you want to do")
-    if no_assistant:
-        create_program(task, headless=headless, infer=infer)
-    else:
-        assist(task, headless=headless, infer=infer)
-def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
-    configure_logging(verbose)
-    pipeline = get_pipeline(program_name)
-    abridged_program = pipeline["abridged_sketch"]
-    # Make the program runnable
-    variables_str = ", ".join(f"{k}={repr(v)}" for k, v in variables.items())
-    abridged_program += f"\n\noutput = func({variables_str})\n"
-    # Execute the abridged program to gather examples
-    executor = Executor(headless=headless, infer=infer)
-    executor.execute(abridged_program)
-    while True:
-        program_name_input = input(f"Type \"{program_name}\" to update this program, or press enter without typing a name to CANCEL: ")
-        if not program_name_input:
-            logger.info("Canceled update.")
-            return
-        if program_name_input == program_name:
-            break
-    pipeline_id = pipeline["id"]
-    try:
-        for call_id, custom_function in executor.custom_functions.items():
-            debug_suffix = f" ({custom_function.name})"
-            description = custom_functions_to_descriptions.get(custom_function.name)
-            description = " to " + description if description else ""
-            if verbose:
-                description += debug_suffix
-            logger.info(f"  Saving function{description}...")
-            add_examples_to_custom_function(pipeline_id, call_id, custom_function, replace)
-        logger.info(f"Saved.")
-    except Exception as e:
-        logger.error(f"An error occurred while saving the program. The program was not updated.")
-def detail(program_name=None, verbose=False):
-    if program_name:
-        data = [get_pipeline(program_name)]
-    else:
-        data = get_pipelines()
-    for pipeline in data:
-        print(
-            f"Program: {pipeline['name']}\nDescription: {pipeline['description']}\nVariables: {pipeline['variables']}\n"
-        )
-def run(program_name, variables={}, headless=False, remote=False, output_log=False, verbose=False):
-    """
-    Executes pipeline code
-    """
-    if headless and remote:
-        raise ParsagonException("Cannot run a program remotely in headless mode")
-    logger.info("Preparing to run program %s", program_name)
-    pipeline_id = get_pipeline(program_name)["id"]
-    if remote:
-        result = create_pipeline_run(pipeline_id, variables, False)
-        with console.status("Program running remotely...") as status:
-            while True:
-                run = get_run(result["id"])
-                status = run["status"]
-                if output_log and status in ("FINISHED", "ERROR"):
-                    return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
-                if status == "FINISHED":
-                    if verbose:
-                        logger.info(run["log"])
-                        for warning in run["warnings"]:
-                            logger.warning(warning)
-                    logger.info("Program finished running.")
-                    return run["output"]
-                elif status == "ERROR":
-                    raise ParsagonException(f"Program failed to run: {run['error']}")
-                elif status == "CANCELED":
-                    raise ParsagonException("Program execution was canceled")
-                time.sleep(5)
-    run = create_pipeline_run(pipeline_id, variables, True)
-    code = get_pipeline_code(program_name, variables, headless)["code"]
-    start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
-    run_data = {"start_time": start_time}
-    logger.info("Running program...")
-    globals_locals = {"PARSAGON_API_KEY": get_api_key()}
-    try:
-        exec(code, globals_locals, globals_locals)
-        run_data["status"] = "FINISHED"
-    except:
-        run_data["status"] = "ERROR"
-        run_data["error"] = str(traceback.format_exc())
-        if not output_log:
-            raise
-    finally:
-        end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
-        run_data["end_time"] = end_time
-        if "driver" in globals_locals:
-            globals_locals["driver"].quit()
-        if "display" in globals_locals:
-            globals_locals["display"].stop()
-        if "parsagon_log" in globals_locals:
-            run_data["log"] = "\n".join(globals_locals["parsagon_log"])
-            logger.info(run_data["log"])
-        if "parsagon_warnings" in globals_locals:
-            run_data["warnings"] = globals_locals["parsagon_warnings"]
-        for proc in psutil.process_iter():
-            try:
-                if proc.name() == "chromedriver":
-                    proc.kill()
-            except psutil.NoSuchProcess:
-                continue
-        run = update_pipeline_run(run["id"], run_data)
-    logger.info("Done.")
-    if output_log:
-        if "error" not in run_data:
-            run["output"] = globals_locals["output"]
-        return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
-    return globals_locals["output"]
-def batch_runs(batch_name, program_name, runs, headless=False, ignore_errors=False, error_value=None, rerun_warnings=False, rerun_warning_types=[], rerun_errors=False, verbose=False):
-    save_file = f"{batch_name}.json"
-    try:
-        with open(save_file) as f:
-            outputs = json.load(f)
-    except FileNotFoundError:
-        outputs = []
-    metadata_file = f"{batch_name}_metadata.json"
-    try:
-        with open(metadata_file) as f:
-            metadata = json.load(f)
-    except FileNotFoundError:
-        metadata = []
-    num_initial_results = len(outputs)
-    error = None
-    variables = None
-    try:
-        default_desc = f'Running program "{program_name}"'
-        with Progress() as progress:
-            task = progress.add_task(default_desc, total=len(runs))
-            for i, variables in progress.track(enumerate(runs), task_id=task):
-                if i < num_initial_results:
-                    if rerun_errors and metadata[i]["status"] == "ERROR":
-                        pass
-                    elif rerun_warnings and metadata[i]["warnings"]:
-                        if not rerun_warning_types or any(warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]):
-                            pass
-                        else:
-                            continue
-                    else:
-                        continue
-                for j in range(3):
-                    result = run(program_name, variables, headless, output_log=True)
-                    if result["status"] != "ERROR":
-                        output = result.pop("output")
-                        if i < num_initial_results:
-                            outputs[i] = output
-                            metadata[i] = result
-                        else:
-                            outputs.append(output)
-                            metadata.append(result)
-                        break
-                    else:
-                        error = result["error"].strip().split("\n")[-1]
-                        if j < 2:
-                            progress.update(task, description=f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)")
-                            time.sleep(60)
-                            progress.update(task, description=default_desc)
-                            error = None
-                            continue
-                        else:
-                            if ignore_errors:
-                                error = None
-                                if i < num_initial_results:
-                                    outputs[i] = error_value
-                                else:
-                                    outputs.append(error_value)
-                                break
-                            else:
-                                raise RunFailedException
-    except RunFailedException:
-        pass
-    except Exception as e:
-        error = repr(e)
-    finally:
-        configure_logging(verbose)
-        if error:
-            logger.error(f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
-        with open(save_file, "w") as f:
-            json.dump(outputs, f)
-        with open(metadata_file, "w") as f:
-            json.dump(metadata, f)
-    num_warnings = 0
-    num_runs_with_warnings = 0
-    for m in metadata:
-        if m["warnings"]:
-            num_warnings += len(m["warnings"])
-            num_runs_with_warnings += 1
-    logger.info(f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n")
-    return None if error else outputs
-def delete(program_name, verbose=False, confirm_with_user=False):
-    if (
-        confirm_with_user
-        and input(f"Are you sure you want to delete program with name {program_name}? (y/N) ").lower().strip() != "y"
-    ):
-        logger.error("Cancelled operation.")
-        return
-    logger.info("Preparing to delete program %s", program_name)
-    pipeline_id = get_pipeline(program_name)["id"]
-    logger.info("Deleting program...")
-    delete_pipeline(pipeline_id)
-    logger.info("Done.")
-def setup(verbose=False):
-    try:
-        old_api_key = get_api_key()
-    except ParsagonException:
-        old_api_key = None
-    try:
-        save_setting("api_key", None)
-        get_api_key(interactive=True)
-    except KeyboardInterrupt:
-        save_setting("api_key", old_api_key)
-        logger.error("\nCancelled operation.")
-        return
-    logger.info("Setup complete.")
-def _get_data(url, page_type, timeout):
-    start_time = time.time()
-    with console.status("Extracting data...") as status:
-        while time.time() - start_time <= timeout:
-            result = poll_extract(url, page_type)
-            if result["done"]:
-                return result["result"]
-            time.sleep(15)
-    logger.info("No data found")
-    return None
-def get_product(url, timeout=300):
-    return _get_data(url, "PRODUCT_DETAIL", timeout)
-def get_review_article(url, timeout=300):
-    return _get_data(url, "REVIEW_ARTICLE_DETAIL", timeout)
-def get_article_list(url, timeout=300):
-    return _get_data(url, "ARTICLE_LIST", timeout)

parsagon-0.12.4/src/parsagon/tests/cli_mocks.py DELETED Viewed

@@ -1,16 +0,0 @@
-from parsagon.main import main
-def call_cli(mocker, args):
-    """
-    Uses the mocker to pretend that the args passed are coming from argparse, then calls the main function.
-    """
-    mocker.patch(
-        "parsagon.main.get_args",
-        lambda: (
-            args,
-            None,
-        ),
-    )
-    return main()

{parsagon-0.12.4 → parsagon-0.14.0}/README.md RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/setup.cfg RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/__init__.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/custom_function.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/exceptions.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/highlights.js RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/print.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/secrets.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/__init__.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/conftest.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/test_executor.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon/tests/test_secrets.py RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon.egg-info/entry_points.txt RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon.egg-info/requires.txt RENAMED Viewed

File without changes

{parsagon-0.12.4 → parsagon-0.14.0}/src/parsagon.egg-info/top_level.txt RENAMED Viewed

File without changes

parsagon 0.12.4__tar.gz → 0.14.0__tar.gz

parsagon 0.12.4tar.gz → 0.14.0tar.gz