parsagon 0.10.20__py3-none-any.whl → 0.10.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parsagon/api.py CHANGED
@@ -135,11 +135,11 @@ def get_bool_about_data(data, question):
135
135
  return data["result"]
136
136
 
137
137
 
138
- def create_pipeline(name, description, program_sketch, pseudocode):
138
+ def create_pipeline(name, description, program_sketch, pseudocode, secrets):
139
139
  return _api_call(
140
140
  httpx.post,
141
141
  "/pipelines/",
142
- json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode},
142
+ json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode, "secrets": secrets},
143
143
  )
144
144
 
145
145
 
@@ -192,11 +192,19 @@ def get_pipeline_code(pipeline_name, variables, headless):
192
192
  )
193
193
 
194
194
 
195
- def create_pipeline_run(pipeline_id, variables):
195
+ def create_pipeline_run(pipeline_id, variables, is_local):
196
196
  return _api_call(
197
197
  httpx.post,
198
198
  f"/pipelines/{pipeline_id}/runs/",
199
- json={"variables": variables},
199
+ json={"variables": variables, "is_local": is_local},
200
+ )
201
+
202
+
203
+ def update_pipeline_run(run_id, data):
204
+ return _api_call(
205
+ httpx.patch,
206
+ f"/pipelines/runs/{run_id}/",
207
+ json=data,
200
208
  )
201
209
 
202
210
 
parsagon/exceptions.py CHANGED
@@ -24,3 +24,8 @@ class ProgramNotFoundException(ParsagonException):
24
24
 
25
25
  def to_string(self, verbose):
26
26
  return f"A program with name {self.program} does not exist."
27
+
28
+
29
+ class RunFailedException(ParsagonException):
30
+ """Raised when a run fails."""
31
+ pass
parsagon/main.py CHANGED
@@ -1,9 +1,11 @@
1
1
  import argparse
2
+ import datetime
2
3
  import json
3
4
  import logging
4
5
  import logging.config
5
6
  import psutil
6
7
  import time
8
+ import traceback
7
9
 
8
10
  from halo import Halo
9
11
  from tqdm import tqdm
@@ -15,15 +17,16 @@ from parsagon.api import (
15
17
  create_custom_function,
16
18
  add_examples_to_custom_function,
17
19
  create_pipeline_run,
20
+ update_pipeline_run,
18
21
  get_pipeline,
19
22
  get_pipelines,
20
23
  get_pipeline_code,
21
24
  get_run,
22
25
  poll_data,
23
- APIException,
24
26
  )
25
- from parsagon.exceptions import ParsagonException
27
+ from parsagon.exceptions import ParsagonException, APIException, RunFailedException
26
28
  from parsagon.executor import Executor, custom_functions_to_descriptions
29
+ from parsagon.secrets import extract_secrets
27
30
  from parsagon.settings import get_api_key, get_settings, clear_settings, save_setting, get_logging_config
28
31
 
29
32
  logger = logging.getLogger(__name__)
@@ -139,6 +142,11 @@ def get_args():
139
142
  action="store_true",
140
143
  help="run the program in the cloud",
141
144
  )
145
+ parser_run.add_argument(
146
+ "--output_log",
147
+ action="store_true",
148
+ help="output log data from the run",
149
+ )
142
150
  parser_run.set_defaults(func=run)
143
151
 
144
152
  # Delete
@@ -185,12 +193,15 @@ def main():
185
193
 
186
194
 
187
195
  def create(task=None, program_name=None, headless=False, infer=False, verbose=False):
196
+ configure_logging(verbose)
197
+
188
198
  if task:
189
199
  logger.info("Launched with task description:\n%s", task)
190
200
  else:
191
201
  task = input("Type what you want to do: ")
192
202
 
193
203
  logger.info("Analyzing task description...")
204
+ task, secrets = extract_secrets(task)
194
205
  program_sketches = get_program_sketches(task)
195
206
 
196
207
  full_program = program_sketches["full"]
@@ -198,7 +209,8 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
198
209
  pseudocode = program_sketches["pseudocode"]
199
210
  logger.info(f"Created a program based on task description. Program does the following:\n\n{pseudocode}\n\nNow executing the program to identify web elements to be scraped:\n")
200
211
  logger.debug("Program:\n%s", abridged_program)
201
- abridged_program += "\n\noutput = func()\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n" # Make the program runnable
212
+ args = ", ".join(f"{k}={repr(v)}" for k, v in secrets.items())
213
+ abridged_program += f"\n\noutput = func({args})" + "\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n" # Make the program runnable
202
214
 
203
215
  # Execute the abridged program to gather examples
204
216
  executor = Executor(headless=headless, infer=infer)
@@ -211,7 +223,7 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
211
223
  if program_name:
212
224
  logger.info(f"Saving program as {program_name}")
213
225
  try:
214
- pipeline = create_pipeline(program_name, task, full_program, pseudocode)
226
+ pipeline = create_pipeline(program_name, task, full_program, pseudocode, secrets)
215
227
  except APIException as e:
216
228
  if isinstance(e.value, list) and "Pipeline with name already exists" in e.value:
217
229
  logger.info("A program with this name already exists. Please choose another name.")
@@ -243,6 +255,8 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
243
255
 
244
256
 
245
257
  def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
258
+ configure_logging(verbose)
259
+
246
260
  pipeline = get_pipeline(program_name)
247
261
  abridged_program = pipeline["abridged_sketch"]
248
262
  # Make the program runnable
@@ -274,8 +288,7 @@ def update(program_name, variables={}, headless=False, infer=False, replace=Fals
274
288
  add_examples_to_custom_function(pipeline_id, call_id, custom_function, replace)
275
289
  logger.info(f"Saved.")
276
290
  except Exception as e:
277
- print(e)
278
- logger.info(f"An error occurred while saving the program. The program was not updated.")
291
+ logger.error(f"An error occurred while saving the program. The program was not updated.")
279
292
 
280
293
 
281
294
  def detail(program_name=None, verbose=False):
@@ -289,96 +302,165 @@ def detail(program_name=None, verbose=False):
289
302
  )
290
303
 
291
304
 
292
- def run(program_name, variables={}, headless=False, remote=False, verbose=False):
305
+ def run(program_name, variables={}, headless=False, remote=False, output_log=False, verbose=False):
293
306
  """
294
307
  Executes pipeline code
295
308
  """
309
+ configure_logging(verbose)
310
+
296
311
  if headless and remote:
297
312
  raise ParsagonException("Cannot run a program remotely in headless mode")
298
313
 
314
+ logger.info("Preparing to run program %s", program_name)
315
+ pipeline_id = get_pipeline(program_name)["id"]
316
+
299
317
  if remote:
300
- pipeline_id = get_pipeline(program_name)["id"]
301
- result = create_pipeline_run(pipeline_id, variables)
318
+ result = create_pipeline_run(pipeline_id, variables, False)
302
319
  with Halo(text="Program running remotely...", spinner="dots"):
303
320
  while True:
304
321
  run = get_run(result["id"])
305
322
  status = run["status"]
323
+
324
+ if output_log and status in ("FINISHED", "ERROR"):
325
+ return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
326
+
306
327
  if status == "FINISHED":
328
+ if verbose:
329
+ logger.info(run["log"])
330
+ for warning in run["warnings"]:
331
+ logger.warning(warning)
307
332
  logger.info("Program finished running.")
308
333
  return run["output"]
309
334
  elif status == "ERROR":
310
335
  raise ParsagonException(f"Program failed to run: {run['error']}")
311
336
  elif status == "CANCELED":
312
337
  raise ParsagonException("Program execution was canceled")
338
+
313
339
  time.sleep(5)
314
340
 
315
- logger.info("Preparing to run program %s", program_name)
341
+ run = create_pipeline_run(pipeline_id, variables, True)
316
342
  code = get_pipeline_code(program_name, variables, headless)["code"]
343
+ start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
344
+ run_data = {"start_time": start_time}
317
345
 
318
346
  logger.info("Running program...")
319
347
  globals_locals = {"PARSAGON_API_KEY": get_api_key()}
320
348
  try:
321
349
  exec(code, globals_locals, globals_locals)
350
+ run_data["status"] = "FINISHED"
351
+ except:
352
+ run_data["status"] = "ERROR"
353
+ run_data["error"] = str(traceback.format_exc())
354
+ if not output_log:
355
+ raise
322
356
  finally:
357
+ end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
358
+ run_data["end_time"] = end_time
323
359
  if "driver" in globals_locals:
324
360
  globals_locals["driver"].quit()
325
361
  if "display" in globals_locals:
326
362
  globals_locals["display"].stop()
363
+ if "parsagon_log" in globals_locals:
364
+ run_data["log"] = "\n".join(globals_locals["parsagon_log"])
365
+ logger.info(run_data["log"])
366
+ if "parsagon_warnings" in globals_locals:
367
+ run_data["warnings"] = globals_locals["parsagon_warnings"]
327
368
  for proc in psutil.process_iter():
328
369
  try:
329
370
  if proc.name() == "chromedriver":
330
371
  proc.kill()
331
372
  except psutil.NoSuchProcess:
332
373
  continue
374
+ run = update_pipeline_run(run["id"], run_data)
333
375
  logger.info("Done.")
376
+ if output_log:
377
+ if "error" not in run_data:
378
+ run["output"] = globals_locals["output"]
379
+ return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
334
380
  return globals_locals["output"]
335
381
 
336
382
 
337
- def batch_runs(batch_name, program_name, runs=[], headless=False, ignore_errors=False, error_value=None):
383
+ def batch_runs(batch_name, program_name, runs=[], headless=False, ignore_errors=False, error_value=None, rerun_warnings=False, rerun_warning_types=[], rerun_errors=False, verbose=False):
384
+ configure_logging(verbose)
385
+
338
386
  save_file = f"{batch_name}.json"
339
387
  try:
340
388
  with open(save_file) as f:
341
- results = json.load(f)
389
+ outputs = json.load(f)
342
390
  except FileNotFoundError:
343
- results = []
344
- num_initial_results = len(results)
391
+ outputs = []
392
+ metadata_file = f"{batch_name}_metadata.json"
393
+ try:
394
+ with open(metadata_file) as f:
395
+ metadata = json.load(f)
396
+ except FileNotFoundError:
397
+ metadata = []
398
+
399
+ num_initial_results = len(outputs)
345
400
  pbar = tqdm(runs)
346
401
  default_desc = f'Running program "{program_name}"'
347
402
  pbar.set_description(default_desc)
348
403
  error = None
349
- error_variables = None
404
+ variables = None
350
405
  try:
351
406
  for i, variables in enumerate(pbar):
352
407
  if i < num_initial_results:
353
- continue
408
+ if rerun_errors and metadata[i]["status"] == "ERROR":
409
+ pass
410
+ elif rerun_warnings and metadata[i]["warnings"]:
411
+ if not rerun_warning_types or any(warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]):
412
+ pass
413
+ else:
414
+ continue
415
+ else:
416
+ continue
354
417
  for j in range(3):
355
- try:
356
- results.append(run(program_name, variables, headless))
418
+ result = run(program_name, variables, headless, output_log=True)
419
+ if result["status"] != "ERROR":
420
+ output = result.pop("output")
421
+ if i < num_initial_results:
422
+ outputs[i] = output
423
+ metadata[i] = result
424
+ else:
425
+ outputs.append(output)
426
+ metadata.append(result)
357
427
  break
358
- except Exception as e:
359
- error = e
360
- error_variables = variables
428
+ else:
429
+ error = result["error"].strip().split("\n")[-1]
361
430
  if j < 2:
362
- pbar.set_description(f"An error occurred: {e} - Waiting 60s before retrying (Attempt {j+2}/3)")
431
+ pbar.set_description(f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)")
363
432
  time.sleep(60)
364
433
  pbar.set_description(default_desc)
365
434
  error = None
366
- error_variables = None
367
435
  continue
368
436
  else:
369
437
  if ignore_errors:
370
438
  error = None
371
- error_variables = None
372
- results.append(error_value)
439
+ if i < num_initial_results:
440
+ outputs[i] = error_value
441
+ else:
442
+ outputs.append(error_value)
373
443
  break
374
444
  else:
375
- raise
445
+ raise RunFailedException
446
+ except RunFailedException:
447
+ logger.error(f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
376
448
  except Exception as e:
377
- logger.error(f"Unresolvable error occurred on run with variables {error_variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
449
+ error = str(e)
450
+ logger.error(f"Unresolvable error occurred while looping over runs: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
378
451
  finally:
379
452
  with open(save_file, "w") as f:
380
- json.dump(results, f)
381
- return None if error else results
453
+ json.dump(outputs, f)
454
+ with open(metadata_file, "w") as f:
455
+ json.dump(metadata, f)
456
+ num_warnings = 0
457
+ num_runs_with_warnings = 0
458
+ for m in metadata:
459
+ if m["warnings"]:
460
+ num_warnings += len(m["warnings"])
461
+ num_runs_with_warnings += 1
462
+ logger.info(f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n")
463
+ return None if error else outputs
382
464
 
383
465
 
384
466
  def delete(program_name, verbose=False, confirm_with_user=False):
parsagon/secrets.py ADDED
@@ -0,0 +1,15 @@
1
+ import ast
2
+ import re
3
+
4
+
5
def extract_secrets(task):
    """
    Pull secret placeholders out of a task description and mask them.

    A placeholder has the form ``{NAME: "value"}``, where NAME consists of
    ASCII letters and underscores and the value is a double-quoted string
    (``\\"`` may be used for an embedded quote). Only placeholders whose name
    starts with ``SECRET`` are treated as secrets; all other placeholders are
    left exactly as written.

    Returns a ``(task, secrets)`` tuple: ``task`` is the description with
    every secret placeholder rewritten to ``{NAME: "******"}``, and
    ``secrets`` maps each secret name to its decoded string value.
    """
    secrets = {}

    def _redact(match):
        var_name = match.group("var")
        if not var_name.startswith("SECRET"):
            # Not a secret: keep the placeholder byte-for-byte.
            return match.group(0)
        # The value is a quoted literal, so literal_eval decodes its escapes.
        secrets[var_name] = ast.literal_eval(match.group("value"))
        # Build the masked text from the same match that detected the secret.
        # Running a second, stricter regex here would miss placeholders with
        # padding around the name and leave the secret in the returned task.
        return '{%s: "******"}' % var_name

    task = re.sub(
        r'\{\s*(?P<var>[A-Za-z_]+)\s*:\s*(?P<value>"([^"]|\\")*")\}',
        _redact,
        task,
    )
    return task, secrets
@@ -0,0 +1,53 @@
1
+ import pytest
2
+
3
+ from parsagon.secrets import extract_secrets
4
+
5
+
6
def test_non_secrets_are_not_extracted():
    """
    A placeholder without the SECRET prefix yields no secrets and leaves the text as-is.
    """
    original = 'Go to https://example.com. Type {username: "myusername"} in the username field'
    result_task, found = extract_secrets(original)
    assert found == {}
    assert result_task == 'Go to https://example.com. Type {username: "myusername"} in the username field'
14
+
15
+
16
def test_secret_is_extracted():
    """
    A single SECRET placeholder is returned in the mapping and masked in the text.
    """
    result_task, found = extract_secrets(
        'Go to https://example.com. Type {SECRET_PASSWORD: "mypassword"} in the password field'
    )
    expected_task = 'Go to https://example.com. Type {SECRET_PASSWORD: "******"} in the password field'
    assert found == {"SECRET_PASSWORD": "mypassword"}
    assert result_task == expected_task
24
+
25
+
26
def test_secret_with_quotes_is_extracted():
    """
    An escaped double quote inside a secret value is decoded in the mapping and masked in the text.
    """
    result_task, found = extract_secrets(
        'Go to https://example.com. Type {SECRET_PASSWORD: "mypassword\\"?!1"} in the password field'
    )
    expected_task = 'Go to https://example.com. Type {SECRET_PASSWORD: "******"} in the password field'
    assert found == {"SECRET_PASSWORD": 'mypassword"?!1'}
    assert result_task == expected_task
34
+
35
+
36
def test_multiple_secrets_are_extracted():
    """
    Every SECRET placeholder in one description is collected and masked.
    """
    result_task, found = extract_secrets(
        'Go to https://example.com. Type {SECRET_PASSWORD: "mypassword"} in the password field. Type {SECRET_ADDRESS: "myaddress"} in the address field'
    )
    expected_task = 'Go to https://example.com. Type {SECRET_PASSWORD: "******"} in the password field. Type {SECRET_ADDRESS: "******"} in the address field'
    assert found == {"SECRET_PASSWORD": "mypassword", "SECRET_ADDRESS": "myaddress"}
    assert result_task == expected_task
44
+
45
+
46
def test_secrets_mixed_with_non_secrets_are_extracted():
    """
    SECRET placeholders are collected and masked while a non-secret placeholder in the same text is unchanged.
    """
    result_task, found = extract_secrets(
        'Go to https://example.com. Type {USERNAME: "myusername"} in the username field. Type {SECRET_PASSWORD: "mypassword"} in the password field. Type {SECRET_ADDRESS: "myaddress"} in the address field'
    )
    expected_task = 'Go to https://example.com. Type {USERNAME: "myusername"} in the username field. Type {SECRET_PASSWORD: "******"} in the password field. Type {SECRET_ADDRESS: "******"} in the address field'
    assert found == {"SECRET_PASSWORD": "mypassword", "SECRET_ADDRESS": "myaddress"}
    assert result_task == expected_task
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.10.20
3
+ Version: 0.10.22
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -1,10 +1,11 @@
1
1
  parsagon/__init__.py,sha256=n4-wiFVVuyW_KOJeNiycggAg9BTa5bbBIVpD_DkdOO4,125
2
- parsagon/api.py,sha256=eJULOzTyWqA4Mio7tH9PszwTrZyxRBI0uO9t1h3R7rw,6634
2
+ parsagon/api.py,sha256=nDTDe0LdDTn1hSXbgqd8j1qxe_3xWm3wZXhrTsmbwOE,6842
3
3
  parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
4
- parsagon/exceptions.py,sha256=NYpFaSLZplBTv9fov_1LKPzDPIqb7Ffe7IunnjntxvA,819
4
+ parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
5
5
  parsagon/executor.py,sha256=e_e9p5eLvf7wYHk1BNJf0j_qt0H17BfivPb8CoOKMHE,22791
6
6
  parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
7
- parsagon/main.py,sha256=yQbIzqJ7Ea6XZJ3Eolx2lTh7Di87qPJvDI0WRNeoX14,14736
7
+ parsagon/main.py,sha256=mHmeXPUskTXyxJvuDnmOKF_MXkaOXB2oYYu5VOAE8s4,18344
8
+ parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
8
9
  parsagon/settings.py,sha256=s5_MsDMFM5tB8U8tfHaFnKibCoEqPnAu8b_ueg07Ftw,2947
9
10
  parsagon/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
11
  parsagon/tests/api_mocks.py,sha256=M8xhiyPa1dI8Vx-odDk7ETopfFAfcjfAf-ApmSqgvfw,3127
@@ -13,8 +14,9 @@ parsagon/tests/conftest.py,sha256=KMlHohc0QT77HzumraIojzKeqroyxarnaT6naJDNvEc,42
13
14
  parsagon/tests/test_executor.py,sha256=n3cmh84r74siSeJqUeAIwjjnNzDVPEdxcvYAeJ4hNX8,645
14
15
  parsagon/tests/test_invalid_args.py,sha256=kOjMpbZvviR1CwvXReteZMxBvuhq_rOv5Tm1muBSzNk,676
15
16
  parsagon/tests/test_pipeline_operations.py,sha256=TpBKCuRA8LHYWx3PD_k9mYCSsA_9SZjrOX-rS4mE8XE,1089
16
- parsagon-0.10.20.dist-info/METADATA,sha256=vA0gDtSRv8FeLScDZJHTNQjSgBw2MaAetLcrDJ9bIaw,2410
17
- parsagon-0.10.20.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
18
- parsagon-0.10.20.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
19
- parsagon-0.10.20.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
20
- parsagon-0.10.20.dist-info/RECORD,,
17
+ parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
18
+ parsagon-0.10.22.dist-info/METADATA,sha256=WMKXNXXNse8ftQwxVvwnV9LTuur6NmF2KCOJr9C8yZI,2410
19
+ parsagon-0.10.22.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
20
+ parsagon-0.10.22.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
21
+ parsagon-0.10.22.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
22
+ parsagon-0.10.22.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.2)
2
+ Generator: bdist_wheel (0.41.3)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5