parsagon 0.12.4__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parsagon/__init__.py CHANGED
@@ -1 +1,3 @@
1
- from parsagon.main import create, update, detail, run, batch_runs, delete, get_product, get_review_article, get_article_list
1
+ from parsagon.main import update, detail, delete, get_product, get_review_article, get_article_list
2
+ from parsagon.create import create_program as create
3
+ from parsagon.runs import run, batch_runs
parsagon/api.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from json import JSONDecodeError
2
2
  import time
3
+ from urllib.parse import quote
3
4
 
4
5
  import httpx
5
6
 
@@ -166,7 +167,13 @@ def create_pipeline(name, description, program_sketch, pseudocode, secrets):
166
167
  return _api_call(
167
168
  httpx.post,
168
169
  "/pipelines/",
169
- json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode, "secrets": secrets},
170
+ json={
171
+ "name": name,
172
+ "description": description,
173
+ "program_sketch": program_sketch,
174
+ "pseudocode": pseudocode,
175
+ "secrets": secrets,
176
+ },
170
177
  )
171
178
 
172
179
 
@@ -195,11 +202,21 @@ def add_examples_to_custom_function(pipeline_id, call_id, custom_function, remov
195
202
  )
196
203
 
197
204
 
205
+ def double_quote(string):
206
+ """
207
+ A bug in Django disallows URLs even with quoted slashes as in:
208
+ /api/pipelines/name/stripe%2Fstuff/code/ HTTP/1.1" 405
209
+ Therefore we must double quote
210
+ """
211
+ return quote(quote(string, safe=""), safe="")
212
+
213
+
198
214
  def get_pipeline(pipeline_name):
215
+ escaped_pipeline_name = double_quote(pipeline_name)
199
216
  with RaiseProgramNotFound(pipeline_name):
200
217
  return _api_call(
201
218
  httpx.get,
202
- f"/pipelines/name/{pipeline_name}/",
219
+ f"/pipelines/name/{escaped_pipeline_name}/?double_escaped=True",
203
220
  )
204
221
 
205
222
 
@@ -207,14 +224,16 @@ def get_pipelines():
207
224
  return _api_call(httpx.get, f"/pipelines/")
208
225
 
209
226
 
210
- def get_pipeline_code(pipeline_name, variables, headless):
227
+ def get_pipeline_code(pipeline_name, variables, headless, use_uc):
228
+ escaped_pipeline_name = double_quote(pipeline_name)
211
229
  with RaiseProgramNotFound(pipeline_name):
212
230
  return _api_call(
213
231
  httpx.post,
214
- f"/pipelines/name/{pipeline_name}/code/",
232
+ f"/pipelines/name/{escaped_pipeline_name}/code/?double_escaped=True",
215
233
  json={
216
234
  "variables": variables,
217
235
  "headless": headless,
236
+ "use_uc": use_uc,
218
237
  },
219
238
  )
220
239
 
@@ -246,11 +265,17 @@ def get_run(run_id):
246
265
 
247
266
 
248
267
  def send_assistant_message(message, thread_id=None):
249
- return _api_call(httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id})
268
+ return _api_call(
269
+ httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id}
270
+ )
250
271
 
251
272
 
252
273
  def send_assistant_function_outputs(outputs, thread_id, run_id):
253
- return _api_call(httpx.post, "/transformers/send-assistant-function-outputs/", json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id})
274
+ return _api_call(
275
+ httpx.post,
276
+ "/transformers/send-assistant-function-outputs/",
277
+ json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id},
278
+ )
254
279
 
255
280
 
256
281
  def poll_extract(url, page_type):
parsagon/assistant.py CHANGED
@@ -4,9 +4,11 @@ from parsagon.create import create_program
4
4
  from parsagon.executor import Executor
5
5
  from parsagon.print import assistant_print, assistant_spinner, browser_print, error_print
6
6
  from rich.prompt import Prompt
7
+ from parsagon.runs import run, batch_runs
7
8
 
8
9
 
9
- def assist(task, headless, infer):
10
+ def assist(verbose=False):
11
+ task = Prompt.ask("Type what do you want to do")
10
12
  with assistant_spinner():
11
13
  response = send_assistant_message(task)
12
14
  while True:
@@ -35,7 +37,16 @@ def assist(task, headless, infer):
35
37
  output["output"] = html
36
38
  outputs.append(output)
37
39
  elif name == "create_program":
38
- result = create_program(args["description"], headless=headless, infer=infer)
40
+ result = create_program(args["description"])
41
+ output["output"] = json.dumps(result)
42
+ outputs.append(output)
43
+ elif name == "run_program":
44
+ result = run(**args)
45
+ output["output"] = json.dumps(result)
46
+ outputs.append(output)
47
+ elif name == "batch_runs":
48
+ batch_name = input("Please enter a name for the batch run (for saving of intermediate results): ")
49
+ result = batch_runs(batch_name, **args)
39
50
  output["output"] = json.dumps(result)
40
51
  outputs.append(output)
41
52
  with assistant_spinner():
parsagon/create.py CHANGED
@@ -6,7 +6,7 @@ from parsagon.secrets import extract_secrets
6
6
  from rich.prompt import Prompt
7
7
 
8
8
 
9
- def create_program(task, headless=False, infer=False):
9
+ def create_program(task, headless=False, infer=False, undetected=False):
10
10
  assistant_print("Creating a program based on your specifications...")
11
11
  task, secrets = extract_secrets(task)
12
12
  program_sketches = get_program_sketches(task)
@@ -28,7 +28,7 @@ def create_program(task, headless=False, infer=False):
28
28
  abridged_program += f"\n\noutput = func({args})\n" # Make the program runnable
29
29
 
30
30
  # Execute the abridged program to gather examples
31
- executor = Executor(headless=headless, infer=infer)
31
+ executor = Executor(headless=headless, infer=infer, use_uc=undetected)
32
32
  executor.execute(abridged_program)
33
33
 
34
34
  # The user must select a name
parsagon/executor.py CHANGED
@@ -54,6 +54,7 @@ ELEMENT_TYPES = {
54
54
  "html": "HTML",
55
55
  "element": "ACTION",
56
56
  "textarea": "TEXT",
57
+ "text_markdown": "TEXT",
57
58
  "markdown": "TEXT",
58
59
  "elem_id": "ACTION",
59
60
  }
parsagon/main.py CHANGED
@@ -1,44 +1,30 @@
1
1
  import argparse
2
- import datetime
3
2
  import json
4
- import logging
5
3
  import logging.config
6
- import psutil
7
4
  import time
8
- import traceback
9
5
 
10
6
  from rich.console import Console
11
- from rich.progress import Progress
12
7
  from rich.prompt import Prompt
13
8
 
14
9
  from parsagon.api import (
15
- get_program_sketches,
16
- create_pipeline,
17
10
  delete_pipeline,
18
11
  add_examples_to_custom_function,
19
- create_pipeline_run,
20
- update_pipeline_run,
21
12
  get_pipeline,
22
13
  get_pipelines,
23
- get_pipeline_code,
24
- get_run,
25
- poll_extract,
14
+ poll_extract
26
15
  )
27
16
  from parsagon.assistant import assist
28
17
  from parsagon.create import create_program
29
- from parsagon.exceptions import ParsagonException, RunFailedException
18
+ from parsagon.exceptions import ParsagonException
30
19
  from parsagon.executor import Executor, custom_functions_to_descriptions
31
- from parsagon.settings import get_api_key, get_settings, clear_settings, save_setting, get_logging_config
20
+ from parsagon.runs import run
21
+ from parsagon.settings import get_api_key, save_setting, configure_logging
32
22
 
33
23
  console = Console()
34
24
  logger = logging.getLogger(__name__)
35
25
 
36
26
 
37
- def configure_logging(verbose):
38
- logging.config.dictConfig(get_logging_config("DEBUG" if verbose else "INFO"))
39
-
40
-
41
- def get_args():
27
+ def get_args(argv):
42
28
  parser = argparse.ArgumentParser(
43
29
  prog="parsagon", description="Scrapes and interacts with web pages based on natural language.", add_help=False
44
30
  )
@@ -58,11 +44,11 @@ def get_args():
58
44
  help="let Parsagon infer all elements to be scraped",
59
45
  )
60
46
  parser_create.add_argument(
61
- "--no_assistant",
47
+ "--undetected",
62
48
  action="store_true",
63
- help="disable the Parsagon assistant",
49
+ help="run in undetected mode",
64
50
  )
65
- parser_create.set_defaults(func=create)
51
+ parser_create.set_defaults(func=create_cli)
66
52
 
67
53
  # Detail
68
54
  parser_detail = subparsers.add_parser(
@@ -142,6 +128,16 @@ def get_args():
142
128
  action="store_true",
143
129
  help="output log data from the run",
144
130
  )
131
+ parser_run.add_argument(
132
+ "--output_file",
133
+ type=str,
134
+ help="write the data to the given file path",
135
+ )
136
+ parser_run.add_argument(
137
+ "--undetected",
138
+ action="store_true",
139
+ help="run in undetected mode",
140
+ )
145
141
  parser_run.set_defaults(func=run)
146
142
 
147
143
  # Delete
@@ -166,33 +162,36 @@ def get_args():
166
162
  )
167
163
  parser_setup.set_defaults(func=setup)
168
164
 
169
- args = parser.parse_args()
165
+ # Help
166
+ parser_help = subparsers.add_parser(
167
+ "help",
168
+ description="Shows help.",
169
+ )
170
+ parser_help.set_defaults(func=help, parser=parser)
171
+
172
+ args = parser.parse_args(argv)
170
173
  kwargs = vars(args)
171
174
  return kwargs, parser
172
175
 
173
176
 
174
- def main():
175
- kwargs, parser = get_args()
176
- func = kwargs.pop("func")
177
+ def main(argv=None):
178
+ kwargs, parser = get_args(argv)
179
+ func = kwargs.pop("func", None)
180
+ if func is None:
181
+ func = assist
177
182
  verbose = kwargs["verbose"]
178
183
  configure_logging(verbose)
179
184
 
180
- if func:
181
- try:
182
- return func(**kwargs)
183
- except ParsagonException as e:
184
- error_message = "Error:\n" + e.to_string(verbose)
185
- logger.error(error_message)
186
- else:
187
- parser.print_help()
185
+ try:
186
+ return func(**kwargs)
187
+ except ParsagonException as e:
188
+ error_message = "Error:\n" + e.to_string(verbose)
189
+ logger.error(error_message)
188
190
 
189
191
 
190
- def create(headless=False, infer=False, no_assistant=False, verbose=False):
191
- task = Prompt.ask("Type what do you want to do")
192
- if no_assistant:
193
- create_program(task, headless=headless, infer=infer)
194
- else:
195
- assist(task, headless=headless, infer=infer)
192
+ def create_cli(headless=False, infer=False, undetected=False, verbose=False):
193
+ task = Prompt.ask("Enter a detailed scraping task")
194
+ create_program(task, headless=headless, infer=infer, undetected=undetected)
196
195
 
197
196
 
198
197
  def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
@@ -209,7 +208,9 @@ def update(program_name, variables={}, headless=False, infer=False, replace=Fals
209
208
  executor.execute(abridged_program)
210
209
 
211
210
  while True:
212
- program_name_input = input(f"Type \"{program_name}\" to update this program, or press enter without typing a name to CANCEL: ")
211
+ program_name_input = input(
212
+ f'Type "{program_name}" to update this program, or press enter without typing a name to CANCEL: '
213
+ )
213
214
  if not program_name_input:
214
215
  logger.info("Canceled update.")
215
216
  return
@@ -242,165 +243,6 @@ def detail(program_name=None, verbose=False):
242
243
  )
243
244
 
244
245
 
245
- def run(program_name, variables={}, headless=False, remote=False, output_log=False, verbose=False):
246
- """
247
- Executes pipeline code
248
- """
249
- if headless and remote:
250
- raise ParsagonException("Cannot run a program remotely in headless mode")
251
-
252
- logger.info("Preparing to run program %s", program_name)
253
- pipeline_id = get_pipeline(program_name)["id"]
254
-
255
- if remote:
256
- result = create_pipeline_run(pipeline_id, variables, False)
257
- with console.status("Program running remotely...") as status:
258
- while True:
259
- run = get_run(result["id"])
260
- status = run["status"]
261
-
262
- if output_log and status in ("FINISHED", "ERROR"):
263
- return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
264
-
265
- if status == "FINISHED":
266
- if verbose:
267
- logger.info(run["log"])
268
- for warning in run["warnings"]:
269
- logger.warning(warning)
270
- logger.info("Program finished running.")
271
- return run["output"]
272
- elif status == "ERROR":
273
- raise ParsagonException(f"Program failed to run: {run['error']}")
274
- elif status == "CANCELED":
275
- raise ParsagonException("Program execution was canceled")
276
-
277
- time.sleep(5)
278
-
279
- run = create_pipeline_run(pipeline_id, variables, True)
280
- code = get_pipeline_code(program_name, variables, headless)["code"]
281
- start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
282
- run_data = {"start_time": start_time}
283
-
284
- logger.info("Running program...")
285
- globals_locals = {"PARSAGON_API_KEY": get_api_key()}
286
- try:
287
- exec(code, globals_locals, globals_locals)
288
- run_data["status"] = "FINISHED"
289
- except:
290
- run_data["status"] = "ERROR"
291
- run_data["error"] = str(traceback.format_exc())
292
- if not output_log:
293
- raise
294
- finally:
295
- end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
296
- run_data["end_time"] = end_time
297
- if "driver" in globals_locals:
298
- globals_locals["driver"].quit()
299
- if "display" in globals_locals:
300
- globals_locals["display"].stop()
301
- if "parsagon_log" in globals_locals:
302
- run_data["log"] = "\n".join(globals_locals["parsagon_log"])
303
- logger.info(run_data["log"])
304
- if "parsagon_warnings" in globals_locals:
305
- run_data["warnings"] = globals_locals["parsagon_warnings"]
306
- for proc in psutil.process_iter():
307
- try:
308
- if proc.name() == "chromedriver":
309
- proc.kill()
310
- except psutil.NoSuchProcess:
311
- continue
312
- run = update_pipeline_run(run["id"], run_data)
313
- logger.info("Done.")
314
- if output_log:
315
- if "error" not in run_data:
316
- run["output"] = globals_locals["output"]
317
- return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
318
- return globals_locals["output"]
319
-
320
-
321
- def batch_runs(batch_name, program_name, runs, headless=False, ignore_errors=False, error_value=None, rerun_warnings=False, rerun_warning_types=[], rerun_errors=False, verbose=False):
322
- save_file = f"{batch_name}.json"
323
- try:
324
- with open(save_file) as f:
325
- outputs = json.load(f)
326
- except FileNotFoundError:
327
- outputs = []
328
- metadata_file = f"{batch_name}_metadata.json"
329
- try:
330
- with open(metadata_file) as f:
331
- metadata = json.load(f)
332
- except FileNotFoundError:
333
- metadata = []
334
-
335
- num_initial_results = len(outputs)
336
- error = None
337
- variables = None
338
- try:
339
- default_desc = f'Running program "{program_name}"'
340
- with Progress() as progress:
341
- task = progress.add_task(default_desc, total=len(runs))
342
- for i, variables in progress.track(enumerate(runs), task_id=task):
343
- if i < num_initial_results:
344
- if rerun_errors and metadata[i]["status"] == "ERROR":
345
- pass
346
- elif rerun_warnings and metadata[i]["warnings"]:
347
- if not rerun_warning_types or any(warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]):
348
- pass
349
- else:
350
- continue
351
- else:
352
- continue
353
- for j in range(3):
354
- result = run(program_name, variables, headless, output_log=True)
355
- if result["status"] != "ERROR":
356
- output = result.pop("output")
357
- if i < num_initial_results:
358
- outputs[i] = output
359
- metadata[i] = result
360
- else:
361
- outputs.append(output)
362
- metadata.append(result)
363
- break
364
- else:
365
- error = result["error"].strip().split("\n")[-1]
366
- if j < 2:
367
- progress.update(task, description=f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)")
368
- time.sleep(60)
369
- progress.update(task, description=default_desc)
370
- error = None
371
- continue
372
- else:
373
- if ignore_errors:
374
- error = None
375
- if i < num_initial_results:
376
- outputs[i] = error_value
377
- else:
378
- outputs.append(error_value)
379
- break
380
- else:
381
- raise RunFailedException
382
- except RunFailedException:
383
- pass
384
- except Exception as e:
385
- error = repr(e)
386
- finally:
387
- configure_logging(verbose)
388
- if error:
389
- logger.error(f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
390
- with open(save_file, "w") as f:
391
- json.dump(outputs, f)
392
- with open(metadata_file, "w") as f:
393
- json.dump(metadata, f)
394
- num_warnings = 0
395
- num_runs_with_warnings = 0
396
- for m in metadata:
397
- if m["warnings"]:
398
- num_warnings += len(m["warnings"])
399
- num_runs_with_warnings += 1
400
- logger.info(f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n")
401
- return None if error else outputs
402
-
403
-
404
246
  def delete(program_name, verbose=False, confirm_with_user=False):
405
247
  if (
406
248
  confirm_with_user
@@ -430,6 +272,10 @@ def setup(verbose=False):
430
272
  logger.info("Setup complete.")
431
273
 
432
274
 
275
+ def help(parser, verbose):
276
+ parser.print_help()
277
+
278
+
433
279
  def _get_data(url, page_type, timeout):
434
280
  start_time = time.time()
435
281
  with console.status("Extracting data...") as status:
parsagon/runs.py ADDED
@@ -0,0 +1,227 @@
1
+ import datetime
2
+ import json
3
+ import logging.config
4
+ import time
5
+ import traceback
6
+
7
+ import psutil
8
+ from rich.console import Console
9
+ from rich.progress import Progress
10
+ from rich.prompt import Prompt
11
+
12
+ from parsagon.api import (
13
+ create_pipeline_run,
14
+ update_pipeline_run,
15
+ get_pipeline,
16
+ get_pipeline_code,
17
+ get_run,
18
+ )
19
+ from parsagon.exceptions import ParsagonException, RunFailedException
20
+ from parsagon.settings import get_api_key
21
+
22
+ console = Console()
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ def run(program_name, variables={}, headless=False, remote=False, output_log=False, output_file=None, undetected=False, verbose=False):
27
+ """
28
+ Executes pipeline code
29
+ """
30
+ if headless and remote:
31
+ raise ParsagonException("Cannot run a program remotely in headless mode")
32
+
33
+ if not isinstance(variables, dict):
34
+ raise ParsagonException("Variables must be a dictionary")
35
+
36
+ logger.info("Preparing to run program %s", program_name)
37
+ pipeline_id = get_pipeline(program_name)["id"]
38
+
39
+ if remote:
40
+ result = create_pipeline_run(pipeline_id, variables, False)
41
+ with console.status("Program running remotely...") as status:
42
+ while True:
43
+ run = get_run(result["id"])
44
+ status = run["status"]
45
+
46
+ if output_log and status in ("FINISHED", "ERROR"):
47
+ result = {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
48
+ if output_file:
49
+ with open(output_file, "w") as f:
50
+ json.dump(result, f, indent=4)
51
+ return
52
+ else:
53
+ return result
54
+
55
+ if status == "FINISHED":
56
+ if verbose:
57
+ logger.info(run["log"])
58
+ for warning in run["warnings"]:
59
+ logger.warning(warning)
60
+ logger.info("Program finished running.")
61
+ result = run["output"]
62
+ if output_file:
63
+ with open(output_file, "w") as f:
64
+ json.dump(result, f, indent=4)
65
+ return
66
+ else:
67
+ return result
68
+ elif status == "ERROR":
69
+ raise ParsagonException(f"Program failed to run: {run['error']}")
70
+ elif status == "CANCELED":
71
+ raise ParsagonException("Program execution was canceled")
72
+
73
+ time.sleep(5)
74
+
75
+ run = create_pipeline_run(pipeline_id, variables, True)
76
+ code = get_pipeline_code(program_name, variables, headless, undetected)["code"]
77
+ start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
78
+ run_data = {"start_time": start_time}
79
+
80
+ logger.info("Running program...")
81
+ globals_locals = {"PARSAGON_API_KEY": get_api_key()}
82
+ try:
83
+ exec(code, globals_locals, globals_locals)
84
+ run_data["status"] = "FINISHED"
85
+ except:
86
+ run_data["status"] = "ERROR"
87
+ run_data["error"] = str(traceback.format_exc())
88
+ if not output_log:
89
+ raise
90
+ finally:
91
+ end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
92
+ run_data["end_time"] = end_time
93
+ if "driver" in globals_locals:
94
+ globals_locals["driver"].quit()
95
+ if "display" in globals_locals:
96
+ globals_locals["display"].stop()
97
+ if "parsagon_log" in globals_locals:
98
+ run_data["log"] = "\n".join(globals_locals["parsagon_log"])
99
+ logger.info(run_data["log"])
100
+ if "parsagon_warnings" in globals_locals:
101
+ run_data["warnings"] = globals_locals["parsagon_warnings"]
102
+ for proc in psutil.process_iter():
103
+ try:
104
+ if proc.name() == "chromedriver":
105
+ proc.kill()
106
+ except psutil.NoSuchProcess:
107
+ continue
108
+ run = update_pipeline_run(run["id"], run_data)
109
+ logger.info("Done.")
110
+
111
+ result = globals_locals["output"]
112
+ if output_log:
113
+ if "error" not in run_data:
114
+ run["output"] = globals_locals["output"]
115
+ result = {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
116
+ if output_file:
117
+ with open(output_file, "w") as f:
118
+ json.dump(result, f, indent=4)
119
+ return
120
+ else:
121
+ return result
122
+
123
+
124
+ def batch_runs(
125
+ batch_name,
126
+ program_name,
127
+ runs,
128
+ headless=False,
129
+ ignore_errors=False,
130
+ error_value=None,
131
+ rerun_warnings=False,
132
+ rerun_warning_types=[],
133
+ rerun_errors=False,
134
+ verbose=False,
135
+ ):
136
+ # Validate runs
137
+ if not all(isinstance(run_, dict) for run_ in runs):
138
+ raise ParsagonException("Runs must be a list of dictionaries")
139
+
140
+ save_file = f"{batch_name}.json"
141
+ try:
142
+ with open(save_file) as f:
143
+ outputs = json.load(f)
144
+ except FileNotFoundError:
145
+ outputs = []
146
+ metadata_file = f"{batch_name}_metadata.json"
147
+ try:
148
+ with open(metadata_file) as f:
149
+ metadata = json.load(f)
150
+ except FileNotFoundError:
151
+ metadata = []
152
+
153
+ num_initial_results = len(outputs)
154
+ error = None
155
+ variables = None
156
+ try:
157
+ default_desc = f'Running program "{program_name}"'
158
+ with Progress() as progress:
159
+ task = progress.add_task(default_desc, total=len(runs))
160
+ for i, variables in progress.track(enumerate(runs), task_id=task):
161
+ if i < num_initial_results:
162
+ if rerun_errors and metadata[i]["status"] == "ERROR":
163
+ pass
164
+ elif rerun_warnings and metadata[i]["warnings"]:
165
+ if not rerun_warning_types or any(
166
+ warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]
167
+ ):
168
+ pass
169
+ else:
170
+ continue
171
+ else:
172
+ continue
173
+ for j in range(3):
174
+ result = run(program_name, variables, headless, output_log=True)
175
+ if result["status"] != "ERROR":
176
+ output = result.pop("output")
177
+ if i < num_initial_results:
178
+ outputs[i] = output
179
+ metadata[i] = result
180
+ else:
181
+ outputs.append(output)
182
+ metadata.append(result)
183
+ break
184
+ else:
185
+ error = result["error"].strip().split("\n")[-1]
186
+ if j < 2:
187
+ progress.update(
188
+ task,
189
+ description=f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)",
190
+ )
191
+ time.sleep(60)
192
+ progress.update(task, description=default_desc)
193
+ error = None
194
+ continue
195
+ else:
196
+ if ignore_errors:
197
+ error = None
198
+ if i < num_initial_results:
199
+ outputs[i] = error_value
200
+ else:
201
+ outputs.append(error_value)
202
+ break
203
+ else:
204
+ raise RunFailedException
205
+ except RunFailedException:
206
+ pass
207
+ except Exception as e:
208
+ error = repr(e)
209
+ finally:
210
+ if error:
211
+ logger.error(
212
+ f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume."
213
+ )
214
+ with open(save_file, "w") as f:
215
+ json.dump(outputs, f)
216
+ with open(metadata_file, "w") as f:
217
+ json.dump(metadata, f)
218
+ num_warnings = 0
219
+ num_runs_with_warnings = 0
220
+ for m in metadata:
221
+ if m["warnings"]:
222
+ num_warnings += len(m["warnings"])
223
+ num_runs_with_warnings += 1
224
+ logger.info(
225
+ f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n"
226
+ )
227
+ return None if error else outputs
parsagon/settings.py CHANGED
@@ -3,6 +3,7 @@ import logging
3
3
  import sys
4
4
  from os import environ
5
5
  from pathlib import Path
6
+ import logging.config
6
7
 
7
8
  from parsagon.exceptions import ParsagonException
8
9
 
@@ -114,3 +115,7 @@ def get_logging_config(log_level="INFO"):
114
115
  },
115
116
  },
116
117
  }
118
+
119
+
120
+ def configure_logging(verbose):
121
+ logging.config.dictConfig(get_logging_config("DEBUG" if verbose else "INFO"))
@@ -53,7 +53,7 @@ def mock_httpx_method_func(*args, **kwargs):
53
53
  },
54
54
  )
55
55
 
56
- if match := re.search(r"/pipelines/name/(.+)/$", url):
56
+ if match := re.search(r"/pipelines/name/(.+)/", url):
57
57
  assert method == "get"
58
58
  pipeline_name = match.group(1)
59
59
  if pipeline_name == not_found_pipeline_name:
@@ -1,23 +1,13 @@
1
1
  import pytest
2
2
 
3
- from parsagon import run
4
- from parsagon.tests.cli_mocks import call_cli
3
+ from parsagon.main import main
5
4
 
6
5
 
7
6
  def test_headless_remote_run_invalid(mocker, debug_logs):
8
7
  """
9
8
  Tests that we are unable to run a program in headless mode when the environment is remote, and that this is logged to the user.
10
9
  """
11
- call_cli(
12
- mocker,
13
- {
14
- "func": run,
15
- "program_name": "test_program",
16
- "headless": True,
17
- "remote": True,
18
- "verbose": False,
19
- },
20
- )
10
+ main(["run", "test_program", "--headless", "--remote"])
21
11
  debug_logs_lower = debug_logs.text.lower()
22
12
  assert "error" in debug_logs_lower
23
13
  assert "headless" in debug_logs_lower
@@ -6,7 +6,6 @@ import pytest
6
6
  from parsagon import delete, run
7
7
  from parsagon.main import main
8
8
  from parsagon.tests.api_mocks import install_api_mocks, not_found_pipeline_name
9
- from parsagon.tests.cli_mocks import call_cli
10
9
 
11
10
 
12
11
  def test_pipeline_delete(mocker):
@@ -18,24 +17,10 @@ def test_pipeline_not_found(mocker, debug_logs):
18
17
  install_api_mocks(mocker, {"code_to_return": 'raise Exception("Should not exec this code if pipeline not found.")'})
19
18
 
20
19
  # On delete
21
- call_cli(
22
- mocker,
23
- {
24
- "func": delete,
25
- "program_name": not_found_pipeline_name,
26
- "verbose": False,
27
- },
28
- )
20
+ main(["delete", not_found_pipeline_name, "-y"])
29
21
  assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
30
22
  debug_logs.clear()
31
23
 
32
24
  # On attempted run
33
- call_cli(
34
- mocker,
35
- {
36
- "func": run,
37
- "program_name": not_found_pipeline_name,
38
- "verbose": False,
39
- },
40
- )
25
+ main(["run", not_found_pipeline_name])
41
26
  assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.12.4
3
+ Version: 0.14.0
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -0,0 +1,25 @@
1
+ parsagon/__init__.py,sha256=2bUc4R8F8Mz_AEiTnWTiBAe8Yc-i4GgM_TTiFfQHuS8,195
2
+ parsagon/api.py,sha256=Fqly1gOs3Rcn7Ct6cYJN0QXfMv-_rd5W2woXrxlUXg0,8710
3
+ parsagon/assistant.py,sha256=vCjOFRFxkA0uVuihtj4vu7cDm7fL9LJezTzvTpw7uqQ,2966
4
+ parsagon/create.py,sha256=UD0FsAHRfvYMbzYQvNnbgGaSl7ph4w9wSZLfteiT2Kg,3384
5
+ parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
6
+ parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
7
+ parsagon/executor.py,sha256=csstgKXxT2L7_qQUL4kDL_qHHlM6A3ocEuSPZXTeu6k,23604
8
+ parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
9
+ parsagon/main.py,sha256=R2aUxxOvaLK91OriOJdnoBSQ35EkbBC3OniQvlkodg8,8865
10
+ parsagon/print.py,sha256=w1pg6BPZNjFdjneXla2AVZCDxWXXtQ5kPSGr2SBTwag,399
11
+ parsagon/runs.py,sha256=qhxwNUCtdApthwLVhOWAY8o6JPVSluW1s4ImkifLlcU,8521
12
+ parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
13
+ parsagon/settings.py,sha256=6SXcxoYlACrq1iiMpvX-4OVpYuzyGvU8Nf_FQ7L0uZk,3085
14
+ parsagon/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ parsagon/tests/api_mocks.py,sha256=A5zuP0MH23g1JAqt2JWponEChayWF6t99ANkrhiX-7k,3126
16
+ parsagon/tests/conftest.py,sha256=KMlHohc0QT77HzumraIojzKeqroyxarnaT6naJDNvEc,428
17
+ parsagon/tests/test_executor.py,sha256=n3cmh84r74siSeJqUeAIwjjnNzDVPEdxcvYAeJ4hNX8,645
18
+ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEKQYaHek,482
19
+ parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
20
+ parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
21
+ parsagon-0.14.0.dist-info/METADATA,sha256=llI4zum5BNN8DE5EVfRZvabjzdwSGfI_fNzn7asn33o,2380
22
+ parsagon-0.14.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
23
+ parsagon-0.14.0.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
24
+ parsagon-0.14.0.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
25
+ parsagon-0.14.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.41.3)
2
+ Generator: bdist_wheel (0.42.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,16 +0,0 @@
1
- from parsagon.main import main
2
-
3
-
4
- def call_cli(mocker, args):
5
- """
6
- Uses the mocker to pretend that the args passed are coming from argparse, then calls the main function.
7
- """
8
-
9
- mocker.patch(
10
- "parsagon.main.get_args",
11
- lambda: (
12
- args,
13
- None,
14
- ),
15
- )
16
- return main()
@@ -1,25 +0,0 @@
1
- parsagon/__init__.py,sha256=n4-wiFVVuyW_KOJeNiycggAg9BTa5bbBIVpD_DkdOO4,125
2
- parsagon/api.py,sha256=5tpxSp637iEKGkzyt7YVdOa0PQdtCnVl91RNHjEJUW8,8098
3
- parsagon/assistant.py,sha256=yc0f5AYXjBAQcKQqhtcyH0shYFhVNfC_VINBkEZwZrc,2402
4
- parsagon/create.py,sha256=3rJ17O2zMRdAuGMCOXPgE6e2KqzORgaobkMKBiMAhqQ,3347
5
- parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
6
- parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
7
- parsagon/executor.py,sha256=q0b44sVeFBV0rNQi5_5gLw__QzS6XPA8A2PRsVazk0E,23575
8
- parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
9
- parsagon/main.py,sha256=dQPH5wMowrDbcQljwd2W4Lyq3o41srFXVoRF2O14XXc,15665
10
- parsagon/print.py,sha256=w1pg6BPZNjFdjneXla2AVZCDxWXXtQ5kPSGr2SBTwag,399
11
- parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
12
- parsagon/settings.py,sha256=s5_MsDMFM5tB8U8tfHaFnKibCoEqPnAu8b_ueg07Ftw,2947
13
- parsagon/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- parsagon/tests/api_mocks.py,sha256=M8xhiyPa1dI8Vx-odDk7ETopfFAfcjfAf-ApmSqgvfw,3127
15
- parsagon/tests/cli_mocks.py,sha256=Y4W_wgH6ixQRCk8xVdWOwDJ_ChD09XdZEV2xUVXWFiM,327
16
- parsagon/tests/conftest.py,sha256=KMlHohc0QT77HzumraIojzKeqroyxarnaT6naJDNvEc,428
17
- parsagon/tests/test_executor.py,sha256=n3cmh84r74siSeJqUeAIwjjnNzDVPEdxcvYAeJ4hNX8,645
18
- parsagon/tests/test_invalid_args.py,sha256=kOjMpbZvviR1CwvXReteZMxBvuhq_rOv5Tm1muBSzNk,676
19
- parsagon/tests/test_pipeline_operations.py,sha256=TpBKCuRA8LHYWx3PD_k9mYCSsA_9SZjrOX-rS4mE8XE,1089
20
- parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
21
- parsagon-0.12.4.dist-info/METADATA,sha256=BjBWs2zC7TGgfKhigLEaAP3jHRQFkqNRSKqm5arf5dg,2380
22
- parsagon-0.12.4.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
23
- parsagon-0.12.4.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
24
- parsagon-0.12.4.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
25
- parsagon-0.12.4.dist-info/RECORD,,