parsagon 0.12.4__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsagon/__init__.py +3 -1
- parsagon/api.py +31 -6
- parsagon/assistant.py +13 -2
- parsagon/create.py +2 -2
- parsagon/executor.py +1 -0
- parsagon/main.py +46 -200
- parsagon/runs.py +227 -0
- parsagon/settings.py +5 -0
- parsagon/tests/api_mocks.py +1 -1
- parsagon/tests/test_invalid_args.py +2 -12
- parsagon/tests/test_pipeline_operations.py +2 -17
- {parsagon-0.12.4.dist-info → parsagon-0.14.0.dist-info}/METADATA +1 -1
- parsagon-0.14.0.dist-info/RECORD +25 -0
- {parsagon-0.12.4.dist-info → parsagon-0.14.0.dist-info}/WHEEL +1 -1
- parsagon/tests/cli_mocks.py +0 -16
- parsagon-0.12.4.dist-info/RECORD +0 -25
- {parsagon-0.12.4.dist-info → parsagon-0.14.0.dist-info}/entry_points.txt +0 -0
- {parsagon-0.12.4.dist-info → parsagon-0.14.0.dist-info}/top_level.txt +0 -0
parsagon/__init__.py
CHANGED
@@ -1 +1,3 @@
|
|
1
|
-
from parsagon.main import
|
1
|
+
from parsagon.main import update, detail, delete, get_product, get_review_article, get_article_list
|
2
|
+
from parsagon.create import create_program as create
|
3
|
+
from parsagon.runs import run, batch_runs
|
parsagon/api.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from json import JSONDecodeError
|
2
2
|
import time
|
3
|
+
from urllib.parse import quote
|
3
4
|
|
4
5
|
import httpx
|
5
6
|
|
@@ -166,7 +167,13 @@ def create_pipeline(name, description, program_sketch, pseudocode, secrets):
|
|
166
167
|
return _api_call(
|
167
168
|
httpx.post,
|
168
169
|
"/pipelines/",
|
169
|
-
json={
|
170
|
+
json={
|
171
|
+
"name": name,
|
172
|
+
"description": description,
|
173
|
+
"program_sketch": program_sketch,
|
174
|
+
"pseudocode": pseudocode,
|
175
|
+
"secrets": secrets,
|
176
|
+
},
|
170
177
|
)
|
171
178
|
|
172
179
|
|
@@ -195,11 +202,21 @@ def add_examples_to_custom_function(pipeline_id, call_id, custom_function, remov
|
|
195
202
|
)
|
196
203
|
|
197
204
|
|
205
|
+
def double_quote(string):
    """
    Percent-encode ``string`` twice, treating no character (not even "/") as safe.

    A bug in Django disallows URLs even with singly quoted slashes, as in:
    /api/pipelines/name/stripe%2Fstuff/code/ HTTP/1.1" 405
    so the value is quoted a second time (the "%" of the first pass becomes "%25").
    """
    quoted_once = quote(string, safe="")
    quoted_twice = quote(quoted_once, safe="")
    return quoted_twice
|
212
|
+
|
213
|
+
|
198
214
|
def get_pipeline(pipeline_name):
|
215
|
+
escaped_pipeline_name = double_quote(pipeline_name)
|
199
216
|
with RaiseProgramNotFound(pipeline_name):
|
200
217
|
return _api_call(
|
201
218
|
httpx.get,
|
202
|
-
f"/pipelines/name/{
|
219
|
+
f"/pipelines/name/{escaped_pipeline_name}/?double_escaped=True",
|
203
220
|
)
|
204
221
|
|
205
222
|
|
@@ -207,14 +224,16 @@ def get_pipelines():
|
|
207
224
|
return _api_call(httpx.get, f"/pipelines/")
|
208
225
|
|
209
226
|
|
210
|
-
def get_pipeline_code(pipeline_name, variables, headless):
|
227
|
+
def get_pipeline_code(pipeline_name, variables, headless, use_uc):
|
228
|
+
escaped_pipeline_name = double_quote(pipeline_name)
|
211
229
|
with RaiseProgramNotFound(pipeline_name):
|
212
230
|
return _api_call(
|
213
231
|
httpx.post,
|
214
|
-
f"/pipelines/name/{
|
232
|
+
f"/pipelines/name/{escaped_pipeline_name}/code/?double_escaped=True",
|
215
233
|
json={
|
216
234
|
"variables": variables,
|
217
235
|
"headless": headless,
|
236
|
+
"use_uc": use_uc,
|
218
237
|
},
|
219
238
|
)
|
220
239
|
|
@@ -246,11 +265,17 @@ def get_run(run_id):
|
|
246
265
|
|
247
266
|
|
248
267
|
def send_assistant_message(message, thread_id=None):
|
249
|
-
return _api_call(
|
268
|
+
return _api_call(
|
269
|
+
httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id}
|
270
|
+
)
|
250
271
|
|
251
272
|
|
252
273
|
def send_assistant_function_outputs(outputs, thread_id, run_id):
|
253
|
-
return _api_call(
|
274
|
+
return _api_call(
|
275
|
+
httpx.post,
|
276
|
+
"/transformers/send-assistant-function-outputs/",
|
277
|
+
json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id},
|
278
|
+
)
|
254
279
|
|
255
280
|
|
256
281
|
def poll_extract(url, page_type):
|
parsagon/assistant.py
CHANGED
@@ -4,9 +4,11 @@ from parsagon.create import create_program
|
|
4
4
|
from parsagon.executor import Executor
|
5
5
|
from parsagon.print import assistant_print, assistant_spinner, browser_print, error_print
|
6
6
|
from rich.prompt import Prompt
|
7
|
+
from parsagon.runs import run, batch_runs
|
7
8
|
|
8
9
|
|
9
|
-
def assist(
|
10
|
+
def assist(verbose=False):
|
11
|
+
task = Prompt.ask("Type what do you want to do")
|
10
12
|
with assistant_spinner():
|
11
13
|
response = send_assistant_message(task)
|
12
14
|
while True:
|
@@ -35,7 +37,16 @@ def assist(task, headless, infer):
|
|
35
37
|
output["output"] = html
|
36
38
|
outputs.append(output)
|
37
39
|
elif name == "create_program":
|
38
|
-
result = create_program(args["description"]
|
40
|
+
result = create_program(args["description"])
|
41
|
+
output["output"] = json.dumps(result)
|
42
|
+
outputs.append(output)
|
43
|
+
elif name == "run_program":
|
44
|
+
result = run(**args)
|
45
|
+
output["output"] = json.dumps(result)
|
46
|
+
outputs.append(output)
|
47
|
+
elif name == "batch_runs":
|
48
|
+
batch_name = input("Please enter a name for the batch run (for saving of intermediate results): ")
|
49
|
+
result = batch_runs(batch_name, **args)
|
39
50
|
output["output"] = json.dumps(result)
|
40
51
|
outputs.append(output)
|
41
52
|
with assistant_spinner():
|
parsagon/create.py
CHANGED
@@ -6,7 +6,7 @@ from parsagon.secrets import extract_secrets
|
|
6
6
|
from rich.prompt import Prompt
|
7
7
|
|
8
8
|
|
9
|
-
def create_program(task, headless=False, infer=False):
|
9
|
+
def create_program(task, headless=False, infer=False, undetected=False):
|
10
10
|
assistant_print("Creating a program based on your specifications...")
|
11
11
|
task, secrets = extract_secrets(task)
|
12
12
|
program_sketches = get_program_sketches(task)
|
@@ -28,7 +28,7 @@ def create_program(task, headless=False, infer=False):
|
|
28
28
|
abridged_program += f"\n\noutput = func({args})\n" # Make the program runnable
|
29
29
|
|
30
30
|
# Execute the abridged program to gather examples
|
31
|
-
executor = Executor(headless=headless, infer=infer)
|
31
|
+
executor = Executor(headless=headless, infer=infer, use_uc=undetected)
|
32
32
|
executor.execute(abridged_program)
|
33
33
|
|
34
34
|
# The user must select a name
|
parsagon/executor.py
CHANGED
parsagon/main.py
CHANGED
@@ -1,44 +1,30 @@
|
|
1
1
|
import argparse
|
2
|
-
import datetime
|
3
2
|
import json
|
4
|
-
import logging
|
5
3
|
import logging.config
|
6
|
-
import psutil
|
7
4
|
import time
|
8
|
-
import traceback
|
9
5
|
|
10
6
|
from rich.console import Console
|
11
|
-
from rich.progress import Progress
|
12
7
|
from rich.prompt import Prompt
|
13
8
|
|
14
9
|
from parsagon.api import (
|
15
|
-
get_program_sketches,
|
16
|
-
create_pipeline,
|
17
10
|
delete_pipeline,
|
18
11
|
add_examples_to_custom_function,
|
19
|
-
create_pipeline_run,
|
20
|
-
update_pipeline_run,
|
21
12
|
get_pipeline,
|
22
13
|
get_pipelines,
|
23
|
-
|
24
|
-
get_run,
|
25
|
-
poll_extract,
|
14
|
+
poll_extract
|
26
15
|
)
|
27
16
|
from parsagon.assistant import assist
|
28
17
|
from parsagon.create import create_program
|
29
|
-
from parsagon.exceptions import ParsagonException
|
18
|
+
from parsagon.exceptions import ParsagonException
|
30
19
|
from parsagon.executor import Executor, custom_functions_to_descriptions
|
31
|
-
from parsagon.
|
20
|
+
from parsagon.runs import run
|
21
|
+
from parsagon.settings import get_api_key, save_setting, configure_logging
|
32
22
|
|
33
23
|
console = Console()
|
34
24
|
logger = logging.getLogger(__name__)
|
35
25
|
|
36
26
|
|
37
|
-
def
|
38
|
-
logging.config.dictConfig(get_logging_config("DEBUG" if verbose else "INFO"))
|
39
|
-
|
40
|
-
|
41
|
-
def get_args():
|
27
|
+
def get_args(argv):
|
42
28
|
parser = argparse.ArgumentParser(
|
43
29
|
prog="parsagon", description="Scrapes and interacts with web pages based on natural language.", add_help=False
|
44
30
|
)
|
@@ -58,11 +44,11 @@ def get_args():
|
|
58
44
|
help="let Parsagon infer all elements to be scraped",
|
59
45
|
)
|
60
46
|
parser_create.add_argument(
|
61
|
-
"--
|
47
|
+
"--undetected",
|
62
48
|
action="store_true",
|
63
|
-
help="
|
49
|
+
help="run in undetected mode",
|
64
50
|
)
|
65
|
-
parser_create.set_defaults(func=
|
51
|
+
parser_create.set_defaults(func=create_cli)
|
66
52
|
|
67
53
|
# Detail
|
68
54
|
parser_detail = subparsers.add_parser(
|
@@ -142,6 +128,16 @@ def get_args():
|
|
142
128
|
action="store_true",
|
143
129
|
help="output log data from the run",
|
144
130
|
)
|
131
|
+
parser_run.add_argument(
|
132
|
+
"--output_file",
|
133
|
+
type=str,
|
134
|
+
help="write the data to the given file path",
|
135
|
+
)
|
136
|
+
parser_run.add_argument(
|
137
|
+
"--undetected",
|
138
|
+
action="store_true",
|
139
|
+
help="run in undetected mode",
|
140
|
+
)
|
145
141
|
parser_run.set_defaults(func=run)
|
146
142
|
|
147
143
|
# Delete
|
@@ -166,33 +162,36 @@ def get_args():
|
|
166
162
|
)
|
167
163
|
parser_setup.set_defaults(func=setup)
|
168
164
|
|
169
|
-
|
165
|
+
# Help
|
166
|
+
parser_help = subparsers.add_parser(
|
167
|
+
"help",
|
168
|
+
description="Shows help.",
|
169
|
+
)
|
170
|
+
parser_help.set_defaults(func=help, parser=parser)
|
171
|
+
|
172
|
+
args = parser.parse_args(argv)
|
170
173
|
kwargs = vars(args)
|
171
174
|
return kwargs, parser
|
172
175
|
|
173
176
|
|
174
|
-
def main():
|
175
|
-
kwargs, parser = get_args()
|
176
|
-
func = kwargs.pop("func")
|
177
|
+
def main(argv=None):
|
178
|
+
kwargs, parser = get_args(argv)
|
179
|
+
func = kwargs.pop("func", None)
|
180
|
+
if func is None:
|
181
|
+
func = assist
|
177
182
|
verbose = kwargs["verbose"]
|
178
183
|
configure_logging(verbose)
|
179
184
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
logger.error(error_message)
|
186
|
-
else:
|
187
|
-
parser.print_help()
|
185
|
+
try:
|
186
|
+
return func(**kwargs)
|
187
|
+
except ParsagonException as e:
|
188
|
+
error_message = "Error:\n" + e.to_string(verbose)
|
189
|
+
logger.error(error_message)
|
188
190
|
|
189
191
|
|
190
|
-
def
|
191
|
-
task = Prompt.ask("
|
192
|
-
|
193
|
-
create_program(task, headless=headless, infer=infer)
|
194
|
-
else:
|
195
|
-
assist(task, headless=headless, infer=infer)
|
192
|
+
def create_cli(headless=False, infer=False, undetected=False, verbose=False):
    """
    CLI handler for the ``create`` subcommand.

    Prompts the user for a scraping task and forwards it, together with the
    browser options, to ``create_program``. ``verbose`` is accepted because the
    CLI passes it to every handler; it is not used here.
    """
    browser_options = {"headless": headless, "infer": infer, "undetected": undetected}
    task = Prompt.ask("Enter a detailed scraping task")
    create_program(task, **browser_options)
|
196
195
|
|
197
196
|
|
198
197
|
def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
|
@@ -209,7 +208,9 @@ def update(program_name, variables={}, headless=False, infer=False, replace=Fals
|
|
209
208
|
executor.execute(abridged_program)
|
210
209
|
|
211
210
|
while True:
|
212
|
-
program_name_input = input(
|
211
|
+
program_name_input = input(
|
212
|
+
f'Type "{program_name}" to update this program, or press enter without typing a name to CANCEL: '
|
213
|
+
)
|
213
214
|
if not program_name_input:
|
214
215
|
logger.info("Canceled update.")
|
215
216
|
return
|
@@ -242,165 +243,6 @@ def detail(program_name=None, verbose=False):
|
|
242
243
|
)
|
243
244
|
|
244
245
|
|
245
|
-
def run(program_name, variables={}, headless=False, remote=False, output_log=False, verbose=False):
|
246
|
-
"""
|
247
|
-
Executes pipeline code
|
248
|
-
"""
|
249
|
-
if headless and remote:
|
250
|
-
raise ParsagonException("Cannot run a program remotely in headless mode")
|
251
|
-
|
252
|
-
logger.info("Preparing to run program %s", program_name)
|
253
|
-
pipeline_id = get_pipeline(program_name)["id"]
|
254
|
-
|
255
|
-
if remote:
|
256
|
-
result = create_pipeline_run(pipeline_id, variables, False)
|
257
|
-
with console.status("Program running remotely...") as status:
|
258
|
-
while True:
|
259
|
-
run = get_run(result["id"])
|
260
|
-
status = run["status"]
|
261
|
-
|
262
|
-
if output_log and status in ("FINISHED", "ERROR"):
|
263
|
-
return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
|
264
|
-
|
265
|
-
if status == "FINISHED":
|
266
|
-
if verbose:
|
267
|
-
logger.info(run["log"])
|
268
|
-
for warning in run["warnings"]:
|
269
|
-
logger.warning(warning)
|
270
|
-
logger.info("Program finished running.")
|
271
|
-
return run["output"]
|
272
|
-
elif status == "ERROR":
|
273
|
-
raise ParsagonException(f"Program failed to run: {run['error']}")
|
274
|
-
elif status == "CANCELED":
|
275
|
-
raise ParsagonException("Program execution was canceled")
|
276
|
-
|
277
|
-
time.sleep(5)
|
278
|
-
|
279
|
-
run = create_pipeline_run(pipeline_id, variables, True)
|
280
|
-
code = get_pipeline_code(program_name, variables, headless)["code"]
|
281
|
-
start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
282
|
-
run_data = {"start_time": start_time}
|
283
|
-
|
284
|
-
logger.info("Running program...")
|
285
|
-
globals_locals = {"PARSAGON_API_KEY": get_api_key()}
|
286
|
-
try:
|
287
|
-
exec(code, globals_locals, globals_locals)
|
288
|
-
run_data["status"] = "FINISHED"
|
289
|
-
except:
|
290
|
-
run_data["status"] = "ERROR"
|
291
|
-
run_data["error"] = str(traceback.format_exc())
|
292
|
-
if not output_log:
|
293
|
-
raise
|
294
|
-
finally:
|
295
|
-
end_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
296
|
-
run_data["end_time"] = end_time
|
297
|
-
if "driver" in globals_locals:
|
298
|
-
globals_locals["driver"].quit()
|
299
|
-
if "display" in globals_locals:
|
300
|
-
globals_locals["display"].stop()
|
301
|
-
if "parsagon_log" in globals_locals:
|
302
|
-
run_data["log"] = "\n".join(globals_locals["parsagon_log"])
|
303
|
-
logger.info(run_data["log"])
|
304
|
-
if "parsagon_warnings" in globals_locals:
|
305
|
-
run_data["warnings"] = globals_locals["parsagon_warnings"]
|
306
|
-
for proc in psutil.process_iter():
|
307
|
-
try:
|
308
|
-
if proc.name() == "chromedriver":
|
309
|
-
proc.kill()
|
310
|
-
except psutil.NoSuchProcess:
|
311
|
-
continue
|
312
|
-
run = update_pipeline_run(run["id"], run_data)
|
313
|
-
logger.info("Done.")
|
314
|
-
if output_log:
|
315
|
-
if "error" not in run_data:
|
316
|
-
run["output"] = globals_locals["output"]
|
317
|
-
return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
|
318
|
-
return globals_locals["output"]
|
319
|
-
|
320
|
-
|
321
|
-
def batch_runs(batch_name, program_name, runs, headless=False, ignore_errors=False, error_value=None, rerun_warnings=False, rerun_warning_types=[], rerun_errors=False, verbose=False):
|
322
|
-
save_file = f"{batch_name}.json"
|
323
|
-
try:
|
324
|
-
with open(save_file) as f:
|
325
|
-
outputs = json.load(f)
|
326
|
-
except FileNotFoundError:
|
327
|
-
outputs = []
|
328
|
-
metadata_file = f"{batch_name}_metadata.json"
|
329
|
-
try:
|
330
|
-
with open(metadata_file) as f:
|
331
|
-
metadata = json.load(f)
|
332
|
-
except FileNotFoundError:
|
333
|
-
metadata = []
|
334
|
-
|
335
|
-
num_initial_results = len(outputs)
|
336
|
-
error = None
|
337
|
-
variables = None
|
338
|
-
try:
|
339
|
-
default_desc = f'Running program "{program_name}"'
|
340
|
-
with Progress() as progress:
|
341
|
-
task = progress.add_task(default_desc, total=len(runs))
|
342
|
-
for i, variables in progress.track(enumerate(runs), task_id=task):
|
343
|
-
if i < num_initial_results:
|
344
|
-
if rerun_errors and metadata[i]["status"] == "ERROR":
|
345
|
-
pass
|
346
|
-
elif rerun_warnings and metadata[i]["warnings"]:
|
347
|
-
if not rerun_warning_types or any(warning["type"] in rerun_warning_types for warning in metadata[i]["warnings"]):
|
348
|
-
pass
|
349
|
-
else:
|
350
|
-
continue
|
351
|
-
else:
|
352
|
-
continue
|
353
|
-
for j in range(3):
|
354
|
-
result = run(program_name, variables, headless, output_log=True)
|
355
|
-
if result["status"] != "ERROR":
|
356
|
-
output = result.pop("output")
|
357
|
-
if i < num_initial_results:
|
358
|
-
outputs[i] = output
|
359
|
-
metadata[i] = result
|
360
|
-
else:
|
361
|
-
outputs.append(output)
|
362
|
-
metadata.append(result)
|
363
|
-
break
|
364
|
-
else:
|
365
|
-
error = result["error"].strip().split("\n")[-1]
|
366
|
-
if j < 2:
|
367
|
-
progress.update(task, description=f"An error occurred: {error} - Waiting 60s before retrying (Attempt {j+2}/3)")
|
368
|
-
time.sleep(60)
|
369
|
-
progress.update(task, description=default_desc)
|
370
|
-
error = None
|
371
|
-
continue
|
372
|
-
else:
|
373
|
-
if ignore_errors:
|
374
|
-
error = None
|
375
|
-
if i < num_initial_results:
|
376
|
-
outputs[i] = error_value
|
377
|
-
else:
|
378
|
-
outputs.append(error_value)
|
379
|
-
break
|
380
|
-
else:
|
381
|
-
raise RunFailedException
|
382
|
-
except RunFailedException:
|
383
|
-
pass
|
384
|
-
except Exception as e:
|
385
|
-
error = repr(e)
|
386
|
-
finally:
|
387
|
-
configure_logging(verbose)
|
388
|
-
if error:
|
389
|
-
logger.error(f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume.")
|
390
|
-
with open(save_file, "w") as f:
|
391
|
-
json.dump(outputs, f)
|
392
|
-
with open(metadata_file, "w") as f:
|
393
|
-
json.dump(metadata, f)
|
394
|
-
num_warnings = 0
|
395
|
-
num_runs_with_warnings = 0
|
396
|
-
for m in metadata:
|
397
|
-
if m["warnings"]:
|
398
|
-
num_warnings += len(m["warnings"])
|
399
|
-
num_runs_with_warnings += 1
|
400
|
-
logger.info(f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n")
|
401
|
-
return None if error else outputs
|
402
|
-
|
403
|
-
|
404
246
|
def delete(program_name, verbose=False, confirm_with_user=False):
|
405
247
|
if (
|
406
248
|
confirm_with_user
|
@@ -430,6 +272,10 @@ def setup(verbose=False):
|
|
430
272
|
logger.info("Setup complete.")
|
431
273
|
|
432
274
|
|
275
|
+
def help(parser, verbose):
    """
    CLI handler for the ``help`` subcommand: print the usage text of ``parser``.

    ``verbose`` is accepted because ``main`` passes every parsed argument to the
    selected handler; it is not used here.
    """
    parser.print_help()
|
277
|
+
|
278
|
+
|
433
279
|
def _get_data(url, page_type, timeout):
|
434
280
|
start_time = time.time()
|
435
281
|
with console.status("Extracting data...") as status:
|
parsagon/runs.py
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
import datetime
|
2
|
+
import json
|
3
|
+
import logging.config
|
4
|
+
import time
|
5
|
+
import traceback
|
6
|
+
|
7
|
+
import psutil
|
8
|
+
from rich.console import Console
|
9
|
+
from rich.progress import Progress
|
10
|
+
from rich.prompt import Prompt
|
11
|
+
|
12
|
+
from parsagon.api import (
|
13
|
+
create_pipeline_run,
|
14
|
+
update_pipeline_run,
|
15
|
+
get_pipeline,
|
16
|
+
get_pipeline_code,
|
17
|
+
get_run,
|
18
|
+
)
|
19
|
+
from parsagon.exceptions import ParsagonException, RunFailedException
|
20
|
+
from parsagon.settings import get_api_key
|
21
|
+
|
22
|
+
console = Console()
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
# Keys of a run record that are exposed to callers when ``output_log`` is set.
_RUN_SUMMARY_KEYS = ("output", "status", "log", "warnings", "error")


def _summarize_run(run_record):
    """Return only the caller-facing keys of a run record."""
    return {k: v for k, v in run_record.items() if k in _RUN_SUMMARY_KEYS}


def _deliver_result(result, output_file):
    """Write ``result`` to ``output_file`` as JSON and return None, or return it when no file is given."""
    if not output_file:
        return result
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(result, f, indent=4)
    return None


def _kill_chromedrivers():
    """Best-effort kill of every process named "chromedriver"."""
    for proc in psutil.process_iter():
        try:
            if proc.name() == "chromedriver":
                proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            # The process vanished, or it is not ours to inspect/kill; cleanup must not abort the run.
            continue


def run(program_name, variables=None, headless=False, remote=False, output_log=False, output_file=None, undetected=False, verbose=False):
    """
    Executes pipeline code, either remotely (polling until it ends) or locally via ``exec``.

    Args:
        program_name: Name of the program (pipeline) to run.
        variables: Dictionary of variables passed to the program; ``None`` means no variables.
        headless: Passed to ``get_pipeline_code`` for local runs; cannot be combined with ``remote``.
        remote: Create the run on the server and poll it every 5 seconds instead of executing locally.
        output_log: Return a summary dict (output, status, log, warnings, error) instead of the bare
            output, and report a failed run through that dict instead of raising.
        output_file: If given, the result is written to this path as JSON and ``None`` is returned.
        undetected: Passed to ``get_pipeline_code`` as its ``use_uc`` argument.
        verbose: For remote runs, also log the run's log once it has finished.

    Returns:
        The program output, the summary dict when ``output_log`` is set, or ``None`` when the result
        was written to ``output_file``.

    Raises:
        ParsagonException: On invalid arguments, or when a remote run errors or is canceled
            (unless ``output_log`` is set, in which case a remote error is returned in the summary).
        Exception: Whatever the locally executed program raised, when ``output_log`` is not set.
    """
    if headless and remote:
        raise ParsagonException("Cannot run a program remotely in headless mode")

    if variables is None:
        variables = {}
    if not isinstance(variables, dict):
        raise ParsagonException("Variables must be a dictionary")

    logger.info("Preparing to run program %s", program_name)
    pipeline_id = get_pipeline(program_name)["id"]

    if remote:
        remote_run = create_pipeline_run(pipeline_id, variables, False)
        with console.status("Program running remotely..."):
            while True:
                run_record = get_run(remote_run["id"])
                run_status = run_record["status"]

                if output_log and run_status in ("FINISHED", "ERROR"):
                    return _deliver_result(_summarize_run(run_record), output_file)

                if run_status == "FINISHED":
                    if verbose:
                        logger.info(run_record["log"])
                    for warning in run_record["warnings"]:
                        logger.warning(warning)
                    logger.info("Program finished running.")
                    return _deliver_result(run_record["output"], output_file)
                if run_status == "ERROR":
                    raise ParsagonException(f"Program failed to run: {run_record['error']}")
                if run_status == "CANCELED":
                    raise ParsagonException("Program execution was canceled")

                time.sleep(5)

    run_record = create_pipeline_run(pipeline_id, variables, True)
    code = get_pipeline_code(program_name, variables, headless, undetected)["code"]
    run_data = {"start_time": datetime.datetime.now(datetime.timezone.utc).isoformat()}

    logger.info("Running program...")
    globals_locals = {"PARSAGON_API_KEY": get_api_key()}
    try:
        # NOTE(review): this executes code returned by the API with the privileges of this process.
        exec(code, globals_locals, globals_locals)
        run_data["status"] = "FINISHED"
    except BaseException as exc:
        # Record the failure for every kind of exit so the run record is updated below ...
        run_data["status"] = "ERROR"
        run_data["error"] = traceback.format_exc()
        # ... but only swallow ordinary errors, and only when the caller asked for a summary.
        # KeyboardInterrupt / SystemExit must always propagate.
        if not output_log or not isinstance(exc, Exception):
            raise
    finally:
        run_data["end_time"] = datetime.datetime.now(datetime.timezone.utc).isoformat()
        # The executed program may have left these objects in its namespace; shut them down.
        if "driver" in globals_locals:
            globals_locals["driver"].quit()
        if "display" in globals_locals:
            globals_locals["display"].stop()
        if "parsagon_log" in globals_locals:
            run_data["log"] = "\n".join(globals_locals["parsagon_log"])
            logger.info(run_data["log"])
        if "parsagon_warnings" in globals_locals:
            run_data["warnings"] = globals_locals["parsagon_warnings"]
        _kill_chromedrivers()
        run_record = update_pipeline_run(run_record["id"], run_data)
    logger.info("Done.")

    if not output_log:
        # A failure would have been re-raised above, so the program completed.
        return _deliver_result(globals_locals["output"], output_file)

    # Only a successful program is expected to have defined "output"; reading it
    # unconditionally would raise KeyError and hide the recorded error from the caller.
    if "error" not in run_data:
        run_record["output"] = globals_locals["output"]
    return _deliver_result(_summarize_run(run_record), output_file)
|
122
|
+
|
123
|
+
|
124
|
+
def _load_saved_list(path):
    """Return the JSON content of ``path``, or an empty list when the file does not exist yet."""
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return []


def _needs_rerun(entry, rerun_errors, rerun_warnings, rerun_warning_types):
    """Decide whether a run that already has saved metadata ``entry`` should be executed again."""
    if rerun_errors and entry.get("status") == "ERROR":
        return True
    warnings = entry.get("warnings")
    if rerun_warnings and warnings:
        # With no type filter every warning qualifies; otherwise at least one type must match.
        return not rerun_warning_types or any(w["type"] in rerun_warning_types for w in warnings)
    return False


def _record_run(outputs, metadata, index, replace, output, info):
    """Store one run's output and metadata together so both lists stay index-aligned."""
    if replace:
        outputs[index] = output
        metadata[index] = info
    else:
        outputs.append(output)
        metadata.append(info)


def batch_runs(
    batch_name,
    program_name,
    runs,
    headless=False,
    ignore_errors=False,
    error_value=None,
    rerun_warnings=False,
    rerun_warning_types=None,
    rerun_errors=False,
    verbose=False,
):
    """
    Run a program once per entry of ``runs``, saving intermediate results so the batch can be resumed.

    Outputs are saved to ``{batch_name}.json`` and per-run metadata (status, log, warnings, error)
    to ``{batch_name}_metadata.json``. Runs that already have a saved result are skipped unless one
    of the ``rerun_*`` options selects them. Each run is attempted up to 3 times, waiting 60 seconds
    between attempts.

    Args:
        batch_name: Prefix of the two save files.
        program_name: Name of the program to run.
        runs: List of variable dictionaries, one per run.
        headless: Passed through to ``run``.
        ignore_errors: When a run fails on every attempt, store ``error_value`` as its output and
            continue instead of stopping the batch.
        error_value: Output stored for runs that failed while ``ignore_errors`` is set.
        rerun_warnings: Re-execute saved runs whose metadata contains warnings.
        rerun_warning_types: If non-empty, restricts ``rerun_warnings`` to these warning types.
        rerun_errors: Re-execute saved runs whose metadata status is "ERROR".
        verbose: Unused; kept for interface compatibility.

    Returns:
        The list of outputs, or ``None`` if the batch stopped on an unresolved error.

    Raises:
        ParsagonException: If ``runs`` contains a non-dictionary entry.
    """
    # Validate runs
    if not all(isinstance(run_, dict) for run_ in runs):
        raise ParsagonException("Runs must be a list of dictionaries")
    if rerun_warning_types is None:
        rerun_warning_types = []

    max_attempts = 3
    retry_delay = 60  # seconds

    save_file = f"{batch_name}.json"
    metadata_file = f"{batch_name}_metadata.json"
    outputs = _load_saved_list(save_file)
    metadata = _load_saved_list(metadata_file)

    num_initial_results = len(outputs)
    error = None
    variables = None
    try:
        default_desc = f'Running program "{program_name}"'
        with Progress() as progress:
            task = progress.add_task(default_desc, total=len(runs))
            for i, variables in progress.track(enumerate(runs), task_id=task):
                already_saved = i < num_initial_results
                if already_saved and not _needs_rerun(
                    metadata[i], rerun_errors, rerun_warnings, rerun_warning_types
                ):
                    continue

                for attempt in range(max_attempts):
                    result = run(program_name, variables, headless, output_log=True)
                    if result["status"] != "ERROR":
                        _record_run(outputs, metadata, i, already_saved, result.pop("output"), result)
                        break

                    # Keep only the last traceback line; never leave it empty, because an
                    # empty string would make a failed batch look successful below.
                    error = (result.get("error") or "").strip().split("\n")[-1] or "Unknown error"
                    if attempt < max_attempts - 1:
                        progress.update(
                            task,
                            description=f"An error occurred: {error} - Waiting {retry_delay}s before retrying (Attempt {attempt + 2}/{max_attempts})",
                        )
                        time.sleep(retry_delay)
                        progress.update(task, description=default_desc)
                        error = None
                        continue

                    if not ignore_errors:
                        raise RunFailedException
                    error = None
                    # Save the failed run's metadata next to the placeholder output: this keeps
                    # both lists aligned for resuming and lets ``rerun_errors`` find the run later.
                    result.pop("output", None)
                    _record_run(outputs, metadata, i, already_saved, error_value, result)
    except RunFailedException:
        pass
    except Exception as e:
        error = repr(e)
    finally:
        if error:
            logger.error(
                f"Unresolvable error occurred on run with variables {variables}: {error} - Data has been saved to {save_file}. Rerun your command to resume."
            )
        with open(save_file, "w", encoding="utf-8") as f:
            json.dump(outputs, f)
        with open(metadata_file, "w", encoding="utf-8") as f:
            json.dump(metadata, f)
        # Tolerate entries without a "warnings" key so the summary cannot mask the real error.
        warning_counts = [len(m.get("warnings") or ()) for m in metadata]
        num_warnings = sum(warning_counts)
        num_runs_with_warnings = sum(1 for count in warning_counts if count)
        logger.info(
            f"\nSummary: {len(outputs)} runs made; {num_warnings} warnings encountered across {num_runs_with_warnings} runs. See {metadata_file} for logs.\n"
        )
    return None if error else outputs
|
parsagon/settings.py
CHANGED
@@ -3,6 +3,7 @@ import logging
|
|
3
3
|
import sys
|
4
4
|
from os import environ
|
5
5
|
from pathlib import Path
|
6
|
+
import logging.config
|
6
7
|
|
7
8
|
from parsagon.exceptions import ParsagonException
|
8
9
|
|
@@ -114,3 +115,7 @@ def get_logging_config(log_level="INFO"):
|
|
114
115
|
},
|
115
116
|
},
|
116
117
|
}
|
118
|
+
|
119
|
+
|
120
|
+
def configure_logging(verbose):
    """Apply the package logging configuration at "DEBUG" level when ``verbose`` is truthy, else "INFO"."""
    log_level = "DEBUG" if verbose else "INFO"
    logging.config.dictConfig(get_logging_config(log_level))
|
parsagon/tests/api_mocks.py
CHANGED
@@ -53,7 +53,7 @@ def mock_httpx_method_func(*args, **kwargs):
|
|
53
53
|
},
|
54
54
|
)
|
55
55
|
|
56
|
-
if match := re.search(r"/pipelines/name/(.+)
|
56
|
+
if match := re.search(r"/pipelines/name/(.+)/", url):
|
57
57
|
assert method == "get"
|
58
58
|
pipeline_name = match.group(1)
|
59
59
|
if pipeline_name == not_found_pipeline_name:
|
@@ -1,23 +1,13 @@
|
|
1
1
|
import pytest
|
2
2
|
|
3
|
-
from parsagon import
|
4
|
-
from parsagon.tests.cli_mocks import call_cli
|
3
|
+
from parsagon.main import main
|
5
4
|
|
6
5
|
|
7
6
|
def test_headless_remote_run_invalid(mocker, debug_logs):
|
8
7
|
"""
|
9
8
|
Tests that we are unable to run a program in headless mode when the environment is remote, and that this is logged to the user.
|
10
9
|
"""
|
11
|
-
|
12
|
-
mocker,
|
13
|
-
{
|
14
|
-
"func": run,
|
15
|
-
"program_name": "test_program",
|
16
|
-
"headless": True,
|
17
|
-
"remote": True,
|
18
|
-
"verbose": False,
|
19
|
-
},
|
20
|
-
)
|
10
|
+
main(["run", "test_program", "--headless", "--remote"])
|
21
11
|
debug_logs_lower = debug_logs.text.lower()
|
22
12
|
assert "error" in debug_logs_lower
|
23
13
|
assert "headless" in debug_logs_lower
|
@@ -6,7 +6,6 @@ import pytest
|
|
6
6
|
from parsagon import delete, run
|
7
7
|
from parsagon.main import main
|
8
8
|
from parsagon.tests.api_mocks import install_api_mocks, not_found_pipeline_name
|
9
|
-
from parsagon.tests.cli_mocks import call_cli
|
10
9
|
|
11
10
|
|
12
11
|
def test_pipeline_delete(mocker):
|
@@ -18,24 +17,10 @@ def test_pipeline_not_found(mocker, debug_logs):
|
|
18
17
|
install_api_mocks(mocker, {"code_to_return": 'raise Exception("Should not exec this code if pipeline not found.")'})
|
19
18
|
|
20
19
|
# On delete
|
21
|
-
|
22
|
-
mocker,
|
23
|
-
{
|
24
|
-
"func": delete,
|
25
|
-
"program_name": not_found_pipeline_name,
|
26
|
-
"verbose": False,
|
27
|
-
},
|
28
|
-
)
|
20
|
+
main(["delete", not_found_pipeline_name, "-y"])
|
29
21
|
assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
|
30
22
|
debug_logs.clear()
|
31
23
|
|
32
24
|
# On attempted run
|
33
|
-
|
34
|
-
mocker,
|
35
|
-
{
|
36
|
-
"func": run,
|
37
|
-
"program_name": not_found_pipeline_name,
|
38
|
-
"verbose": False,
|
39
|
-
},
|
40
|
-
)
|
25
|
+
main(["run", not_found_pipeline_name])
|
41
26
|
assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
|
@@ -0,0 +1,25 @@
|
|
1
|
+
parsagon/__init__.py,sha256=2bUc4R8F8Mz_AEiTnWTiBAe8Yc-i4GgM_TTiFfQHuS8,195
|
2
|
+
parsagon/api.py,sha256=Fqly1gOs3Rcn7Ct6cYJN0QXfMv-_rd5W2woXrxlUXg0,8710
|
3
|
+
parsagon/assistant.py,sha256=vCjOFRFxkA0uVuihtj4vu7cDm7fL9LJezTzvTpw7uqQ,2966
|
4
|
+
parsagon/create.py,sha256=UD0FsAHRfvYMbzYQvNnbgGaSl7ph4w9wSZLfteiT2Kg,3384
|
5
|
+
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
|
+
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
7
|
+
parsagon/executor.py,sha256=csstgKXxT2L7_qQUL4kDL_qHHlM6A3ocEuSPZXTeu6k,23604
|
8
|
+
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
9
|
+
parsagon/main.py,sha256=R2aUxxOvaLK91OriOJdnoBSQ35EkbBC3OniQvlkodg8,8865
|
10
|
+
parsagon/print.py,sha256=w1pg6BPZNjFdjneXla2AVZCDxWXXtQ5kPSGr2SBTwag,399
|
11
|
+
parsagon/runs.py,sha256=qhxwNUCtdApthwLVhOWAY8o6JPVSluW1s4ImkifLlcU,8521
|
12
|
+
parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
|
13
|
+
parsagon/settings.py,sha256=6SXcxoYlACrq1iiMpvX-4OVpYuzyGvU8Nf_FQ7L0uZk,3085
|
14
|
+
parsagon/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
+
parsagon/tests/api_mocks.py,sha256=A5zuP0MH23g1JAqt2JWponEChayWF6t99ANkrhiX-7k,3126
|
16
|
+
parsagon/tests/conftest.py,sha256=KMlHohc0QT77HzumraIojzKeqroyxarnaT6naJDNvEc,428
|
17
|
+
parsagon/tests/test_executor.py,sha256=n3cmh84r74siSeJqUeAIwjjnNzDVPEdxcvYAeJ4hNX8,645
|
18
|
+
parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEKQYaHek,482
|
19
|
+
parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
|
20
|
+
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
21
|
+
parsagon-0.14.0.dist-info/METADATA,sha256=llI4zum5BNN8DE5EVfRZvabjzdwSGfI_fNzn7asn33o,2380
|
22
|
+
parsagon-0.14.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
23
|
+
parsagon-0.14.0.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
24
|
+
parsagon-0.14.0.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
25
|
+
parsagon-0.14.0.dist-info/RECORD,,
|
parsagon/tests/cli_mocks.py
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
from parsagon.main import main
|
2
|
-
|
3
|
-
|
4
|
-
def call_cli(mocker, args):
|
5
|
-
"""
|
6
|
-
Uses the mocker to pretend that the args passed are coming from argparse, then calls the main function.
|
7
|
-
"""
|
8
|
-
|
9
|
-
mocker.patch(
|
10
|
-
"parsagon.main.get_args",
|
11
|
-
lambda: (
|
12
|
-
args,
|
13
|
-
None,
|
14
|
-
),
|
15
|
-
)
|
16
|
-
return main()
|
parsagon-0.12.4.dist-info/RECORD
DELETED
@@ -1,25 +0,0 @@
|
|
1
|
-
parsagon/__init__.py,sha256=n4-wiFVVuyW_KOJeNiycggAg9BTa5bbBIVpD_DkdOO4,125
|
2
|
-
parsagon/api.py,sha256=5tpxSp637iEKGkzyt7YVdOa0PQdtCnVl91RNHjEJUW8,8098
|
3
|
-
parsagon/assistant.py,sha256=yc0f5AYXjBAQcKQqhtcyH0shYFhVNfC_VINBkEZwZrc,2402
|
4
|
-
parsagon/create.py,sha256=3rJ17O2zMRdAuGMCOXPgE6e2KqzORgaobkMKBiMAhqQ,3347
|
5
|
-
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
|
-
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
7
|
-
parsagon/executor.py,sha256=q0b44sVeFBV0rNQi5_5gLw__QzS6XPA8A2PRsVazk0E,23575
|
8
|
-
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
9
|
-
parsagon/main.py,sha256=dQPH5wMowrDbcQljwd2W4Lyq3o41srFXVoRF2O14XXc,15665
|
10
|
-
parsagon/print.py,sha256=w1pg6BPZNjFdjneXla2AVZCDxWXXtQ5kPSGr2SBTwag,399
|
11
|
-
parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
|
12
|
-
parsagon/settings.py,sha256=s5_MsDMFM5tB8U8tfHaFnKibCoEqPnAu8b_ueg07Ftw,2947
|
13
|
-
parsagon/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
parsagon/tests/api_mocks.py,sha256=M8xhiyPa1dI8Vx-odDk7ETopfFAfcjfAf-ApmSqgvfw,3127
|
15
|
-
parsagon/tests/cli_mocks.py,sha256=Y4W_wgH6ixQRCk8xVdWOwDJ_ChD09XdZEV2xUVXWFiM,327
|
16
|
-
parsagon/tests/conftest.py,sha256=KMlHohc0QT77HzumraIojzKeqroyxarnaT6naJDNvEc,428
|
17
|
-
parsagon/tests/test_executor.py,sha256=n3cmh84r74siSeJqUeAIwjjnNzDVPEdxcvYAeJ4hNX8,645
|
18
|
-
parsagon/tests/test_invalid_args.py,sha256=kOjMpbZvviR1CwvXReteZMxBvuhq_rOv5Tm1muBSzNk,676
|
19
|
-
parsagon/tests/test_pipeline_operations.py,sha256=TpBKCuRA8LHYWx3PD_k9mYCSsA_9SZjrOX-rS4mE8XE,1089
|
20
|
-
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
21
|
-
parsagon-0.12.4.dist-info/METADATA,sha256=BjBWs2zC7TGgfKhigLEaAP3jHRQFkqNRSKqm5arf5dg,2380
|
22
|
-
parsagon-0.12.4.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
|
23
|
-
parsagon-0.12.4.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
24
|
-
parsagon-0.12.4.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
25
|
-
parsagon-0.12.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|