parsagon 0.13.0__tar.gz → 0.14.0__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (32)
  1. {parsagon-0.13.0 → parsagon-0.14.0}/PKG-INFO +1 -1
  2. {parsagon-0.13.0 → parsagon-0.14.0}/pyproject.toml +1 -1
  3. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/api.py +31 -6
  4. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/assistant.py +4 -4
  5. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/create.py +2 -2
  6. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/executor.py +1 -0
  7. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/main.py +23 -22
  8. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/runs.py +25 -18
  9. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/api_mocks.py +1 -1
  10. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/test_invalid_args.py +2 -12
  11. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/test_pipeline_operations.py +2 -17
  12. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon.egg-info/PKG-INFO +1 -1
  13. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon.egg-info/SOURCES.txt +0 -1
  14. parsagon-0.13.0/src/parsagon/tests/cli_mocks.py +0 -16
  15. {parsagon-0.13.0 → parsagon-0.14.0}/README.md +0 -0
  16. {parsagon-0.13.0 → parsagon-0.14.0}/setup.cfg +0 -0
  17. {parsagon-0.13.0 → parsagon-0.14.0}/src/__init__.py +0 -0
  18. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/__init__.py +0 -0
  19. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/custom_function.py +0 -0
  20. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/exceptions.py +0 -0
  21. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/highlights.js +0 -0
  22. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/print.py +0 -0
  23. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/secrets.py +0 -0
  24. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/settings.py +0 -0
  25. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/__init__.py +0 -0
  26. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/conftest.py +0 -0
  27. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/test_executor.py +0 -0
  28. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon/tests/test_secrets.py +0 -0
  29. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon.egg-info/dependency_links.txt +0 -0
  30. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon.egg-info/entry_points.txt +0 -0
  31. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon.egg-info/requires.txt +0 -0
  32. {parsagon-0.13.0 → parsagon-0.14.0}/src/parsagon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.13.0
3
+ Version: 0.14.0
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -16,7 +16,7 @@ line-length = 120
16
16
 
17
17
  [project]
18
18
  name = "parsagon"
19
- version = "0.13.0"
19
+ version = "0.14.0"
20
20
  description = "Allows you to create browser automations with natural language"
21
21
  readme = "README.md"
22
22
  requires-python = ">=3.8"
@@ -1,5 +1,6 @@
1
1
  from json import JSONDecodeError
2
2
  import time
3
+ from urllib.parse import quote
3
4
 
4
5
  import httpx
5
6
 
@@ -166,7 +167,13 @@ def create_pipeline(name, description, program_sketch, pseudocode, secrets):
166
167
  return _api_call(
167
168
  httpx.post,
168
169
  "/pipelines/",
169
- json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode, "secrets": secrets},
170
+ json={
171
+ "name": name,
172
+ "description": description,
173
+ "program_sketch": program_sketch,
174
+ "pseudocode": pseudocode,
175
+ "secrets": secrets,
176
+ },
170
177
  )
171
178
 
172
179
 
@@ -195,11 +202,21 @@ def add_examples_to_custom_function(pipeline_id, call_id, custom_function, remov
195
202
  )
196
203
 
197
204
 
205
+ def double_quote(string):
206
+ """
207
+ A bug in Django disallows URLs even with quoted slashes as in:
208
+ /api/pipelines/name/stripe%2Fstuff/code/ HTTP/1.1" 405
209
+ Therefore we must double quote
210
+ """
211
+ return quote(quote(string, safe=""), safe="")
212
+
213
+
198
214
  def get_pipeline(pipeline_name):
215
+ escaped_pipeline_name = double_quote(pipeline_name)
199
216
  with RaiseProgramNotFound(pipeline_name):
200
217
  return _api_call(
201
218
  httpx.get,
202
- f"/pipelines/name/{pipeline_name}/",
219
+ f"/pipelines/name/{escaped_pipeline_name}/?double_escaped=True",
203
220
  )
204
221
 
205
222
 
@@ -207,14 +224,16 @@ def get_pipelines():
207
224
  return _api_call(httpx.get, f"/pipelines/")
208
225
 
209
226
 
210
- def get_pipeline_code(pipeline_name, variables, headless):
227
+ def get_pipeline_code(pipeline_name, variables, headless, use_uc):
228
+ escaped_pipeline_name = double_quote(pipeline_name)
211
229
  with RaiseProgramNotFound(pipeline_name):
212
230
  return _api_call(
213
231
  httpx.post,
214
- f"/pipelines/name/{pipeline_name}/code/",
232
+ f"/pipelines/name/{escaped_pipeline_name}/code/?double_escaped=True",
215
233
  json={
216
234
  "variables": variables,
217
235
  "headless": headless,
236
+ "use_uc": use_uc,
218
237
  },
219
238
  )
220
239
 
@@ -246,11 +265,17 @@ def get_run(run_id):
246
265
 
247
266
 
248
267
  def send_assistant_message(message, thread_id=None):
249
- return _api_call(httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id})
268
+ return _api_call(
269
+ httpx.post, "/transformers/send-assistant-message/", json={"message": message, "thread_id": thread_id}
270
+ )
250
271
 
251
272
 
252
273
  def send_assistant_function_outputs(outputs, thread_id, run_id):
253
- return _api_call(httpx.post, "/transformers/send-assistant-function-outputs/", json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id})
274
+ return _api_call(
275
+ httpx.post,
276
+ "/transformers/send-assistant-function-outputs/",
277
+ json={"outputs": outputs, "thread_id": thread_id, "run_id": run_id},
278
+ )
254
279
 
255
280
 
256
281
  def poll_extract(url, page_type):
@@ -4,10 +4,10 @@ from parsagon.create import create_program
4
4
  from parsagon.executor import Executor
5
5
  from parsagon.print import assistant_print, assistant_spinner, browser_print, error_print
6
6
  from rich.prompt import Prompt
7
- from parsagon.runs import run, batch_runs, run_with_file_output
7
+ from parsagon.runs import run, batch_runs
8
8
 
9
9
 
10
- def assist(headless=False, infer=False, verbose=False):
10
+ def assist(verbose=False):
11
11
  task = Prompt.ask("Type what do you want to do")
12
12
  with assistant_spinner():
13
13
  response = send_assistant_message(task)
@@ -37,11 +37,11 @@ def assist(headless=False, infer=False, verbose=False):
37
37
  output["output"] = html
38
38
  outputs.append(output)
39
39
  elif name == "create_program":
40
- result = create_program(args["description"], headless=headless, infer=infer)
40
+ result = create_program(args["description"])
41
41
  output["output"] = json.dumps(result)
42
42
  outputs.append(output)
43
43
  elif name == "run_program":
44
- result = run_with_file_output(**args)
44
+ result = run(**args)
45
45
  output["output"] = json.dumps(result)
46
46
  outputs.append(output)
47
47
  elif name == "batch_runs":
@@ -6,7 +6,7 @@ from parsagon.secrets import extract_secrets
6
6
  from rich.prompt import Prompt
7
7
 
8
8
 
9
- def create_program(task, headless=False, infer=False):
9
+ def create_program(task, headless=False, infer=False, undetected=False):
10
10
  assistant_print("Creating a program based on your specifications...")
11
11
  task, secrets = extract_secrets(task)
12
12
  program_sketches = get_program_sketches(task)
@@ -28,7 +28,7 @@ def create_program(task, headless=False, infer=False):
28
28
  abridged_program += f"\n\noutput = func({args})\n" # Make the program runnable
29
29
 
30
30
  # Execute the abridged program to gather examples
31
- executor = Executor(headless=headless, infer=infer)
31
+ executor = Executor(headless=headless, infer=infer, use_uc=undetected)
32
32
  executor.execute(abridged_program)
33
33
 
34
34
  # The user must select a name
@@ -54,6 +54,7 @@ ELEMENT_TYPES = {
54
54
  "html": "HTML",
55
55
  "element": "ACTION",
56
56
  "textarea": "TEXT",
57
+ "text_markdown": "TEXT",
57
58
  "markdown": "TEXT",
58
59
  "elem_id": "ACTION",
59
60
  }
@@ -17,28 +17,18 @@ from parsagon.assistant import assist
17
17
  from parsagon.create import create_program
18
18
  from parsagon.exceptions import ParsagonException
19
19
  from parsagon.executor import Executor, custom_functions_to_descriptions
20
- from parsagon.runs import run_with_file_output
20
+ from parsagon.runs import run
21
21
  from parsagon.settings import get_api_key, save_setting, configure_logging
22
22
 
23
23
  console = Console()
24
24
  logger = logging.getLogger(__name__)
25
25
 
26
26
 
27
- def get_args():
27
+ def get_args(argv):
28
28
  parser = argparse.ArgumentParser(
29
29
  prog="parsagon", description="Scrapes and interacts with web pages based on natural language.", add_help=False
30
30
  )
31
31
  parser.add_argument("-v", "--verbose", action="store_true", help="run the task in verbose mode")
32
- parser.add_argument(
33
- "--headless",
34
- action="store_true",
35
- help="run the browser in headless mode",
36
- )
37
- parser.add_argument(
38
- "--infer",
39
- action="store_true",
40
- help="let Parsagon infer all elements to be scraped",
41
- )
42
32
  subparsers = parser.add_subparsers()
43
33
 
44
34
  # Create
@@ -53,6 +43,11 @@ def get_args():
53
43
  action="store_true",
54
44
  help="let Parsagon infer all elements to be scraped",
55
45
  )
46
+ parser_create.add_argument(
47
+ "--undetected",
48
+ action="store_true",
49
+ help="run in undetected mode",
50
+ )
56
51
  parser_create.set_defaults(func=create_cli)
57
52
 
58
53
  # Detail
@@ -133,7 +128,17 @@ def get_args():
133
128
  action="store_true",
134
129
  help="output log data from the run",
135
130
  )
136
- parser_run.set_defaults(func=run_with_file_output)
131
+ parser_run.add_argument(
132
+ "--output_file",
133
+ type=str,
134
+ help="write the data to the given file path",
135
+ )
136
+ parser_run.add_argument(
137
+ "--undetected",
138
+ action="store_true",
139
+ help="run in undetected mode",
140
+ )
141
+ parser_run.set_defaults(func=run)
137
142
 
138
143
  # Delete
139
144
  parser_delete = subparsers.add_parser(
@@ -164,20 +169,16 @@ def get_args():
164
169
  )
165
170
  parser_help.set_defaults(func=help, parser=parser)
166
171
 
167
- args = parser.parse_args()
172
+ args = parser.parse_args(argv)
168
173
  kwargs = vars(args)
169
174
  return kwargs, parser
170
175
 
171
176
 
172
- def main():
173
- kwargs, parser = get_args()
177
+ def main(argv=None):
178
+ kwargs, parser = get_args(argv)
174
179
  func = kwargs.pop("func", None)
175
180
  if func is None:
176
181
  func = assist
177
- else:
178
- # Pop assist-only arguments
179
- kwargs.pop("infer")
180
- kwargs.pop("headless")
181
182
  verbose = kwargs["verbose"]
182
183
  configure_logging(verbose)
183
184
 
@@ -188,9 +189,9 @@ def main():
188
189
  logger.error(error_message)
189
190
 
190
191
 
191
- def create_cli(headless=False, infer=False, verbose=False):
192
+ def create_cli(headless=False, infer=False, undetected=False, verbose=False):
192
193
  task = Prompt.ask("Enter a detailed scraping task")
193
- create_program(task, headless=headless, infer=infer)
194
+ create_program(task, headless=headless, infer=infer, undetected=undetected)
194
195
 
195
196
 
196
197
  def update(program_name, variables={}, headless=False, infer=False, replace=False, verbose=False):
@@ -1,5 +1,4 @@
1
1
  import datetime
2
- import datetime
3
2
  import json
4
3
  import logging.config
5
4
  import time
@@ -24,18 +23,7 @@ console = Console()
24
23
  logger = logging.getLogger(__name__)
25
24
 
26
25
 
27
- def run_with_file_output(*args, **kwargs):
28
- dump_path = Prompt.ask("Please enter a path/filename to save the output (in JSON format)")
29
- if not dump_path.endswith(".json"):
30
- dump_path += ".json"
31
- result = run(*args, **kwargs)
32
- with open(dump_path, "w") as f:
33
- json.dump(result, f, indent=4)
34
- print(f"Output saved to {dump_path}")
35
- return result
36
-
37
-
38
- def run(program_name, variables={}, headless=False, remote=False, output_log=False, verbose=False):
26
+ def run(program_name, variables={}, headless=False, remote=False, output_log=False, output_file=None, undetected=False, verbose=False):
39
27
  """
40
28
  Executes pipeline code
41
29
  """
@@ -56,7 +44,13 @@ def run(program_name, variables={}, headless=False, remote=False, output_log=Fal
56
44
  status = run["status"]
57
45
 
58
46
  if output_log and status in ("FINISHED", "ERROR"):
59
- return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
47
+ result = {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
48
+ if output_file:
49
+ with open(output_file, "w") as f:
50
+ json.dump(result, f, indent=4)
51
+ return
52
+ else:
53
+ return result
60
54
 
61
55
  if status == "FINISHED":
62
56
  if verbose:
@@ -64,7 +58,13 @@ def run(program_name, variables={}, headless=False, remote=False, output_log=Fal
64
58
  for warning in run["warnings"]:
65
59
  logger.warning(warning)
66
60
  logger.info("Program finished running.")
67
- return run["output"]
61
+ result = run["output"]
62
+ if output_file:
63
+ with open(output_file, "w") as f:
64
+ json.dump(result, f, indent=4)
65
+ return
66
+ else:
67
+ return result
68
68
  elif status == "ERROR":
69
69
  raise ParsagonException(f"Program failed to run: {run['error']}")
70
70
  elif status == "CANCELED":
@@ -73,7 +73,7 @@ def run(program_name, variables={}, headless=False, remote=False, output_log=Fal
73
73
  time.sleep(5)
74
74
 
75
75
  run = create_pipeline_run(pipeline_id, variables, True)
76
- code = get_pipeline_code(program_name, variables, headless)["code"]
76
+ code = get_pipeline_code(program_name, variables, headless, undetected)["code"]
77
77
  start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
78
78
  run_data = {"start_time": start_time}
79
79
 
@@ -107,11 +107,18 @@ def run(program_name, variables={}, headless=False, remote=False, output_log=Fal
107
107
  continue
108
108
  run = update_pipeline_run(run["id"], run_data)
109
109
  logger.info("Done.")
110
+
111
+ result = globals_locals["output"]
110
112
  if output_log:
111
113
  if "error" not in run_data:
112
114
  run["output"] = globals_locals["output"]
113
- return {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
114
- return globals_locals["output"]
115
+ result = {k: v for k, v in run.items() if k in ("output", "status", "log", "warnings", "error")}
116
+ if output_file:
117
+ with open(output_file, "w") as f:
118
+ json.dump(result, f, indent=4)
119
+ return
120
+ else:
121
+ return result
115
122
 
116
123
 
117
124
  def batch_runs(
@@ -53,7 +53,7 @@ def mock_httpx_method_func(*args, **kwargs):
53
53
  },
54
54
  )
55
55
 
56
- if match := re.search(r"/pipelines/name/(.+)/$", url):
56
+ if match := re.search(r"/pipelines/name/(.+)/", url):
57
57
  assert method == "get"
58
58
  pipeline_name = match.group(1)
59
59
  if pipeline_name == not_found_pipeline_name:
@@ -1,23 +1,13 @@
1
1
  import pytest
2
2
 
3
- from parsagon import run
4
- from parsagon.tests.cli_mocks import call_cli
3
+ from parsagon.main import main
5
4
 
6
5
 
7
6
  def test_headless_remote_run_invalid(mocker, debug_logs):
8
7
  """
9
8
  Tests that we are unable to run a program in headless mode when the environment is remote, and that this is logged to the user.
10
9
  """
11
- call_cli(
12
- mocker,
13
- {
14
- "func": run,
15
- "program_name": "test_program",
16
- "headless": True,
17
- "remote": True,
18
- "verbose": False,
19
- },
20
- )
10
+ main(["run", "test_program", "--headless", "--remote"])
21
11
  debug_logs_lower = debug_logs.text.lower()
22
12
  assert "error" in debug_logs_lower
23
13
  assert "headless" in debug_logs_lower
@@ -6,7 +6,6 @@ import pytest
6
6
  from parsagon import delete, run
7
7
  from parsagon.main import main
8
8
  from parsagon.tests.api_mocks import install_api_mocks, not_found_pipeline_name
9
- from parsagon.tests.cli_mocks import call_cli
10
9
 
11
10
 
12
11
  def test_pipeline_delete(mocker):
@@ -18,24 +17,10 @@ def test_pipeline_not_found(mocker, debug_logs):
18
17
  install_api_mocks(mocker, {"code_to_return": 'raise Exception("Should not exec this code if pipeline not found.")'})
19
18
 
20
19
  # On delete
21
- call_cli(
22
- mocker,
23
- {
24
- "func": delete,
25
- "program_name": not_found_pipeline_name,
26
- "verbose": False,
27
- },
28
- )
20
+ main(["delete", not_found_pipeline_name, "-y"])
29
21
  assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
30
22
  debug_logs.clear()
31
23
 
32
24
  # On attempted run
33
- call_cli(
34
- mocker,
35
- {
36
- "func": run,
37
- "program_name": not_found_pipeline_name,
38
- "verbose": False,
39
- },
40
- )
25
+ main(["run", not_found_pipeline_name])
41
26
  assert f"A program with name {not_found_pipeline_name} does not exist." in debug_logs.text
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.13.0
3
+ Version: 0.14.0
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -22,7 +22,6 @@ src/parsagon.egg-info/requires.txt
22
22
  src/parsagon.egg-info/top_level.txt
23
23
  src/parsagon/tests/__init__.py
24
24
  src/parsagon/tests/api_mocks.py
25
- src/parsagon/tests/cli_mocks.py
26
25
  src/parsagon/tests/conftest.py
27
26
  src/parsagon/tests/test_executor.py
28
27
  src/parsagon/tests/test_invalid_args.py
@@ -1,16 +0,0 @@
1
- from parsagon.main import main
2
-
3
-
4
- def call_cli(mocker, args):
5
- """
6
- Uses the mocker to pretend that the args passed are coming from argparse, then calls the main function.
7
- """
8
-
9
- mocker.patch(
10
- "parsagon.main.get_args",
11
- lambda: (
12
- args,
13
- None,
14
- ),
15
- )
16
- return main()
File without changes
File without changes
File without changes