parsagon 0.14.37__py3-none-any.whl → 0.14.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsagon/api.py +7 -0
- parsagon/executor.py +19 -0
- parsagon/main.py +28 -4
- {parsagon-0.14.37.dist-info → parsagon-0.14.38.dist-info}/METADATA +1 -1
- {parsagon-0.14.37.dist-info → parsagon-0.14.38.dist-info}/RECORD +8 -8
- {parsagon-0.14.37.dist-info → parsagon-0.14.38.dist-info}/WHEEL +0 -0
- {parsagon-0.14.37.dist-info → parsagon-0.14.38.dist-info}/entry_points.txt +0 -0
- {parsagon-0.14.37.dist-info → parsagon-0.14.38.dist-info}/top_level.txt +0 -0
parsagon/api.py
CHANGED
@@ -239,6 +239,13 @@ def get_pipeline(pipeline_name):
|
|
239
239
|
)
|
240
240
|
|
241
241
|
|
242
|
+
def get_pipeline_by_id(pipeline_id):
    """Look up a single pipeline by its ID via ``GET /pipelines/{pipeline_id}/``.

    Returns:
        The first element of the API response.

    Raises:
        ProgramNotFoundException: if the API response is empty/falsy.
    """
    endpoint = f"/pipelines/{pipeline_id}/"
    matches = _api_call(httpx.get, endpoint)
    if matches:
        # NOTE(review): indexing with [0] assumes this endpoint returns a
        # sequence of pipelines rather than a single object -- confirm.
        return matches[0]
    raise ProgramNotFoundException(pipeline_id)
|
247
|
+
|
248
|
+
|
242
249
|
def get_pipelines():
    """Return the API response for ``GET /pipelines/``."""
    endpoint = "/pipelines/"
    return _api_call(httpx.get, endpoint)
|
244
251
|
|
parsagon/executor.py
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
import base64
|
1
2
|
from collections import defaultdict
|
2
3
|
import copy
|
3
4
|
import dateutil.parser
|
@@ -11,8 +12,10 @@ import time
|
|
11
12
|
from urllib.parse import urljoin
|
12
13
|
|
13
14
|
import html2text
|
15
|
+
import httpx
|
14
16
|
from lxml import etree
|
15
17
|
import lxml.html
|
18
|
+
from lxml.html.clean import Cleaner
|
16
19
|
from pypdf import PdfReader
|
17
20
|
from pyvirtualdisplay import Display
|
18
21
|
import undetected_chromedriver as uc
|
@@ -132,6 +135,7 @@ class Executor:
|
|
132
135
|
self.execution_context = {
|
133
136
|
"custom_assert": self.custom_assert,
|
134
137
|
"goto": self.goto,
|
138
|
+
"goto_lite": self.goto_lite,
|
135
139
|
"close_window": self.close_window,
|
136
140
|
"click_elem": self.click_elem,
|
137
141
|
"click_elem_by_id": self.click_elem_by_id,
|
@@ -380,6 +384,21 @@ class Executor:
|
|
380
384
|
|
381
385
|
return self.driver.current_window_handle
|
382
386
|
|
387
|
+
def goto_lite(self, url, window_id=None):
    """Load a script-free copy of ``url`` into a browser window.

    The page is fetched with httpx instead of the browser, passed through
    lxml's ``Cleaner`` with ``javascript=True``, and then handed to the
    driver as a base64 ``data:`` URL.

    Args:
        url: Address of the page to fetch.
        window_id: Handle of an existing window to reuse. If it is not one
            of the driver's current window handles (including the default
            ``None``), a new tab is opened instead.

    Returns:
        The driver's current window handle after the page has been loaded.
    """
    if window_id in self.driver.window_handles:
        self.driver.switch_to.window(window_id)
    else:
        self.driver.switch_to.new_window("tab")

    # This is usually called in programs that use a proxy, but executor does not use proxies
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this request -- confirm that this is intentional.
    with httpx.Client(verify=False) as client:
        response = client.get(url, timeout=60)

    try:
        document = lxml.html.fromstring(response.text)
    except ValueError:
        # lxml refuses str input that carries an XML encoding declaration
        # (common on XHTML pages); retry with the undecoded bytes so lxml
        # can apply the declared encoding itself.
        document = lxml.html.fromstring(response.content)

    cleaner = Cleaner(javascript=True)
    cleaned_html = lxml.html.tostring(cleaner.clean_html(document))

    encoded_page = base64.b64encode(cleaned_html).decode()
    self.driver.get("data:text/html;base64," + encoded_page)
    time.sleep(1)
    return self.driver.current_window_handle
|
401
|
+
|
383
402
|
def close_window(self, window_id):
|
384
403
|
if self.driver.current_window_handle != window_id:
|
385
404
|
self.driver.switch_to.window(window_id)
|
parsagon/main.py
CHANGED
@@ -4,7 +4,14 @@ import logging.config
|
|
4
4
|
import time
|
5
5
|
|
6
6
|
|
7
|
-
from parsagon.api import
|
7
|
+
from parsagon.api import (
|
8
|
+
delete_pipeline,
|
9
|
+
add_examples_to_custom_function,
|
10
|
+
get_pipeline,
|
11
|
+
get_pipeline_by_id,
|
12
|
+
get_pipelines,
|
13
|
+
poll_extract,
|
14
|
+
)
|
8
15
|
from parsagon.assistant import assist
|
9
16
|
from parsagon.api import delete_pipeline, add_examples_to_custom_function, get_pipeline, get_pipelines, poll_extract
|
10
17
|
from parsagon.create import create_program
|
@@ -84,6 +91,11 @@ def get_args(argv):
|
|
84
91
|
action="store_true",
|
85
92
|
help="run the browser in headless mode",
|
86
93
|
)
|
94
|
+
parser_update.add_argument(
|
95
|
+
"--undetected",
|
96
|
+
action="store_true",
|
97
|
+
help="run in undetected mode",
|
98
|
+
)
|
87
99
|
parser_update.add_argument(
|
88
100
|
"--infer",
|
89
101
|
action="store_true",
|
@@ -208,17 +220,29 @@ def edit(program_name, variables={}, verbose=False):
|
|
208
220
|
edit_program(task, program_name)
|
209
221
|
|
210
222
|
|
211
|
-
def update(
|
223
|
+
def update(
|
224
|
+
program_name=None,
|
225
|
+
program_id=None,
|
226
|
+
variables={},
|
227
|
+
headless=False,
|
228
|
+
undetected=False,
|
229
|
+
infer=False,
|
230
|
+
replace=False,
|
231
|
+
verbose=False,
|
232
|
+
):
|
212
233
|
configure_logging(verbose)
|
213
234
|
|
214
|
-
|
235
|
+
if program_id:
|
236
|
+
pipeline = get_pipeline_by_id(program_id)
|
237
|
+
else:
|
238
|
+
pipeline = get_pipeline(program_name)
|
215
239
|
abridged_program = pipeline["abridged_sketch"]
|
216
240
|
# Make the program runnable
|
217
241
|
variables_str = ", ".join(f"{k}={repr(v)}" for k, v in variables.items())
|
218
242
|
abridged_program += f"\n\noutput = func({variables_str})\n"
|
219
243
|
|
220
244
|
# Execute the abridged program to gather examples
|
221
|
-
executor = Executor(pipeline["description"], headless=headless, infer=infer)
|
245
|
+
executor = Executor(pipeline["description"], headless=headless, infer=infer, use_uc=undetected)
|
222
246
|
executor.execute(abridged_program)
|
223
247
|
|
224
248
|
while True:
|
@@ -1,14 +1,14 @@
|
|
1
1
|
parsagon/__init__.py,sha256=_S5MlYHBViB4iY96_UbGo3mfGndE8MmFmb1EORJppK8,452
|
2
|
-
parsagon/api.py,sha256=
|
2
|
+
parsagon/api.py,sha256=10v2kI56lxvuOWsOZwazidKJQK75MNnh59fs9t3QgGE,10581
|
3
3
|
parsagon/assistant.py,sha256=V3NL6UdDqe74W_X3wPQ1qwFuJRvHzitXtOt-XSCXvds,4065
|
4
4
|
parsagon/create.py,sha256=BERrBviwMvifg5OwApqdanvULJHHk39fIvnTCZN3Xkk,4432
|
5
5
|
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
6
|
parsagon/edit.py,sha256=5gtnx0gNB7Gvz8ET00SczE-ZS0TomN1um6uObP-OObE,3120
|
7
7
|
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
8
|
-
parsagon/executor.py,sha256=
|
8
|
+
parsagon/executor.py,sha256=8p0rkgzbp5KeOahwWC0e9suP54iDPauHhn_sdAurmmI,29246
|
9
9
|
parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
|
10
10
|
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
11
|
-
parsagon/main.py,sha256=
|
11
|
+
parsagon/main.py,sha256=FGdGLqX3JTU72Pcz8LMgXWL3L5gw0buXGpvERrSkIvE,9823
|
12
12
|
parsagon/print.py,sha256=-7iVKil0W9e8zX1EJMcdlqNdfpmfPxKTBtZfwzWpGYU,4106
|
13
13
|
parsagon/runs.py,sha256=gi36oak3RxKo1BxB0Bc6GV8K06vrrBrEXu3NYkIVPwY,8633
|
14
14
|
parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
|
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
|
|
25
25
|
parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
|
26
26
|
parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
|
27
27
|
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
28
|
-
parsagon-0.14.
|
29
|
-
parsagon-0.14.
|
30
|
-
parsagon-0.14.
|
31
|
-
parsagon-0.14.
|
32
|
-
parsagon-0.14.
|
28
|
+
parsagon-0.14.38.dist-info/METADATA,sha256=kiuxV05OwRajPmAlFPFbX_9MD6dh5xgXG2B9BtIeAw0,2556
|
29
|
+
parsagon-0.14.38.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
30
|
+
parsagon-0.14.38.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
+
parsagon-0.14.38.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
+
parsagon-0.14.38.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|