parsagon 0.14.24__py3-none-any.whl → 0.14.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsagon/api.py +2 -1
- parsagon/executor.py +35 -0
- parsagon/main.py +5 -0
- parsagon/runs.py +2 -1
- {parsagon-0.14.24.dist-info → parsagon-0.14.26.dist-info}/METADATA +3 -2
- {parsagon-0.14.24.dist-info → parsagon-0.14.26.dist-info}/RECORD +9 -9
- {parsagon-0.14.24.dist-info → parsagon-0.14.26.dist-info}/WHEEL +0 -0
- {parsagon-0.14.24.dist-info → parsagon-0.14.26.dist-info}/entry_points.txt +0 -0
- {parsagon-0.14.24.dist-info → parsagon-0.14.26.dist-info}/top_level.txt +0 -0
parsagon/api.py
CHANGED
@@ -235,7 +235,7 @@ def get_pipelines():
|
|
235
235
|
return _api_call(httpx.get, f"/pipelines/")
|
236
236
|
|
237
237
|
|
238
|
-
def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize):
|
238
|
+
def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize, use_proxy):
|
239
239
|
escaped_pipeline_name = double_quote(pipeline_name)
|
240
240
|
with RaiseProgramNotFound(pipeline_name):
|
241
241
|
return _api_call(
|
@@ -246,6 +246,7 @@ def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize):
|
|
246
246
|
"headless": headless,
|
247
247
|
"use_uc": use_uc,
|
248
248
|
"optimize": optimize,
|
249
|
+
"use_proxy": use_proxy,
|
249
250
|
},
|
250
251
|
)
|
251
252
|
|
parsagon/executor.py
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
from collections import defaultdict
|
2
2
|
import copy
|
3
|
+
import glob
|
3
4
|
import json
|
4
5
|
import logging
|
6
|
+
import os
|
5
7
|
from pathlib import Path
|
6
8
|
import psutil
|
7
9
|
import time
|
8
10
|
from urllib.parse import urljoin
|
9
11
|
|
10
12
|
import lxml.html
|
13
|
+
from pypdf import PdfReader
|
11
14
|
from pyvirtualdisplay import Display
|
12
15
|
import undetected_chromedriver as uc
|
13
16
|
from selenium import webdriver
|
@@ -92,12 +95,30 @@ class Executor:
|
|
92
95
|
chrome_options.add_argument("--start-maximized")
|
93
96
|
for option in options:
|
94
97
|
chrome_options.add_argument(option)
|
98
|
+
chrome_options.add_experimental_option(
|
99
|
+
"prefs",
|
100
|
+
{
|
101
|
+
"download.default_directory": os.getcwd(),
|
102
|
+
"download.prompt_for_download": False,
|
103
|
+
"download.directory_upgrade": True,
|
104
|
+
"plugins.always_open_pdf_externally": True,
|
105
|
+
},
|
106
|
+
)
|
95
107
|
self.driver = uc.Chrome(driver_executable_path=driver_executable_path, options=chrome_options)
|
96
108
|
else:
|
97
109
|
chrome_options = webdriver.ChromeOptions()
|
98
110
|
chrome_options.add_argument("--start-maximized")
|
99
111
|
for option in options:
|
100
112
|
chrome_options.add_argument(option)
|
113
|
+
chrome_options.add_experimental_option(
|
114
|
+
"prefs",
|
115
|
+
{
|
116
|
+
"download.default_directory": os.getcwd(),
|
117
|
+
"download.prompt_for_download": False,
|
118
|
+
"download.directory_upgrade": True,
|
119
|
+
"plugins.always_open_pdf_externally": True,
|
120
|
+
},
|
121
|
+
)
|
101
122
|
self.driver = webdriver.Chrome(service=ChromeService(driver_executable_path), options=chrome_options)
|
102
123
|
if page_load_timeout:
|
103
124
|
self.driver.set_page_load_timeout(page_load_timeout)
|
@@ -124,6 +145,7 @@ class Executor:
|
|
124
145
|
"get_str_about_data": get_str_about_data,
|
125
146
|
"get_bool_about_data": get_bool_about_data,
|
126
147
|
"get_json_about_data": get_json_about_data,
|
148
|
+
"get_pdf_text": self.get_pdf_text,
|
127
149
|
}
|
128
150
|
self.custom_functions = {}
|
129
151
|
self.infer = infer
|
@@ -617,6 +639,19 @@ class Executor:
|
|
617
639
|
self.add_custom_function(call_id, custom_function)
|
618
640
|
return scraped_data
|
619
641
|
|
642
|
+
def get_pdf_text(self, url, timeout=30):
    """Download the PDF at ``url`` through the browser and return its text.

    The URL is opened with ``self.goto`` and the resulting window is closed
    again with ``self.close_window``. The file that shows up in the current
    working directory afterwards is parsed with ``PdfReader`` and deleted.

    NOTE(review): this relies on the browser saving downloads into the
    current working directory (the Chrome prefs in this file set
    ``download.default_directory`` to ``os.getcwd()`` when the driver is
    created) -- confirm the working directory does not change in between.

    :param url: Address of the PDF document to fetch.
    :param timeout: Maximum number of seconds to wait for the downloaded
        file to appear before giving up.
    :return: The extracted text of all pages, each page followed by a
        newline.
    :raises FileNotFoundError: If no new file appears within ``timeout``.
    """
    # Snapshot the directory first so that only a file created by this
    # download can ever be picked up (and later deleted). Taking the most
    # recently modified entry of the whole directory could select -- and
    # remove -- an unrelated file when the download fails or is still
    # running.
    already_present = set(glob.glob("*"))

    window_id = self.goto(url)
    self.close_window(window_id)

    deadline = time.monotonic() + timeout
    while True:
        new_files = [
            name
            for name in glob.glob("*")
            if name not in already_present
            and os.path.isfile(name)
            # Skip partial-download artifacts; presumably Chrome renames
            # them to the final name once the transfer is complete.
            and not name.endswith((".crdownload", ".tmp"))
        ]
        if new_files:
            break
        if time.monotonic() >= deadline:
            raise FileNotFoundError(f"No downloaded file appeared for {url}")
        time.sleep(0.25)

    downloaded_file = max(new_files, key=os.path.getmtime)
    try:
        reader = PdfReader(downloaded_file)
        return "".join(
            page.extract_text(
                extraction_mode="layout",
                layout_mode_space_vertically=False,
            )
            + "\n"
            for page in reader.pages
        )
    finally:
        # Remove the download even when parsing fails so files do not
        # pile up in the working directory.
        os.remove(downloaded_file)
|
654
|
+
|
620
655
|
def execute(self, code):
|
621
656
|
loc = {}
|
622
657
|
try:
|
parsagon/main.py
CHANGED
@@ -142,6 +142,11 @@ def get_args(argv):
|
|
142
142
|
action="store_true",
|
143
143
|
help="run in optimized mode",
|
144
144
|
)
|
145
|
+
parser_run.add_argument(
|
146
|
+
"--use_proxy",
|
147
|
+
action="store_true",
|
148
|
+
help="run with proxy",
|
149
|
+
)
|
145
150
|
parser_run.set_defaults(func=run)
|
146
151
|
|
147
152
|
# Delete
|
parsagon/runs.py
CHANGED
@@ -29,6 +29,7 @@ def run(
|
|
29
29
|
output_file=None,
|
30
30
|
undetected=False,
|
31
31
|
optimize=False,
|
32
|
+
use_proxy=False,
|
32
33
|
verbose=False,
|
33
34
|
):
|
34
35
|
"""
|
@@ -80,7 +81,7 @@ def run(
|
|
80
81
|
time.sleep(5)
|
81
82
|
|
82
83
|
run = create_pipeline_run(pipeline_id, variables, True)
|
83
|
-
code = get_pipeline_code(program_name, variables, headless, undetected, optimize)["code"]
|
84
|
+
code = get_pipeline_code(program_name, variables, headless, undetected, optimize, use_proxy)["code"]
|
84
85
|
start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
85
86
|
run_data = {"start_time": start_time}
|
86
87
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.14.24
|
3
|
+
Version: 0.14.26
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -16,10 +16,11 @@ Requires-Dist: rich ==13.6.0
|
|
16
16
|
Requires-Dist: PyVirtualDisplay ==3.0
|
17
17
|
Requires-Dist: selenium-wire ==5.1.0
|
18
18
|
Requires-Dist: cssselect ==1.1.0
|
19
|
-
Requires-Dist: undetected-chromedriver ==3.5.
|
19
|
+
Requires-Dist: undetected-chromedriver ==3.5.5
|
20
20
|
Requires-Dist: webdriver-manager ==4.0.1
|
21
21
|
Requires-Dist: jsonpath-ng ==1.5.3
|
22
22
|
Requires-Dist: simplejson ==3.19.1
|
23
|
+
Requires-Dist: pypdf ==4.2.0
|
23
24
|
Provides-Extra: dev
|
24
25
|
Requires-Dist: pytest ==7.3.2 ; extra == 'dev'
|
25
26
|
Requires-Dist: pytest-mock ==3.11.1 ; extra == 'dev'
|
@@ -1,16 +1,16 @@
|
|
1
1
|
parsagon/__init__.py,sha256=_S5MlYHBViB4iY96_UbGo3mfGndE8MmFmb1EORJppK8,452
|
2
|
-
parsagon/api.py,sha256=
|
2
|
+
parsagon/api.py,sha256=z52b5zBMTaIpWw9tu5-fF0Ngt58Vtm5TFnreUm8O_XU,9834
|
3
3
|
parsagon/assistant.py,sha256=V3NL6UdDqe74W_X3wPQ1qwFuJRvHzitXtOt-XSCXvds,4065
|
4
4
|
parsagon/create.py,sha256=igrJN8jVP5p3P9-dPSabrHGOQOlG2Flx7ep4SqEDB14,4373
|
5
5
|
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
6
|
parsagon/edit.py,sha256=aLeAcp1hFHm29qh6do21cfYykK76CsHC4tc9Uh2_2bk,3006
|
7
7
|
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
8
|
-
parsagon/executor.py,sha256=
|
8
|
+
parsagon/executor.py,sha256=Vijqx5fbYmglC1EFxMp4K0oqDTOmCy0ruBxZ784X0Uk,26932
|
9
9
|
parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
|
10
10
|
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
11
|
-
parsagon/main.py,sha256=
|
11
|
+
parsagon/main.py,sha256=WPxrT1ZyPe6wNSFP7GnnKIGisgPM2dFieHP2TC5TGVQ,9467
|
12
12
|
parsagon/print.py,sha256=-7iVKil0W9e8zX1EJMcdlqNdfpmfPxKTBtZfwzWpGYU,4106
|
13
|
-
parsagon/runs.py,sha256=
|
13
|
+
parsagon/runs.py,sha256=XXbk3eUJjBBETBq6AUyBrNPIR18svCRTSCSnhN2GpOc,8560
|
14
14
|
parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
|
15
15
|
parsagon/settings.py,sha256=ejd9wGCOEB68n8a0M2SyzlmlwPofnzMKaxpBqYhcs34,3552
|
16
16
|
parsagon/gui/__init__.py,sha256=ZvKZfcchSnBh__UY_XeMnQJACd1EG99ix3_SfSHth8g,68
|
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
|
|
25
25
|
parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
|
26
26
|
parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
|
27
27
|
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
28
|
-
parsagon-0.14.
|
29
|
-
parsagon-0.14.
|
30
|
-
parsagon-0.14.
|
31
|
-
parsagon-0.14.
|
32
|
-
parsagon-0.14.
|
28
|
+
parsagon-0.14.26.dist-info/METADATA,sha256=beaG5po0hkamZx4Frl-B94LQVWEvzr_afeXG13j73Ko,2461
|
29
|
+
parsagon-0.14.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
30
|
+
parsagon-0.14.26.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
+
parsagon-0.14.26.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
+
parsagon-0.14.26.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|