parsagon 0.14.25__py3-none-any.whl → 0.14.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsagon/api.py +13 -1
- parsagon/executor.py +35 -0
- parsagon/runs.py +5 -2
- {parsagon-0.14.25.dist-info → parsagon-0.14.27.dist-info}/METADATA +3 -2
- {parsagon-0.14.25.dist-info → parsagon-0.14.27.dist-info}/RECORD +8 -8
- {parsagon-0.14.25.dist-info → parsagon-0.14.27.dist-info}/WHEEL +0 -0
- {parsagon-0.14.25.dist-info → parsagon-0.14.27.dist-info}/entry_points.txt +0 -0
- {parsagon-0.14.25.dist-info → parsagon-0.14.27.dist-info}/top_level.txt +0 -0
parsagon/api.py
CHANGED
@@ -235,7 +235,19 @@ def get_pipelines():
|
|
235
235
|
return _api_call(httpx.get, f"/pipelines/")
|
236
236
|
|
237
237
|
|
238
|
-
def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize, use_proxy):
|
238
|
+
def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize, use_proxy, pipeline_id=None):
|
239
|
+
if pipeline_id:
|
240
|
+
return _api_call(
|
241
|
+
httpx.post,
|
242
|
+
f"/pipelines/{pipeline_id}/code/",
|
243
|
+
json={
|
244
|
+
"variables": variables,
|
245
|
+
"headless": headless,
|
246
|
+
"use_uc": use_uc,
|
247
|
+
"optimize": optimize,
|
248
|
+
"use_proxy": use_proxy,
|
249
|
+
},
|
250
|
+
)
|
239
251
|
escaped_pipeline_name = double_quote(pipeline_name)
|
240
252
|
with RaiseProgramNotFound(pipeline_name):
|
241
253
|
return _api_call(
|
parsagon/executor.py
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
from collections import defaultdict
|
2
2
|
import copy
|
3
|
+
import glob
|
3
4
|
import json
|
4
5
|
import logging
|
6
|
+
import os
|
5
7
|
from pathlib import Path
|
6
8
|
import psutil
|
7
9
|
import time
|
8
10
|
from urllib.parse import urljoin
|
9
11
|
|
10
12
|
import lxml.html
|
13
|
+
from pypdf import PdfReader
|
11
14
|
from pyvirtualdisplay import Display
|
12
15
|
import undetected_chromedriver as uc
|
13
16
|
from selenium import webdriver
|
@@ -92,12 +95,30 @@ class Executor:
|
|
92
95
|
chrome_options.add_argument("--start-maximized")
|
93
96
|
for option in options:
|
94
97
|
chrome_options.add_argument(option)
|
98
|
+
chrome_options.add_experimental_option(
|
99
|
+
"prefs",
|
100
|
+
{
|
101
|
+
"download.default_directory": os.getcwd(),
|
102
|
+
"download.prompt_for_download": False,
|
103
|
+
"download.directory_upgrade": True,
|
104
|
+
"plugins.always_open_pdf_externally": True,
|
105
|
+
},
|
106
|
+
)
|
95
107
|
self.driver = uc.Chrome(driver_executable_path=driver_executable_path, options=chrome_options)
|
96
108
|
else:
|
97
109
|
chrome_options = webdriver.ChromeOptions()
|
98
110
|
chrome_options.add_argument("--start-maximized")
|
99
111
|
for option in options:
|
100
112
|
chrome_options.add_argument(option)
|
113
|
+
chrome_options.add_experimental_option(
|
114
|
+
"prefs",
|
115
|
+
{
|
116
|
+
"download.default_directory": os.getcwd(),
|
117
|
+
"download.prompt_for_download": False,
|
118
|
+
"download.directory_upgrade": True,
|
119
|
+
"plugins.always_open_pdf_externally": True,
|
120
|
+
},
|
121
|
+
)
|
101
122
|
self.driver = webdriver.Chrome(service=ChromeService(driver_executable_path), options=chrome_options)
|
102
123
|
if page_load_timeout:
|
103
124
|
self.driver.set_page_load_timeout(page_load_timeout)
|
@@ -124,6 +145,7 @@ class Executor:
|
|
124
145
|
"get_str_about_data": get_str_about_data,
|
125
146
|
"get_bool_about_data": get_bool_about_data,
|
126
147
|
"get_json_about_data": get_json_about_data,
|
148
|
+
"get_pdf_text": self.get_pdf_text,
|
127
149
|
}
|
128
150
|
self.custom_functions = {}
|
129
151
|
self.infer = infer
|
@@ -617,6 +639,19 @@ class Executor:
|
|
617
639
|
self.add_custom_function(call_id, custom_function)
|
618
640
|
return scraped_data
|
619
641
|
|
642
|
+
def get_pdf_text(self, url):
|
643
|
+
window_id = self.goto(url)
|
644
|
+
self.close_window(window_id)
|
645
|
+
files = glob.glob("*")
|
646
|
+
most_recent_file = max(files, key=os.path.getmtime)
|
647
|
+
reader = PdfReader(most_recent_file)
|
648
|
+
text = ""
|
649
|
+
for page in reader.pages:
|
650
|
+
text += page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
|
651
|
+
text += "\n"
|
652
|
+
os.remove(most_recent_file)
|
653
|
+
return text
|
654
|
+
|
620
655
|
def execute(self, code):
|
621
656
|
loc = {}
|
622
657
|
try:
|
parsagon/runs.py
CHANGED
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
|
|
22
22
|
|
23
23
|
def run(
|
24
24
|
program_name,
|
25
|
+
program_id=None,
|
25
26
|
variables={},
|
26
27
|
headless=False,
|
27
28
|
remote=False,
|
@@ -42,7 +43,7 @@ def run(
|
|
42
43
|
raise ParsagonException("Variables must be a dictionary")
|
43
44
|
|
44
45
|
logger.info("Preparing to run program %s", program_name)
|
45
|
-
pipeline_id = get_pipeline(program_name)["id"]
|
46
|
+
pipeline_id = program_id or get_pipeline(program_name)["id"]
|
46
47
|
|
47
48
|
if remote:
|
48
49
|
result = create_pipeline_run(pipeline_id, variables, False)
|
@@ -81,7 +82,9 @@ def run(
|
|
81
82
|
time.sleep(5)
|
82
83
|
|
83
84
|
run = create_pipeline_run(pipeline_id, variables, True)
|
84
|
-
code = get_pipeline_code(
|
85
|
+
code = get_pipeline_code(
|
86
|
+
program_name, variables, headless, undetected, optimize, use_proxy, pipeline_id=program_id
|
87
|
+
)["code"]
|
85
88
|
start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
|
86
89
|
run_data = {"start_time": start_time}
|
87
90
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.14.25
|
3
|
+
Version: 0.14.27
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -16,10 +16,11 @@ Requires-Dist: rich ==13.6.0
|
|
16
16
|
Requires-Dist: PyVirtualDisplay ==3.0
|
17
17
|
Requires-Dist: selenium-wire ==5.1.0
|
18
18
|
Requires-Dist: cssselect ==1.1.0
|
19
|
-
Requires-Dist: undetected-chromedriver ==3.5.
|
19
|
+
Requires-Dist: undetected-chromedriver ==3.5.5
|
20
20
|
Requires-Dist: webdriver-manager ==4.0.1
|
21
21
|
Requires-Dist: jsonpath-ng ==1.5.3
|
22
22
|
Requires-Dist: simplejson ==3.19.1
|
23
|
+
Requires-Dist: pypdf ==4.2.0
|
23
24
|
Provides-Extra: dev
|
24
25
|
Requires-Dist: pytest ==7.3.2 ; extra == 'dev'
|
25
26
|
Requires-Dist: pytest-mock ==3.11.1 ; extra == 'dev'
|
@@ -1,16 +1,16 @@
|
|
1
1
|
parsagon/__init__.py,sha256=_S5MlYHBViB4iY96_UbGo3mfGndE8MmFmb1EORJppK8,452
|
2
|
-
parsagon/api.py,sha256=
|
2
|
+
parsagon/api.py,sha256=DpnHmbP61je3qeX9_6mUFt44Ps0f4TwaSRUsWVueAIM,10203
|
3
3
|
parsagon/assistant.py,sha256=V3NL6UdDqe74W_X3wPQ1qwFuJRvHzitXtOt-XSCXvds,4065
|
4
4
|
parsagon/create.py,sha256=igrJN8jVP5p3P9-dPSabrHGOQOlG2Flx7ep4SqEDB14,4373
|
5
5
|
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
6
|
parsagon/edit.py,sha256=aLeAcp1hFHm29qh6do21cfYykK76CsHC4tc9Uh2_2bk,3006
|
7
7
|
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
8
|
-
parsagon/executor.py,sha256=
|
8
|
+
parsagon/executor.py,sha256=Vijqx5fbYmglC1EFxMp4K0oqDTOmCy0ruBxZ784X0Uk,26932
|
9
9
|
parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
|
10
10
|
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
11
11
|
parsagon/main.py,sha256=WPxrT1ZyPe6wNSFP7GnnKIGisgPM2dFieHP2TC5TGVQ,9467
|
12
12
|
parsagon/print.py,sha256=-7iVKil0W9e8zX1EJMcdlqNdfpmfPxKTBtZfwzWpGYU,4106
|
13
|
-
parsagon/runs.py,sha256=
|
13
|
+
parsagon/runs.py,sha256=gi36oak3RxKo1BxB0Bc6GV8K06vrrBrEXu3NYkIVPwY,8633
|
14
14
|
parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
|
15
15
|
parsagon/settings.py,sha256=ejd9wGCOEB68n8a0M2SyzlmlwPofnzMKaxpBqYhcs34,3552
|
16
16
|
parsagon/gui/__init__.py,sha256=ZvKZfcchSnBh__UY_XeMnQJACd1EG99ix3_SfSHth8g,68
|
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
|
|
25
25
|
parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
|
26
26
|
parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
|
27
27
|
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
28
|
-
parsagon-0.14.
|
29
|
-
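parsagon-0.14.25.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
30
|
-
parsagon-0.14.25.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
-
parsagon-0.14.25.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
-
parsagon-0.14.25.dist-info/RECORD,,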
|
28
|
+
parsagon-0.14.27.dist-info/METADATA,sha256=zVcojJBz_ObyKy4oxzOdWZqP51O7MqhrKJYSQMz-_Ns,2461
|
29
|
+
parsagon-0.14.27.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
30
|
+
parsagon-0.14.27.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
+
parsagon-0.14.27.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
+
parsagon-0.14.27.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|