parsagon 0.14.25__py3-none-any.whl → 0.14.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsagon/executor.py +35 -0
- {parsagon-0.14.25.dist-info → parsagon-0.14.26.dist-info}/METADATA +3 -2
- {parsagon-0.14.25.dist-info → parsagon-0.14.26.dist-info}/RECORD +6 -6
- {parsagon-0.14.25.dist-info → parsagon-0.14.26.dist-info}/WHEEL +0 -0
- {parsagon-0.14.25.dist-info → parsagon-0.14.26.dist-info}/entry_points.txt +0 -0
- {parsagon-0.14.25.dist-info → parsagon-0.14.26.dist-info}/top_level.txt +0 -0
parsagon/executor.py
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
from collections import defaultdict
|
2
2
|
import copy
|
3
|
+
import glob
|
3
4
|
import json
|
4
5
|
import logging
|
6
|
+
import os
|
5
7
|
from pathlib import Path
|
6
8
|
import psutil
|
7
9
|
import time
|
8
10
|
from urllib.parse import urljoin
|
9
11
|
|
10
12
|
import lxml.html
|
13
|
+
from pypdf import PdfReader
|
11
14
|
from pyvirtualdisplay import Display
|
12
15
|
import undetected_chromedriver as uc
|
13
16
|
from selenium import webdriver
|
@@ -92,12 +95,30 @@ class Executor:
|
|
92
95
|
chrome_options.add_argument("--start-maximized")
|
93
96
|
for option in options:
|
94
97
|
chrome_options.add_argument(option)
|
98
|
+
chrome_options.add_experimental_option(
|
99
|
+
"prefs",
|
100
|
+
{
|
101
|
+
"download.default_directory": os.getcwd(),
|
102
|
+
"download.prompt_for_download": False,
|
103
|
+
"download.directory_upgrade": True,
|
104
|
+
"plugins.always_open_pdf_externally": True,
|
105
|
+
},
|
106
|
+
)
|
95
107
|
self.driver = uc.Chrome(driver_executable_path=driver_executable_path, options=chrome_options)
|
96
108
|
else:
|
97
109
|
chrome_options = webdriver.ChromeOptions()
|
98
110
|
chrome_options.add_argument("--start-maximized")
|
99
111
|
for option in options:
|
100
112
|
chrome_options.add_argument(option)
|
113
|
+
chrome_options.add_experimental_option(
|
114
|
+
"prefs",
|
115
|
+
{
|
116
|
+
"download.default_directory": os.getcwd(),
|
117
|
+
"download.prompt_for_download": False,
|
118
|
+
"download.directory_upgrade": True,
|
119
|
+
"plugins.always_open_pdf_externally": True,
|
120
|
+
},
|
121
|
+
)
|
101
122
|
self.driver = webdriver.Chrome(service=ChromeService(driver_executable_path), options=chrome_options)
|
102
123
|
if page_load_timeout:
|
103
124
|
self.driver.set_page_load_timeout(page_load_timeout)
|
@@ -124,6 +145,7 @@ class Executor:
|
|
124
145
|
"get_str_about_data": get_str_about_data,
|
125
146
|
"get_bool_about_data": get_bool_about_data,
|
126
147
|
"get_json_about_data": get_json_about_data,
|
148
|
+
"get_pdf_text": self.get_pdf_text,
|
127
149
|
}
|
128
150
|
self.custom_functions = {}
|
129
151
|
self.infer = infer
|
@@ -617,6 +639,19 @@ class Executor:
|
|
617
639
|
self.add_custom_function(call_id, custom_function)
|
618
640
|
return scraped_data
|
619
641
|
|
642
|
+
def get_pdf_text(self, url):
|
643
|
+
window_id = self.goto(url)
|
644
|
+
self.close_window(window_id)
|
645
|
+
files = glob.glob("*")
|
646
|
+
most_recent_file = max(files, key=os.path.getmtime)
|
647
|
+
reader = PdfReader(most_recent_file)
|
648
|
+
text = ""
|
649
|
+
for page in reader.pages:
|
650
|
+
text += page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
|
651
|
+
text += "\n"
|
652
|
+
os.remove(most_recent_file)
|
653
|
+
return text
|
654
|
+
|
620
655
|
def execute(self, code):
|
621
656
|
loc = {}
|
622
657
|
try:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.14.25
|
3
|
+
Version: 0.14.26
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -16,10 +16,11 @@ Requires-Dist: rich ==13.6.0
|
|
16
16
|
Requires-Dist: PyVirtualDisplay ==3.0
|
17
17
|
Requires-Dist: selenium-wire ==5.1.0
|
18
18
|
Requires-Dist: cssselect ==1.1.0
|
19
|
-
Requires-Dist: undetected-chromedriver ==3.5.
|
19
|
+
Requires-Dist: undetected-chromedriver ==3.5.5
|
20
20
|
Requires-Dist: webdriver-manager ==4.0.1
|
21
21
|
Requires-Dist: jsonpath-ng ==1.5.3
|
22
22
|
Requires-Dist: simplejson ==3.19.1
|
23
|
+
Requires-Dist: pypdf ==4.2.0
|
23
24
|
Provides-Extra: dev
|
24
25
|
Requires-Dist: pytest ==7.3.2 ; extra == 'dev'
|
25
26
|
Requires-Dist: pytest-mock ==3.11.1 ; extra == 'dev'
|
@@ -5,7 +5,7 @@ parsagon/create.py,sha256=igrJN8jVP5p3P9-dPSabrHGOQOlG2Flx7ep4SqEDB14,4373
|
|
5
5
|
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
6
|
parsagon/edit.py,sha256=aLeAcp1hFHm29qh6do21cfYykK76CsHC4tc9Uh2_2bk,3006
|
7
7
|
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
8
|
-
parsagon/executor.py,sha256=
|
8
|
+
parsagon/executor.py,sha256=Vijqx5fbYmglC1EFxMp4K0oqDTOmCy0ruBxZ784X0Uk,26932
|
9
9
|
parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
|
10
10
|
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
11
11
|
parsagon/main.py,sha256=WPxrT1ZyPe6wNSFP7GnnKIGisgPM2dFieHP2TC5TGVQ,9467
|
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
|
|
25
25
|
parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
|
26
26
|
parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
|
27
27
|
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
28
|
-
parsagon-0.14.
|
29
|
-
parsagon-0.14.25.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
30
|
-
parsagon-0.14.25.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
-
parsagon-0.14.25.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
-
parsagon-0.14.25.dist-info/RECORD,,
|
28
|
+
parsagon-0.14.26.dist-info/METADATA,sha256=beaG5po0hkamZx4Frl-B94LQVWEvzr_afeXG13j73Ko,2461
|
29
|
+
parsagon-0.14.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
30
|
+
parsagon-0.14.26.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
+
parsagon-0.14.26.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
+
parsagon-0.14.26.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|