parsagon 0.14.25__py3-none-any.whl → 0.14.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parsagon/executor.py CHANGED
@@ -1,13 +1,16 @@
1
1
  from collections import defaultdict
2
2
  import copy
3
+ import glob
3
4
  import json
4
5
  import logging
6
+ import os
5
7
  from pathlib import Path
6
8
  import psutil
7
9
  import time
8
10
  from urllib.parse import urljoin
9
11
 
10
12
  import lxml.html
13
+ from pypdf import PdfReader
11
14
  from pyvirtualdisplay import Display
12
15
  import undetected_chromedriver as uc
13
16
  from selenium import webdriver
@@ -92,12 +95,30 @@ class Executor:
92
95
  chrome_options.add_argument("--start-maximized")
93
96
  for option in options:
94
97
  chrome_options.add_argument(option)
98
+ chrome_options.add_experimental_option(
99
+ "prefs",
100
+ {
101
+ "download.default_directory": os.getcwd(),
102
+ "download.prompt_for_download": False,
103
+ "download.directory_upgrade": True,
104
+ "plugins.always_open_pdf_externally": True,
105
+ },
106
+ )
95
107
  self.driver = uc.Chrome(driver_executable_path=driver_executable_path, options=chrome_options)
96
108
  else:
97
109
  chrome_options = webdriver.ChromeOptions()
98
110
  chrome_options.add_argument("--start-maximized")
99
111
  for option in options:
100
112
  chrome_options.add_argument(option)
113
+ chrome_options.add_experimental_option(
114
+ "prefs",
115
+ {
116
+ "download.default_directory": os.getcwd(),
117
+ "download.prompt_for_download": False,
118
+ "download.directory_upgrade": True,
119
+ "plugins.always_open_pdf_externally": True,
120
+ },
121
+ )
101
122
  self.driver = webdriver.Chrome(service=ChromeService(driver_executable_path), options=chrome_options)
102
123
  if page_load_timeout:
103
124
  self.driver.set_page_load_timeout(page_load_timeout)
@@ -124,6 +145,7 @@ class Executor:
124
145
  "get_str_about_data": get_str_about_data,
125
146
  "get_bool_about_data": get_bool_about_data,
126
147
  "get_json_about_data": get_json_about_data,
148
+ "get_pdf_text": self.get_pdf_text,
127
149
  }
128
150
  self.custom_functions = {}
129
151
  self.infer = infer
@@ -617,6 +639,19 @@ class Executor:
617
639
  self.add_custom_function(call_id, custom_function)
618
640
  return scraped_data
619
641
 
642
+ def get_pdf_text(self, url):
643
+ window_id = self.goto(url)
644
+ self.close_window(window_id)
645
+ files = glob.glob("*")
646
+ most_recent_file = max(files, key=os.path.getmtime)
647
+ reader = PdfReader(most_recent_file)
648
+ text = ""
649
+ for page in reader.pages:
650
+ text += page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
651
+ text += "\n"
652
+ os.remove(most_recent_file)
653
+ return text
654
+
620
655
  def execute(self, code):
621
656
  loc = {}
622
657
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.14.25
3
+ Version: 0.14.26
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -16,10 +16,11 @@ Requires-Dist: rich ==13.6.0
16
16
  Requires-Dist: PyVirtualDisplay ==3.0
17
17
  Requires-Dist: selenium-wire ==5.1.0
18
18
  Requires-Dist: cssselect ==1.1.0
19
- Requires-Dist: undetected-chromedriver ==3.5.3
19
+ Requires-Dist: undetected-chromedriver ==3.5.5
20
20
  Requires-Dist: webdriver-manager ==4.0.1
21
21
  Requires-Dist: jsonpath-ng ==1.5.3
22
22
  Requires-Dist: simplejson ==3.19.1
23
+ Requires-Dist: pypdf ==4.2.0
23
24
  Provides-Extra: dev
24
25
  Requires-Dist: pytest ==7.3.2 ; extra == 'dev'
25
26
  Requires-Dist: pytest-mock ==3.11.1 ; extra == 'dev'
@@ -5,7 +5,7 @@ parsagon/create.py,sha256=igrJN8jVP5p3P9-dPSabrHGOQOlG2Flx7ep4SqEDB14,4373
5
5
  parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
6
6
  parsagon/edit.py,sha256=aLeAcp1hFHm29qh6do21cfYykK76CsHC4tc9Uh2_2bk,3006
7
7
  parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
8
- parsagon/executor.py,sha256=9KVsw_Hh4NEwIvWau8MnkAQ2mK83nWrFy1B3iAGzm9Q,25620
8
+ parsagon/executor.py,sha256=Vijqx5fbYmglC1EFxMp4K0oqDTOmCy0ruBxZ784X0Uk,26932
9
9
  parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
10
10
  parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
11
11
  parsagon/main.py,sha256=WPxrT1ZyPe6wNSFP7GnnKIGisgPM2dFieHP2TC5TGVQ,9467
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
25
25
  parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
26
26
  parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
27
27
  parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
28
- parsagon-0.14.25.dist-info/METADATA,sha256=SSWWJVRIrKeqzti57XG6tZdmgAJzegU8zXTt_5pBnu4,2432
29
- parsagon-0.14.25.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
30
- parsagon-0.14.25.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
31
- parsagon-0.14.25.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
32
- parsagon-0.14.25.dist-info/RECORD,,
28
+ parsagon-0.14.26.dist-info/METADATA,sha256=beaG5po0hkamZx4Frl-B94LQVWEvzr_afeXG13j73Ko,2461
29
+ parsagon-0.14.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
30
+ parsagon-0.14.26.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
31
+ parsagon-0.14.26.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
32
+ parsagon-0.14.26.dist-info/RECORD,,