parsagon 0.14.24__py3-none-any.whl → 0.14.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parsagon/api.py CHANGED
@@ -235,7 +235,7 @@ def get_pipelines():
235
235
  return _api_call(httpx.get, f"/pipelines/")
236
236
 
237
237
 
238
- def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize):
238
+ def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize, use_proxy):
239
239
  escaped_pipeline_name = double_quote(pipeline_name)
240
240
  with RaiseProgramNotFound(pipeline_name):
241
241
  return _api_call(
@@ -246,6 +246,7 @@ def get_pipeline_code(pipeline_name, variables, headless, use_uc, optimize):
246
246
  "headless": headless,
247
247
  "use_uc": use_uc,
248
248
  "optimize": optimize,
249
+ "use_proxy": use_proxy,
249
250
  },
250
251
  )
251
252
 
parsagon/executor.py CHANGED
@@ -1,13 +1,16 @@
1
1
  from collections import defaultdict
2
2
  import copy
3
+ import glob
3
4
  import json
4
5
  import logging
6
+ import os
5
7
  from pathlib import Path
6
8
  import psutil
7
9
  import time
8
10
  from urllib.parse import urljoin
9
11
 
10
12
  import lxml.html
13
+ from pypdf import PdfReader
11
14
  from pyvirtualdisplay import Display
12
15
  import undetected_chromedriver as uc
13
16
  from selenium import webdriver
@@ -92,12 +95,30 @@ class Executor:
92
95
  chrome_options.add_argument("--start-maximized")
93
96
  for option in options:
94
97
  chrome_options.add_argument(option)
98
+ chrome_options.add_experimental_option(
99
+ "prefs",
100
+ {
101
+ "download.default_directory": os.getcwd(),
102
+ "download.prompt_for_download": False,
103
+ "download.directory_upgrade": True,
104
+ "plugins.always_open_pdf_externally": True,
105
+ },
106
+ )
95
107
  self.driver = uc.Chrome(driver_executable_path=driver_executable_path, options=chrome_options)
96
108
  else:
97
109
  chrome_options = webdriver.ChromeOptions()
98
110
  chrome_options.add_argument("--start-maximized")
99
111
  for option in options:
100
112
  chrome_options.add_argument(option)
113
+ chrome_options.add_experimental_option(
114
+ "prefs",
115
+ {
116
+ "download.default_directory": os.getcwd(),
117
+ "download.prompt_for_download": False,
118
+ "download.directory_upgrade": True,
119
+ "plugins.always_open_pdf_externally": True,
120
+ },
121
+ )
101
122
  self.driver = webdriver.Chrome(service=ChromeService(driver_executable_path), options=chrome_options)
102
123
  if page_load_timeout:
103
124
  self.driver.set_page_load_timeout(page_load_timeout)
@@ -124,6 +145,7 @@ class Executor:
124
145
  "get_str_about_data": get_str_about_data,
125
146
  "get_bool_about_data": get_bool_about_data,
126
147
  "get_json_about_data": get_json_about_data,
148
+ "get_pdf_text": self.get_pdf_text,
127
149
  }
128
150
  self.custom_functions = {}
129
151
  self.infer = infer
@@ -617,6 +639,19 @@ class Executor:
617
639
  self.add_custom_function(call_id, custom_function)
618
640
  return scraped_data
619
641
 
642
+ def get_pdf_text(self, url):
643
+ window_id = self.goto(url)
644
+ self.close_window(window_id)
645
+ files = glob.glob("*")
646
+ most_recent_file = max(files, key=os.path.getmtime)
647
+ reader = PdfReader(most_recent_file)
648
+ text = ""
649
+ for page in reader.pages:
650
+ text += page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
651
+ text += "\n"
652
+ os.remove(most_recent_file)
653
+ return text
654
+
620
655
  def execute(self, code):
621
656
  loc = {}
622
657
  try:
parsagon/main.py CHANGED
@@ -142,6 +142,11 @@ def get_args(argv):
142
142
  action="store_true",
143
143
  help="run in optimized mode",
144
144
  )
145
+ parser_run.add_argument(
146
+ "--use_proxy",
147
+ action="store_true",
148
+ help="run with proxy",
149
+ )
145
150
  parser_run.set_defaults(func=run)
146
151
 
147
152
  # Delete
parsagon/runs.py CHANGED
@@ -29,6 +29,7 @@ def run(
29
29
  output_file=None,
30
30
  undetected=False,
31
31
  optimize=False,
32
+ use_proxy=False,
32
33
  verbose=False,
33
34
  ):
34
35
  """
@@ -80,7 +81,7 @@ def run(
80
81
  time.sleep(5)
81
82
 
82
83
  run = create_pipeline_run(pipeline_id, variables, True)
83
- code = get_pipeline_code(program_name, variables, headless, undetected, optimize)["code"]
84
+ code = get_pipeline_code(program_name, variables, headless, undetected, optimize, use_proxy)["code"]
84
85
  start_time = datetime.datetime.now(datetime.timezone.utc).isoformat()
85
86
  run_data = {"start_time": start_time}
86
87
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.14.24
3
+ Version: 0.14.26
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -16,10 +16,11 @@ Requires-Dist: rich ==13.6.0
16
16
  Requires-Dist: PyVirtualDisplay ==3.0
17
17
  Requires-Dist: selenium-wire ==5.1.0
18
18
  Requires-Dist: cssselect ==1.1.0
19
- Requires-Dist: undetected-chromedriver ==3.5.3
19
+ Requires-Dist: undetected-chromedriver ==3.5.5
20
20
  Requires-Dist: webdriver-manager ==4.0.1
21
21
  Requires-Dist: jsonpath-ng ==1.5.3
22
22
  Requires-Dist: simplejson ==3.19.1
23
+ Requires-Dist: pypdf ==4.2.0
23
24
  Provides-Extra: dev
24
25
  Requires-Dist: pytest ==7.3.2 ; extra == 'dev'
25
26
  Requires-Dist: pytest-mock ==3.11.1 ; extra == 'dev'
@@ -1,16 +1,16 @@
1
1
  parsagon/__init__.py,sha256=_S5MlYHBViB4iY96_UbGo3mfGndE8MmFmb1EORJppK8,452
2
- parsagon/api.py,sha256=7NwmLCMfbje3-PIvxPJv-3g3TEnSKpSDPyUPMZ2XRhI,9783
2
+ parsagon/api.py,sha256=z52b5zBMTaIpWw9tu5-fF0Ngt58Vtm5TFnreUm8O_XU,9834
3
3
  parsagon/assistant.py,sha256=V3NL6UdDqe74W_X3wPQ1qwFuJRvHzitXtOt-XSCXvds,4065
4
4
  parsagon/create.py,sha256=igrJN8jVP5p3P9-dPSabrHGOQOlG2Flx7ep4SqEDB14,4373
5
5
  parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
6
6
  parsagon/edit.py,sha256=aLeAcp1hFHm29qh6do21cfYykK76CsHC4tc9Uh2_2bk,3006
7
7
  parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
8
- parsagon/executor.py,sha256=9KVsw_Hh4NEwIvWau8MnkAQ2mK83nWrFy1B3iAGzm9Q,25620
8
+ parsagon/executor.py,sha256=Vijqx5fbYmglC1EFxMp4K0oqDTOmCy0ruBxZ784X0Uk,26932
9
9
  parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
10
10
  parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
11
- parsagon/main.py,sha256=3BskD1QPqDOfSwLGJBIn0-npNxRy5KZUtKIVNAKYwtA,9349
11
+ parsagon/main.py,sha256=WPxrT1ZyPe6wNSFP7GnnKIGisgPM2dFieHP2TC5TGVQ,9467
12
12
  parsagon/print.py,sha256=-7iVKil0W9e8zX1EJMcdlqNdfpmfPxKTBtZfwzWpGYU,4106
13
- parsagon/runs.py,sha256=Td6B1OpG6FZbXZnIwbcL9KgfMesN_KCHGERWQ7_tUOA,8528
13
+ parsagon/runs.py,sha256=XXbk3eUJjBBETBq6AUyBrNPIR18svCRTSCSnhN2GpOc,8560
14
14
  parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
15
15
  parsagon/settings.py,sha256=ejd9wGCOEB68n8a0M2SyzlmlwPofnzMKaxpBqYhcs34,3552
16
16
  parsagon/gui/__init__.py,sha256=ZvKZfcchSnBh__UY_XeMnQJACd1EG99ix3_SfSHth8g,68
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
25
25
  parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
26
26
  parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
27
27
  parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
28
- parsagon-0.14.24.dist-info/METADATA,sha256=i3ZF36WIWx4eSZwdP9m_X1SrfR51bJY6goxwluyPiQk,2432
29
- parsagon-0.14.24.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
30
- parsagon-0.14.24.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
31
- parsagon-0.14.24.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
32
- parsagon-0.14.24.dist-info/RECORD,,
28
+ parsagon-0.14.26.dist-info/METADATA,sha256=beaG5po0hkamZx4Frl-B94LQVWEvzr_afeXG13j73Ko,2461
29
+ parsagon-0.14.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
30
+ parsagon-0.14.26.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
31
+ parsagon-0.14.26.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
32
+ parsagon-0.14.26.dist-info/RECORD,,