parsagon 0.17.1__py3-none-any.whl → 0.17.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
parsagon/executor.py CHANGED
@@ -16,6 +16,7 @@ import httpx
16
16
  from lxml import etree
17
17
  import lxml.html
18
18
  from lxml.html.clean import Cleaner
19
+ import pymupdf4llm
19
20
  from pypdf import PdfReader
20
21
  from pyvirtualdisplay import Display
21
22
  import undetected_chromedriver as uc
@@ -706,6 +707,15 @@ class Executor:
706
707
  os.remove(most_recent_file)
707
708
  return text
708
709
 
710
+ def get_pdf_md(self, url):
711
+ window_id = self.goto(url)
712
+ self.close_window(window_id)
713
+ files = glob.glob("*")
714
+ most_recent_file = max(files, key=os.path.getmtime)
715
+ md_text = pymupdf4llm.to_markdown(most_recent_file)
716
+ os.remove(most_recent_file)
717
+ return md_text
718
+
709
719
  def str_to_iso8601(self, s):
710
720
  while s:
711
721
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: parsagon
3
- Version: 0.17.1
3
+ Version: 0.17.2
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -20,6 +20,8 @@ Requires-Dist: undetected-chromedriver==3.5.5
20
20
  Requires-Dist: webdriver-manager==4.0.2
21
21
  Requires-Dist: jsonpath-ng==1.5.3
22
22
  Requires-Dist: simplejson==3.19.1
23
+ Requires-Dist: PyMuPDF==1.26.4
24
+ Requires-Dist: pymupdf4llm==0.0.27
23
25
  Requires-Dist: pypdf==5.9.0
24
26
  Requires-Dist: python-dateutil==2.9.0.post0
25
27
  Requires-Dist: html2text==2024.2.26
@@ -5,7 +5,7 @@ parsagon/create.py,sha256=BERrBviwMvifg5OwApqdanvULJHHk39fIvnTCZN3Xkk,4432
5
5
  parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
6
6
  parsagon/edit.py,sha256=5gtnx0gNB7Gvz8ET00SczE-ZS0TomN1um6uObP-OObE,3120
7
7
  parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
8
- parsagon/executor.py,sha256=_jPdLxclmLekb03JCtxvslRpfUhVotL6rnHFw9EibXE,29927
8
+ parsagon/executor.py,sha256=iKSUHJ4yu-6YHerenIJfFuhoX100OmdDGemkBJZ03q8,30260
9
9
  parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
10
10
  parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
11
11
  parsagon/main.py,sha256=_ww08Fpf1S8ePXSDg2GqtmnQvdfSgMK_AiNq5CuhWko,10001
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
25
25
  parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
26
26
  parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
27
27
  parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
28
- parsagon-0.17.1.dist-info/METADATA,sha256=-ylaryWc8rMx_nq9m3Oz_WUsQ7Ygw37pCQqnQn-1hkc,2567
29
- parsagon-0.17.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
- parsagon-0.17.1.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
31
- parsagon-0.17.1.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
32
- parsagon-0.17.1.dist-info/RECORD,,
28
+ parsagon-0.17.2.dist-info/METADATA,sha256=v_vLvOpQcU4xM9QAq_XeTRu_19ICXOVCoVSA8YCgrdM,2633
29
+ parsagon-0.17.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
30
+ parsagon-0.17.2.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
31
+ parsagon-0.17.2.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
32
+ parsagon-0.17.2.dist-info/RECORD,,