parsagon-0.14.39-py3-none-any.whl → parsagon-0.15.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- parsagon/executor.py +7 -1
- parsagon/main.py +1 -0
- {parsagon-0.14.39.dist-info → parsagon-0.15.0.dist-info}/METADATA +1 -1
- {parsagon-0.14.39.dist-info → parsagon-0.15.0.dist-info}/RECORD +7 -7
- {parsagon-0.14.39.dist-info → parsagon-0.15.0.dist-info}/WHEEL +0 -0
- {parsagon-0.14.39.dist-info → parsagon-0.15.0.dist-info}/entry_points.txt +0 -0
- {parsagon-0.14.39.dist-info → parsagon-0.15.0.dist-info}/top_level.txt +0 -0
parsagon/executor.py
CHANGED
@@ -634,6 +634,7 @@ class Executor:
|
|
634
634
|
nodes = {}
|
635
635
|
css_selectors = {}
|
636
636
|
xpath_selectors = {}
|
637
|
+
flagged_fields = {}
|
637
638
|
if not self.infer:
|
638
639
|
field_types = get_schema_fields(schema)
|
639
640
|
for field, field_type in field_types.items():
|
@@ -659,13 +660,17 @@ class Executor:
|
|
659
660
|
else:
|
660
661
|
browser_print("Scraping data...")
|
661
662
|
result = scrape_page(self.get_visible_html(), schema, self.task, html)
|
663
|
+
print(result)
|
662
664
|
scraped_data = result["data"]
|
663
665
|
nodes = result["nodes"]
|
666
|
+
css_selectors = result["css"]
|
667
|
+
xpath_selectors = result["xpath"]
|
668
|
+
flagged_fields = result["flagged"]
|
664
669
|
if not scraped_data or not nodes:
|
665
670
|
raise ParsagonException(
|
666
671
|
f"Parsagon could not find any data on the page that would fit the format {schema}. You can try rephrasing your prompt, or you can run Parsagon in manual mode to click on elements you want to scrape."
|
667
672
|
)
|
668
|
-
browser_print(f"Scraped data:\n{scraped_data}")
|
673
|
+
browser_print(f"Scraped data:\n{scraped_data}\nFlagged fields: {[k for k, v in flagged_fields.items() if v]}\n")
|
669
674
|
|
670
675
|
custom_function = CustomFunction(
|
671
676
|
"scrape_data",
|
@@ -679,6 +684,7 @@ class Executor:
|
|
679
684
|
"nodes": nodes,
|
680
685
|
"css_selectors": css_selectors,
|
681
686
|
"xpath_selectors": xpath_selectors,
|
687
|
+
"flagged_fields": flagged_fields,
|
682
688
|
"scraped_data": copy.deepcopy(scraped_data),
|
683
689
|
}
|
684
690
|
],
|
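parsagon/main.py
CHANGED
(The hunks for parsagon/main.py and for METADATA are not included in this extract; the hunk below belongs to the RECORD manifest.)
{parsagon-0.14.39.dist-info → parsagon-0.15.0.dist-info}/RECORD
CHANGED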
@@ -5,10 +5,10 @@ parsagon/create.py,sha256=BERrBviwMvifg5OwApqdanvULJHHk39fIvnTCZN3Xkk,4432
|
|
5
5
|
parsagon/custom_function.py,sha256=oEj28qItaHUnsvLIHD7kg5QL3J3aO6rW6xKKP-H-Drs,770
|
6
6
|
parsagon/edit.py,sha256=5gtnx0gNB7Gvz8ET00SczE-ZS0TomN1um6uObP-OObE,3120
|
7
7
|
parsagon/exceptions.py,sha256=tG1vnpmUN1GdJ1GSpe1MaWH3zWmFLZCwtOfEGu8qPP0,910
|
8
|
-
parsagon/executor.py,sha256=
|
8
|
+
parsagon/executor.py,sha256=bnwAksrXd4qbWkeV8-JfR3ASreajjGfiNZQRmZM-42Q,29554
|
9
9
|
parsagon/gui_entry.py,sha256=bqG9K0CArXWWwDGoT8aV17YLNM8MfjSf6SJ_B3QbNeA,671
|
10
10
|
parsagon/highlights.js,sha256=2UDfUApblU9xtGgTLCq4X7rHRV0wcqDSSFZPmJS6fJg,16643
|
11
|
-
parsagon/main.py,sha256=
|
11
|
+
parsagon/main.py,sha256=LIUE3sfiU7cuz44jgTTfSG807EHJa8avCKtJS1fF18s,9863
|
12
12
|
parsagon/print.py,sha256=-7iVKil0W9e8zX1EJMcdlqNdfpmfPxKTBtZfwzWpGYU,4106
|
13
13
|
parsagon/runs.py,sha256=gi36oak3RxKo1BxB0Bc6GV8K06vrrBrEXu3NYkIVPwY,8633
|
14
14
|
parsagon/secrets.py,sha256=72dr-6q1q2ATBkE75fT18tcvwDM-4nymTb9NDVwjHTE,545
|
@@ -25,8 +25,8 @@ parsagon/tests/test_invalid_args.py,sha256=TAFdHGy92lUxjljPrtODOuEGVss6rn-F5GvEK
|
|
25
25
|
parsagon/tests/test_pipeline_operations.py,sha256=aEwZNtIwOl9X7jdLDLB4YEdgMp7_x8PXCINAE7RT4NY,805
|
26
26
|
parsagon/tests/test_print.py,sha256=BG7f55YDBoL0S7k291-so_Gje_hUAQOkB-jh-bEYsJY,198
|
27
27
|
parsagon/tests/test_secrets.py,sha256=Ctsscl2tmMTZcFAy5dnyqUlgTov2UharZgLpbRCLdEg,2662
|
28
|
-
parsagon-0.
|
29
|
-
parsagon-0.
|
30
|
-
parsagon-0.
|
31
|
-
parsagon-0.
|
32
|
-
parsagon-0.
|
28
|
+
parsagon-0.15.0.dist-info/METADATA,sha256=OczoK1yvt48d_icr1qpNKuG4D7VC8Fu7J2DuX3p0gRg,2555
|
29
|
+
parsagon-0.15.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
30
|
+
parsagon-0.15.0.dist-info/entry_points.txt,sha256=I1UlPUb4oY2k9idkI8kvdkEcrjKGRSOl5pMbA6uu6kw,48
|
31
|
+
parsagon-0.15.0.dist-info/top_level.txt,sha256=ih5uYQzW4qjhRKppys-WiHLIbXVZ99YdqDcfAtlcQwk,9
|
32
|
+
parsagon-0.15.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|