parsagon 0.9.8__tar.gz → 0.10.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsagon-0.9.8 → parsagon-0.10.2}/PKG-INFO +16 -1
- {parsagon-0.9.8 → parsagon-0.10.2}/pyproject.toml +1 -1
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/__init__.py +1 -1
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/api.py +5 -5
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/main.py +19 -6
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon.egg-info/PKG-INFO +16 -1
- {parsagon-0.9.8 → parsagon-0.10.2}/README.md +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/setup.cfg +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/__init__.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/custom_function.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/exceptions.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/executor.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/highlights.js +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/settings.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/__init__.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/api_mocks.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/cli_mocks.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/conftest.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/test_executor.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/test_invalid_args.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon/tests/test_pipeline_operations.py +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon.egg-info/SOURCES.txt +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon.egg-info/dependency_links.txt +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon.egg-info/entry_points.txt +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon.egg-info/requires.txt +0 -0
- {parsagon-0.9.8 → parsagon-0.10.2}/src/parsagon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.9.8
|
3
|
+
Version: 0.10.2
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -8,7 +8,22 @@ Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
9
9
|
Requires-Python: >=3.8
|
10
10
|
Description-Content-Type: text/markdown
|
11
|
+
Requires-Dist: selenium==4.9.1
|
12
|
+
Requires-Dist: lxml==4.9.2
|
13
|
+
Requires-Dist: httpx==0.24.1
|
14
|
+
Requires-Dist: halo==0.0.31
|
15
|
+
Requires-Dist: pandas==1.4.2
|
16
|
+
Requires-Dist: PyVirtualDisplay==3.0
|
17
|
+
Requires-Dist: selenium-wire==5.1.0
|
18
|
+
Requires-Dist: cssselect==1.1.0
|
19
|
+
Requires-Dist: undetected-chromedriver==3.5.2
|
20
|
+
Requires-Dist: webdriver-manager==4.0.0
|
21
|
+
Requires-Dist: jsonpath-ng==1.5.3
|
22
|
+
Requires-Dist: usaddress==0.5.10
|
23
|
+
Requires-Dist: simplejson==3.19.1
|
11
24
|
Provides-Extra: dev
|
25
|
+
Requires-Dist: pytest==7.3.2; extra == "dev"
|
26
|
+
Requires-Dist: pytest-mock==3.11.1; extra == "dev"
|
12
27
|
|
13
28
|
# parsagon
|
14
29
|
|
@@ -1 +1 @@
|
|
1
|
-
from parsagon.main import create, update, detail, run, delete, get_product
|
1
|
+
from parsagon.main import create, update, detail, run, delete, get_product, get_review_article, get_article_list
|
@@ -58,7 +58,7 @@ def get_program_sketches(description):
|
|
58
58
|
"""
|
59
59
|
Gets a program sketches (full and abridged) from a description.
|
60
60
|
:param description: Description in natural language that will be used to generate the scraping program.
|
61
|
-
:return: A dict with keys "full" and "
|
61
|
+
:return: A dict with keys "full", "abridged", and "pseudocode" for the respective program ASTs and pseudocode.
|
62
62
|
"""
|
63
63
|
return _api_call(httpx.post, "/transformers/get-program-sketch/", json={"description": description})
|
64
64
|
|
@@ -131,9 +131,9 @@ def get_bool_about_data(data, question):
|
|
131
131
|
return data["result"]
|
132
132
|
|
133
133
|
|
134
|
-
def create_pipeline(name, description, program_sketch):
|
134
|
+
def create_pipeline(name, description, program_sketch, pseudocode):
|
135
135
|
return _api_call(
|
136
|
-
httpx.post, "/pipelines/", json={"name": name, "description": description, "program_sketch": program_sketch}
|
136
|
+
httpx.post, "/pipelines/", json={"name": name, "description": description, "program_sketch": program_sketch, "pseudocode": pseudocode}
|
137
137
|
)
|
138
138
|
|
139
139
|
|
@@ -199,5 +199,5 @@ def get_run(run_id):
|
|
199
199
|
)
|
200
200
|
|
201
201
|
|
202
|
-
def
|
203
|
-
return _api_call(httpx.post, "/extract/
|
202
|
+
def poll_data(url, page_type):
|
203
|
+
return _api_call(httpx.post, "/extract/", json={"url": url, "page_type": page_type})
|
@@ -17,7 +17,7 @@ from parsagon.api import (
|
|
17
17
|
get_pipelines,
|
18
18
|
get_pipeline_code,
|
19
19
|
get_run,
|
20
|
-
|
20
|
+
poll_data,
|
21
21
|
APIException,
|
22
22
|
)
|
23
23
|
from parsagon.exceptions import ParsagonException
|
@@ -190,10 +190,11 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
|
|
190
190
|
|
191
191
|
logger.info("Analyzing task description...")
|
192
192
|
program_sketches = get_program_sketches(task)
|
193
|
-
logger.info("Created a program based on task description. Now demonstrating what the program does:\n")
|
194
193
|
|
195
194
|
full_program = program_sketches["full"]
|
196
195
|
abridged_program = program_sketches["abridged"]
|
196
|
+
pseudocode = program_sketches["pseudocode"]
|
197
|
+
logger.info(f"Created a program based on task description. Program does the following:\n\n{pseudocode}\n\nNow executing the program to identify web elements to be scraped:\n")
|
197
198
|
logger.debug("Program:\n%s", abridged_program)
|
198
199
|
abridged_program += "\n\noutput = func()\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n" # Make the program runnable
|
199
200
|
|
@@ -208,7 +209,7 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
|
|
208
209
|
if program_name:
|
209
210
|
logger.info(f"Saving program as {program_name}")
|
210
211
|
try:
|
211
|
-
pipeline = create_pipeline(program_name, task, full_program)
|
212
|
+
pipeline = create_pipeline(program_name, task, full_program, pseudocode)
|
212
213
|
except APIException as e:
|
213
214
|
if isinstance(e.value, list) and "Program with name already exists" in e.value:
|
214
215
|
logger.info("A program with this name already exists. Please choose another name.")
|
@@ -354,13 +355,25 @@ def setup(verbose=False):
|
|
354
355
|
logger.info("Setup complete.")
|
355
356
|
|
356
357
|
|
357
|
-
def
|
358
|
+
def _get_data(url, page_type, timeout):
|
358
359
|
start_time = time.time()
|
359
360
|
with Halo(text="Extracting data...", spinner="dots"):
|
360
361
|
while time.time() - start_time <= timeout:
|
361
|
-
result =
|
362
|
+
result = poll_data(url, page_type)
|
362
363
|
if result["done"]:
|
363
364
|
return result["result"]
|
364
|
-
time.sleep(
|
365
|
+
time.sleep(15)
|
365
366
|
logger.info("No data found")
|
366
367
|
return None
|
368
|
+
|
369
|
+
|
370
|
+
def get_product(url, timeout=300):
|
371
|
+
return _get_data(url, "PRODUCT_DETAIL", timeout)
|
372
|
+
|
373
|
+
|
374
|
+
def get_review_article(url, timeout=300):
|
375
|
+
return _get_data(url, "REVIEW_ARTICLE_DETAIL", timeout)
|
376
|
+
|
377
|
+
|
378
|
+
def get_article_list(url, timeout=300):
|
379
|
+
return _get_data(url, "ARTICLE_LIST", timeout)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.9.8
|
3
|
+
Version: 0.10.2
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -8,7 +8,22 @@ Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
9
9
|
Requires-Python: >=3.8
|
10
10
|
Description-Content-Type: text/markdown
|
11
|
+
Requires-Dist: selenium==4.9.1
|
12
|
+
Requires-Dist: lxml==4.9.2
|
13
|
+
Requires-Dist: httpx==0.24.1
|
14
|
+
Requires-Dist: halo==0.0.31
|
15
|
+
Requires-Dist: pandas==1.4.2
|
16
|
+
Requires-Dist: PyVirtualDisplay==3.0
|
17
|
+
Requires-Dist: selenium-wire==5.1.0
|
18
|
+
Requires-Dist: cssselect==1.1.0
|
19
|
+
Requires-Dist: undetected-chromedriver==3.5.2
|
20
|
+
Requires-Dist: webdriver-manager==4.0.0
|
21
|
+
Requires-Dist: jsonpath-ng==1.5.3
|
22
|
+
Requires-Dist: usaddress==0.5.10
|
23
|
+
Requires-Dist: simplejson==3.19.1
|
11
24
|
Provides-Extra: dev
|
25
|
+
Requires-Dist: pytest==7.3.2; extra == "dev"
|
26
|
+
Requires-Dist: pytest-mock==3.11.1; extra == "dev"
|
12
27
|
|
13
28
|
# parsagon
|
14
29
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|