parsagon 0.9.6__tar.gz → 0.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {parsagon-0.9.6 → parsagon-0.10.1}/PKG-INFO +16 -1
- {parsagon-0.9.6 → parsagon-0.10.1}/pyproject.toml +1 -1
- parsagon-0.10.1/src/parsagon/__init__.py +1 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/api.py +5 -1
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/main.py +35 -1
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/PKG-INFO +16 -1
- parsagon-0.9.6/src/parsagon/__init__.py +0 -1
- {parsagon-0.9.6 → parsagon-0.10.1}/README.md +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/setup.cfg +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/__init__.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/custom_function.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/exceptions.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/executor.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/highlights.js +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/settings.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/__init__.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/api_mocks.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/cli_mocks.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/conftest.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/test_executor.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/test_invalid_args.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/test_pipeline_operations.py +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/SOURCES.txt +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/dependency_links.txt +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/entry_points.txt +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/requires.txt +0 -0
- {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.9.6
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -8,7 +8,22 @@ Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
9
9
|
Requires-Python: >=3.8
|
10
10
|
Description-Content-Type: text/markdown
|
11
|
+
Requires-Dist: selenium==4.9.1
|
12
|
+
Requires-Dist: lxml==4.9.2
|
13
|
+
Requires-Dist: httpx==0.24.1
|
14
|
+
Requires-Dist: halo==0.0.31
|
15
|
+
Requires-Dist: pandas==1.4.2
|
16
|
+
Requires-Dist: PyVirtualDisplay==3.0
|
17
|
+
Requires-Dist: selenium-wire==5.1.0
|
18
|
+
Requires-Dist: cssselect==1.1.0
|
19
|
+
Requires-Dist: undetected-chromedriver==3.5.2
|
20
|
+
Requires-Dist: webdriver-manager==4.0.0
|
21
|
+
Requires-Dist: jsonpath-ng==1.5.3
|
22
|
+
Requires-Dist: usaddress==0.5.10
|
23
|
+
Requires-Dist: simplejson==3.19.1
|
11
24
|
Provides-Extra: dev
|
25
|
+
Requires-Dist: pytest==7.3.2; extra == "dev"
|
26
|
+
Requires-Dist: pytest-mock==3.11.1; extra == "dev"
|
12
27
|
|
13
28
|
# parsagon
|
14
29
|
|
@@ -0,0 +1 @@
|
|
1
|
+
from parsagon.main import create, update, detail, run, delete, get_product, get_review_article, get_article_list
|
@@ -58,7 +58,7 @@ def get_program_sketches(description):
|
|
58
58
|
"""
|
59
59
|
Gets a program sketches (full and abridged) from a description.
|
60
60
|
:param description: Description in natural language that will be used to generate the scraping program.
|
61
|
-
:return: A dict with keys "full" and "abridged" for the respective program ASTs.
|
61
|
+
:return: A dict with keys "full", "abridged", and "pseudocode" for the respective program ASTs and pseudocode.
|
62
62
|
"""
|
63
63
|
return _api_call(httpx.post, "/transformers/get-program-sketch/", json={"description": description})
|
64
64
|
|
@@ -197,3 +197,7 @@ def get_run(run_id):
|
|
197
197
|
httpx.get,
|
198
198
|
f"/pipelines/runs/{run_id}/",
|
199
199
|
)
|
200
|
+
|
201
|
+
|
202
|
+
def poll_data(url, page_type):
|
203
|
+
return _api_call(httpx.post, "/extract/", json={"url": url, "page_type": page_type})
|
@@ -17,6 +17,7 @@ from parsagon.api import (
|
|
17
17
|
get_pipelines,
|
18
18
|
get_pipeline_code,
|
19
19
|
get_run,
|
20
|
+
poll_data,
|
20
21
|
APIException,
|
21
22
|
)
|
22
23
|
from parsagon.exceptions import ParsagonException
|
@@ -189,10 +190,11 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
|
|
189
190
|
|
190
191
|
logger.info("Analyzing task description...")
|
191
192
|
program_sketches = get_program_sketches(task)
|
192
|
-
logger.info("Created a program based on task description. Now demonstrating what the program does:\n")
|
193
193
|
|
194
194
|
full_program = program_sketches["full"]
|
195
195
|
abridged_program = program_sketches["abridged"]
|
196
|
+
pseudocode = program_sketches["pseudocode"]
|
197
|
+
logger.info(f"Created a program based on task description. Program does the following:\n\n{pseudocode}\n\nNow executing the program to identify web elements to be scraped:\n")
|
196
198
|
logger.debug("Program:\n%s", abridged_program)
|
197
199
|
abridged_program += "\n\noutput = func()\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n" # Make the program runnable
|
198
200
|
|
@@ -250,6 +252,14 @@ def update(program_name, variables={}, headless=False, infer=False, replace=Fals
|
|
250
252
|
executor = Executor(headless=headless, infer=infer)
|
251
253
|
executor.execute(abridged_program)
|
252
254
|
|
255
|
+
while True:
|
256
|
+
program_name_input = input(f"Type \"{program_name}\" to update this program, or press enter without typing a name to CANCEL: ")
|
257
|
+
if not program_name_input:
|
258
|
+
logger.info("Canceled update.")
|
259
|
+
return
|
260
|
+
if program_name_input == program_name:
|
261
|
+
break
|
262
|
+
|
253
263
|
pipeline_id = pipeline["id"]
|
254
264
|
try:
|
255
265
|
for call_id, custom_function in executor.custom_functions.items():
|
@@ -343,3 +353,27 @@ def setup(verbose=False):
|
|
343
353
|
logger.error("\nCancelled operation.")
|
344
354
|
return
|
345
355
|
logger.info("Setup complete.")
|
356
|
+
|
357
|
+
|
358
|
+
def _get_data(url, page_type, timeout):
|
359
|
+
start_time = time.time()
|
360
|
+
with Halo(text="Extracting data...", spinner="dots"):
|
361
|
+
while time.time() - start_time <= timeout:
|
362
|
+
result = poll_data(url, page_type)
|
363
|
+
if result["done"]:
|
364
|
+
return result["result"]
|
365
|
+
time.sleep(15)
|
366
|
+
logger.info("No data found")
|
367
|
+
return None
|
368
|
+
|
369
|
+
|
370
|
+
def get_product(url, timeout=300):
|
371
|
+
return _get_data(url, "PRODUCT_DETAIL", timeout)
|
372
|
+
|
373
|
+
|
374
|
+
def get_review_article(url, timeout=300):
|
375
|
+
return _get_data(url, "REVIEW_ARTICLE_DETAIL", timeout)
|
376
|
+
|
377
|
+
|
378
|
+
def get_article_list(url, timeout=300):
|
379
|
+
return _get_data(url, "ARTICLE_LIST", timeout)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: parsagon
|
3
|
-
Version: 0.9.6
|
3
|
+
Version: 0.10.1
|
4
4
|
Summary: Allows you to create browser automations with natural language
|
5
5
|
Author-email: Sandy Suh <sandy@parsagon.io>
|
6
6
|
Project-URL: Homepage, https://parsagon.io
|
@@ -8,7 +8,22 @@ Classifier: Programming Language :: Python :: 3
|
|
8
8
|
Classifier: Operating System :: OS Independent
|
9
9
|
Requires-Python: >=3.8
|
10
10
|
Description-Content-Type: text/markdown
|
11
|
+
Requires-Dist: selenium==4.9.1
|
12
|
+
Requires-Dist: lxml==4.9.2
|
13
|
+
Requires-Dist: httpx==0.24.1
|
14
|
+
Requires-Dist: halo==0.0.31
|
15
|
+
Requires-Dist: pandas==1.4.2
|
16
|
+
Requires-Dist: PyVirtualDisplay==3.0
|
17
|
+
Requires-Dist: selenium-wire==5.1.0
|
18
|
+
Requires-Dist: cssselect==1.1.0
|
19
|
+
Requires-Dist: undetected-chromedriver==3.5.2
|
20
|
+
Requires-Dist: webdriver-manager==4.0.0
|
21
|
+
Requires-Dist: jsonpath-ng==1.5.3
|
22
|
+
Requires-Dist: usaddress==0.5.10
|
23
|
+
Requires-Dist: simplejson==3.19.1
|
11
24
|
Provides-Extra: dev
|
25
|
+
Requires-Dist: pytest==7.3.2; extra == "dev"
|
26
|
+
Requires-Dist: pytest-mock==3.11.1; extra == "dev"
|
12
27
|
|
13
28
|
# parsagon
|
14
29
|
|
@@ -1 +0,0 @@
|
|
1
|
-
from parsagon.main import create, detail, run, delete
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|