parsagon 0.9.6__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {parsagon-0.9.6 → parsagon-0.10.1}/PKG-INFO +16 -1
  2. {parsagon-0.9.6 → parsagon-0.10.1}/pyproject.toml +1 -1
  3. parsagon-0.10.1/src/parsagon/__init__.py +1 -0
  4. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/api.py +5 -1
  5. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/main.py +35 -1
  6. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/PKG-INFO +16 -1
  7. parsagon-0.9.6/src/parsagon/__init__.py +0 -1
  8. {parsagon-0.9.6 → parsagon-0.10.1}/README.md +0 -0
  9. {parsagon-0.9.6 → parsagon-0.10.1}/setup.cfg +0 -0
  10. {parsagon-0.9.6 → parsagon-0.10.1}/src/__init__.py +0 -0
  11. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/custom_function.py +0 -0
  12. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/exceptions.py +0 -0
  13. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/executor.py +0 -0
  14. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/highlights.js +0 -0
  15. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/settings.py +0 -0
  16. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/__init__.py +0 -0
  17. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/api_mocks.py +0 -0
  18. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/cli_mocks.py +0 -0
  19. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/conftest.py +0 -0
  20. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/test_executor.py +0 -0
  21. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/test_invalid_args.py +0 -0
  22. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon/tests/test_pipeline_operations.py +0 -0
  23. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/SOURCES.txt +0 -0
  24. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/dependency_links.txt +0 -0
  25. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/entry_points.txt +0 -0
  26. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/requires.txt +0 -0
  27. {parsagon-0.9.6 → parsagon-0.10.1}/src/parsagon.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.9.6
3
+ Version: 0.10.1
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -8,7 +8,22 @@ Classifier: Programming Language :: Python :: 3
8
8
  Classifier: Operating System :: OS Independent
9
9
  Requires-Python: >=3.8
10
10
  Description-Content-Type: text/markdown
11
+ Requires-Dist: selenium==4.9.1
12
+ Requires-Dist: lxml==4.9.2
13
+ Requires-Dist: httpx==0.24.1
14
+ Requires-Dist: halo==0.0.31
15
+ Requires-Dist: pandas==1.4.2
16
+ Requires-Dist: PyVirtualDisplay==3.0
17
+ Requires-Dist: selenium-wire==5.1.0
18
+ Requires-Dist: cssselect==1.1.0
19
+ Requires-Dist: undetected-chromedriver==3.5.2
20
+ Requires-Dist: webdriver-manager==4.0.0
21
+ Requires-Dist: jsonpath-ng==1.5.3
22
+ Requires-Dist: usaddress==0.5.10
23
+ Requires-Dist: simplejson==3.19.1
11
24
  Provides-Extra: dev
25
+ Requires-Dist: pytest==7.3.2; extra == "dev"
26
+ Requires-Dist: pytest-mock==3.11.1; extra == "dev"
12
27
 
13
28
  # parsagon
14
29
 
@@ -16,7 +16,7 @@ line-length = 120
16
16
 
17
17
  [project]
18
18
  name = "parsagon"
19
- version = "0.9.6"
19
+ version = "0.10.1"
20
20
  description = "Allows you to create browser automations with natural language"
21
21
  readme = "README.md"
22
22
  requires-python = ">=3.8"
@@ -0,0 +1 @@
1
+ from parsagon.main import create, update, detail, run, delete, get_product, get_review_article, get_article_list
@@ -58,7 +58,7 @@ def get_program_sketches(description):
58
58
  """
59
59
  Gets a program sketches (full and abridged) from a description.
60
60
  :param description: Description in natural language that will be used to generate the scraping program.
61
- :return: A dict with keys "full" and "abridged" for the respective program ASTs.
61
+ :return: A dict with keys "full", "abridged", and "pseudocode" for the respective program ASTs and pseudocode.
62
62
  """
63
63
  return _api_call(httpx.post, "/transformers/get-program-sketch/", json={"description": description})
64
64
 
@@ -197,3 +197,7 @@ def get_run(run_id):
197
197
  httpx.get,
198
198
  f"/pipelines/runs/{run_id}/",
199
199
  )
200
+
201
+
202
+ def poll_data(url, page_type):
203
+ return _api_call(httpx.post, "/extract/", json={"url": url, "page_type": page_type})
@@ -17,6 +17,7 @@ from parsagon.api import (
17
17
  get_pipelines,
18
18
  get_pipeline_code,
19
19
  get_run,
20
+ poll_data,
20
21
  APIException,
21
22
  )
22
23
  from parsagon.exceptions import ParsagonException
@@ -189,10 +190,11 @@ def create(task=None, program_name=None, headless=False, infer=False, verbose=Fa
189
190
 
190
191
  logger.info("Analyzing task description...")
191
192
  program_sketches = get_program_sketches(task)
192
- logger.info("Created a program based on task description. Now demonstrating what the program does:\n")
193
193
 
194
194
  full_program = program_sketches["full"]
195
195
  abridged_program = program_sketches["abridged"]
196
+ pseudocode = program_sketches["pseudocode"]
197
+ logger.info(f"Created a program based on task description. Program does the following:\n\n{pseudocode}\n\nNow executing the program to identify web elements to be scraped:\n")
196
198
  logger.debug("Program:\n%s", abridged_program)
197
199
  abridged_program += "\n\noutput = func()\nprint(f'Program finished and returned a value of:\\n{output}\\n')\n" # Make the program runnable
198
200
 
@@ -250,6 +252,14 @@ def update(program_name, variables={}, headless=False, infer=False, replace=Fals
250
252
  executor = Executor(headless=headless, infer=infer)
251
253
  executor.execute(abridged_program)
252
254
 
255
+ while True:
256
+ program_name_input = input(f"Type \"{program_name}\" to update this program, or press enter without typing a name to CANCEL: ")
257
+ if not program_name_input:
258
+ logger.info("Canceled update.")
259
+ return
260
+ if program_name_input == program_name:
261
+ break
262
+
253
263
  pipeline_id = pipeline["id"]
254
264
  try:
255
265
  for call_id, custom_function in executor.custom_functions.items():
@@ -343,3 +353,27 @@ def setup(verbose=False):
343
353
  logger.error("\nCancelled operation.")
344
354
  return
345
355
  logger.info("Setup complete.")
356
+
357
+
358
+ def _get_data(url, page_type, timeout):
359
+ start_time = time.time()
360
+ with Halo(text="Extracting data...", spinner="dots"):
361
+ while time.time() - start_time <= timeout:
362
+ result = poll_data(url, page_type)
363
+ if result["done"]:
364
+ return result["result"]
365
+ time.sleep(15)
366
+ logger.info("No data found")
367
+ return None
368
+
369
+
370
+ def get_product(url, timeout=300):
371
+ return _get_data(url, "PRODUCT_DETAIL", timeout)
372
+
373
+
374
+ def get_review_article(url, timeout=300):
375
+ return _get_data(url, "REVIEW_ARTICLE_DETAIL", timeout)
376
+
377
+
378
+ def get_article_list(url, timeout=300):
379
+ return _get_data(url, "ARTICLE_LIST", timeout)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: parsagon
3
- Version: 0.9.6
3
+ Version: 0.10.1
4
4
  Summary: Allows you to create browser automations with natural language
5
5
  Author-email: Sandy Suh <sandy@parsagon.io>
6
6
  Project-URL: Homepage, https://parsagon.io
@@ -8,7 +8,22 @@ Classifier: Programming Language :: Python :: 3
8
8
  Classifier: Operating System :: OS Independent
9
9
  Requires-Python: >=3.8
10
10
  Description-Content-Type: text/markdown
11
+ Requires-Dist: selenium==4.9.1
12
+ Requires-Dist: lxml==4.9.2
13
+ Requires-Dist: httpx==0.24.1
14
+ Requires-Dist: halo==0.0.31
15
+ Requires-Dist: pandas==1.4.2
16
+ Requires-Dist: PyVirtualDisplay==3.0
17
+ Requires-Dist: selenium-wire==5.1.0
18
+ Requires-Dist: cssselect==1.1.0
19
+ Requires-Dist: undetected-chromedriver==3.5.2
20
+ Requires-Dist: webdriver-manager==4.0.0
21
+ Requires-Dist: jsonpath-ng==1.5.3
22
+ Requires-Dist: usaddress==0.5.10
23
+ Requires-Dist: simplejson==3.19.1
11
24
  Provides-Extra: dev
25
+ Requires-Dist: pytest==7.3.2; extra == "dev"
26
+ Requires-Dist: pytest-mock==3.11.1; extra == "dev"
12
27
 
13
28
  # parsagon
14
29
 
@@ -1 +0,0 @@
1
- from parsagon.main import create, detail, run, delete
File without changes
File without changes
File without changes