scrape-cli 1.2.2.tar.gz → 1.2.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/PKG-INFO +1 -1
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/pyproject.toml +1 -1
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli/__init__.py +1 -1
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli/scrape.py +25 -11
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli.egg-info/PKG-INFO +1 -1
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/tests/test_scrape.py +4 -4
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/README.md +0 -0
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli.egg-info/SOURCES.txt +0 -0
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli.egg-info/dependency_links.txt +0 -0
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli.egg-info/entry_points.txt +0 -0
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli.egg-info/requires.txt +0 -0
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/scrape_cli.egg-info/top_level.txt +0 -0
- {scrape_cli-1.2.2 → scrape_cli-1.2.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrape_cli
|
|
3
|
-
Version: 1.2.2
|
|
3
|
+
Version: 1.2.3
|
|
4
4
|
Summary: It's a command-line tool to extract HTML elements using an XPath query or CSS3 selector.
|
|
5
5
|
Author-email: Andrea Borruso <aborruso@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/aborruso/scrape-cli
|
|
@@ -49,7 +49,7 @@ def convert_css_to_xpath(expression):
|
|
|
49
49
|
try:
|
|
50
50
|
return GenericTranslator().css_to_xpath(expression)
|
|
51
51
|
except Exception as e:
|
|
52
|
-
print(f"Error converting CSS selector to XPath: {e}")
|
|
52
|
+
print(f"Error converting CSS selector to XPath: {e}", file=sys.stderr)
|
|
53
53
|
sys.exit(1)
|
|
54
54
|
|
|
55
55
|
def is_xpath(expression):
|
|
@@ -98,7 +98,18 @@ def main():
|
|
|
98
98
|
# Command line argument parser definition
|
|
99
99
|
parser = argparse.ArgumentParser(
|
|
100
100
|
description='Extract HTML elements using an XPath query or CSS3 selector.',
|
|
101
|
-
|
|
101
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
102
|
+
epilog='''\
|
|
103
|
+
examples:
|
|
104
|
+
scrape -e "//h1" file.html XPath expression
|
|
105
|
+
scrape -e "h1.title" file.html CSS selector
|
|
106
|
+
scrape -e "//a" -a href file.html extract attribute
|
|
107
|
+
scrape -t file.html extract all text
|
|
108
|
+
scrape -e "//h1" https://example.com fetch from URL
|
|
109
|
+
cat file.html | scrape -e "//h1" read from stdin
|
|
110
|
+
scrape -e "//h1" -x file.html check existence (exit 0/1)
|
|
111
|
+
scrape -be "//article" file.html wrap output in <html><body>
|
|
112
|
+
'''
|
|
102
113
|
)
|
|
103
114
|
|
|
104
115
|
parser.add_argument('--version', action='version', version=f'%(prog)s {__version__}')
|
|
@@ -134,10 +145,13 @@ def main():
|
|
|
134
145
|
|
|
135
146
|
# Check that at least one expression is provided by the user (unless using -t option)
|
|
136
147
|
if not args.expression and not args.text:
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
"
|
|
148
|
+
print(
|
|
149
|
+
"Error: no expression specified. Use -e \"//selector\" or -t.\n"
|
|
150
|
+
" scrape -e \"//h1\" file.html\n"
|
|
151
|
+
" scrape -t file.html",
|
|
152
|
+
file=sys.stderr
|
|
140
153
|
)
|
|
154
|
+
sys.exit(1)
|
|
141
155
|
|
|
142
156
|
# Determine the source of the input: URL, file, or stdin
|
|
143
157
|
if args.html:
|
|
@@ -150,29 +164,29 @@ def main():
|
|
|
150
164
|
response.raise_for_status()
|
|
151
165
|
inp = response.content
|
|
152
166
|
except requests.RequestException as e:
|
|
153
|
-
print(f"Error downloading HTML: {e}")
|
|
167
|
+
print(f"Error downloading HTML: {e}", file=sys.stderr)
|
|
154
168
|
sys.exit(1)
|
|
155
169
|
else:
|
|
156
170
|
# If the input is a local file, try to open it
|
|
157
171
|
try:
|
|
158
172
|
inp = open(args.html, 'rb').read()
|
|
159
173
|
except FileNotFoundError:
|
|
160
|
-
print(f"Error: The file '{args.html}' was not found.")
|
|
174
|
+
print(f"Error: The file '{args.html}' was not found.", file=sys.stderr)
|
|
161
175
|
sys.exit(1)
|
|
162
176
|
else:
|
|
163
177
|
# If the input is from stdin
|
|
164
178
|
try:
|
|
165
179
|
inp = sys.stdin.buffer.read()
|
|
166
180
|
if not inp:
|
|
167
|
-
print("Error: No input received from stdin")
|
|
181
|
+
print("Error: No input received from stdin", file=sys.stderr)
|
|
168
182
|
sys.exit(1)
|
|
169
183
|
except Exception as e:
|
|
170
|
-
print(f"Error reading input: {e}")
|
|
184
|
+
print(f"Error reading input: {e}", file=sys.stderr)
|
|
171
185
|
sys.exit(1)
|
|
172
186
|
|
|
173
187
|
# Check for empty or invalid input
|
|
174
188
|
if not inp:
|
|
175
|
-
print("Error: Input is empty or invalid")
|
|
189
|
+
print("Error: Input is empty or invalid", file=sys.stderr)
|
|
176
190
|
sys.exit(1)
|
|
177
191
|
|
|
178
192
|
# Convert CSS selectors to XPath if necessary
|
|
@@ -223,7 +237,7 @@ def main():
|
|
|
223
237
|
document = etree.fromstring(inp, html_parser)
|
|
224
238
|
except (etree.XMLSyntaxError, UnicodeDecodeError) as e:
|
|
225
239
|
# Print an error in case of syntax issues in the HTML
|
|
226
|
-
print(f"Error parsing HTML: {e}")
|
|
240
|
+
print(f"Error parsing HTML: {e}", file=sys.stderr)
|
|
227
241
|
sys.exit(1)
|
|
228
242
|
|
|
229
243
|
results = []
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scrape_cli
|
|
3
|
-
Version: 1.2.2
|
|
3
|
+
Version: 1.2.3
|
|
4
4
|
Summary: It's a command-line tool to extract HTML elements using an XPath query or CSS3 selector.
|
|
5
5
|
Author-email: Andrea Borruso <aborruso@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/aborruso/scrape-cli
|
|
@@ -158,21 +158,21 @@ def test_empty_stdin_returns_error():
|
|
|
158
158
|
result = run_scrape("-e", "//p", input_data="")
|
|
159
159
|
|
|
160
160
|
assert result.returncode == 1
|
|
161
|
-
assert "Error: No input received from stdin" in result.stdout
|
|
161
|
+
assert "Error: No input received from stdin" in result.stderr
|
|
162
162
|
|
|
163
163
|
|
|
164
164
|
def test_missing_file_returns_error():
|
|
165
165
|
result = run_scrape("resources/this-file-does-not-exist.html", "-e", "//p")
|
|
166
166
|
|
|
167
167
|
assert result.returncode == 1
|
|
168
|
-
assert "was not found" in result.stdout
|
|
168
|
+
assert "was not found" in result.stderr
|
|
169
169
|
|
|
170
170
|
|
|
171
171
|
def test_missing_expression_without_text_returns_error():
|
|
172
172
|
result = run_scrape(str(TEST_HTML))
|
|
173
173
|
|
|
174
174
|
assert result.returncode == 1
|
|
175
|
-
assert "
|
|
175
|
+
assert "no expression specified" in result.stderr
|
|
176
176
|
|
|
177
177
|
|
|
178
178
|
def test_incorrect_eb_order_exits_with_specific_message():
|
|
@@ -186,7 +186,7 @@ def test_invalid_css_selector_fails_conversion():
|
|
|
186
186
|
result = run_scrape(str(TEST_HTML), "-e", "div[")
|
|
187
187
|
|
|
188
188
|
assert result.returncode == 1
|
|
189
|
-
assert "Error converting CSS selector to XPath" in result.stdout
|
|
189
|
+
assert "Error converting CSS selector to XPath" in result.stderr
|
|
190
190
|
|
|
191
191
|
|
|
192
192
|
def test_url_input_downloads_and_extracts_text():
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|