scrape-cli 1.2.0__tar.gz → 1.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
scrape_cli-1.2.0/PKG-INFO → scrape_cli-1.2.2/PKG-INFO
@@ -1,24 +1,20 @@
 Metadata-Version: 2.4
 Name: scrape_cli
-Version: 1.2.0
+Version: 1.2.2
 Summary: It's a command-line tool to extract HTML elements using an XPath query or CSS3 selector.
-Home-page: https://github.com/aborruso/scrape-cli
-Author: Andrea Borruso
 Author-email: Andrea Borruso <aborruso@gmail.com>
 Project-URL: Homepage, https://github.com/aborruso/scrape-cli
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.6
+Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: cssselect
 Requires-Dist: lxml
 Requires-Dist: requests
-Dynamic: author
-Dynamic: home-page
-Dynamic: requires-python
 
 [![PyPI version](https://img.shields.io/pypi/v/scrape-cli.svg?label=PyPI%20version)](https://pypi.org/project/scrape-cli/)
 [![Python Versions](https://img.shields.io/pypi/pyversions/scrape-cli.svg)](https://pypi.org/project/scrape-cli/)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/aborruso/scrape-cli)
 
 # scrape cli
 
@@ -52,7 +48,7 @@ uv tool install scrape-cli
 uv pip install scrape-cli
 
 # Or run temporarily without installing
-uvx scrape-cli --help
+uvx --from scrape-cli scrape --help
 ```
 
 ### Using pip
@@ -80,7 +76,15 @@ pip install -e .
 
 ### Using the Test HTML File
 
-In the `resources` directory you'll find a `test.html` file that you can use to test various scraping scenarios. Here are some examples:
+In the `resources` directory you'll find a `test.html` file that you can use to test various scraping scenarios.
+
+**Note**: You can also test directly from the URL without cloning the repository:
+
+```bash
+scrape -e "h1" https://raw.githubusercontent.com/aborruso/scrape-cli/refs/heads/master/resources/test.html
+```
+
+Here are some examples:
 
 1. Extract all table data:
 
@@ -226,6 +230,60 @@ scrape -te 'h1, h2, h3' resources/test.html
 
 The `-t` option automatically excludes text from `<script>` and `<style>` tags and cleans up whitespace for better readability.
 
+### JSON Output Integration
+
+You can integrate scrape-cli with [xq](https://github.com/kislyuk/yq) (part of yq) to convert HTML output to structured JSON:
+
+```bash
+# Extract and convert to JSON (requires -b for complete HTML)
+scrape -be "a.external-link" resources/test.html | xq .
+```
+
+Output:
+
+```json
+{
+  "html": {
+    "body": {
+      "a": {
+        "@href": "https://example.com",
+        "@class": "external-link",
+        "#text": "Example Link"
+      }
+    }
+  }
+}
+```
+
+Table extraction example:
+
+```bash
+scrape -be "table.data-table td" resources/test.html | xq .
+```
+
+Output:
+
+```json
+{
+  "html": {
+    "body": {
+      "td": [
+        "1",
+        "John Doe",
+        "john@example.com",
+        "2",
+        "Jane Smith",
+        "jane@example.com"
+      ]
+    }
+  }
+}
+```
+
+**Note**: The `-b` flag is mandatory to produce valid HTML with `<html>`, `<head>` and `<body>` tags.
+
+Useful for JSON-based pipelines, APIs, databases, and processing with jq/DuckDB.
+
 Some notes on the commands:
 
 - `-e` to set the query
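The metadata above raises the minimum supported Python from 3.6 to 3.8, a change mirrored below in `pyproject.toml`. A quick sketch for checking that a local interpreter satisfies the new floor, assuming `python3` is on `PATH`:

```bash
# Exits non-zero on interpreters older than 3.8
python3 -c "import sys; assert sys.version_info >= (3, 8), sys.version"
```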
scrape_cli-1.2.0/README.md → scrape_cli-1.2.2/README.md
@@ -1,5 +1,6 @@
 [![PyPI version](https://img.shields.io/pypi/v/scrape-cli.svg?label=PyPI%20version)](https://pypi.org/project/scrape-cli/)
 [![Python Versions](https://img.shields.io/pypi/pyversions/scrape-cli.svg)](https://pypi.org/project/scrape-cli/)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/aborruso/scrape-cli)
 
 # scrape cli
 
@@ -33,7 +34,7 @@ uv tool install scrape-cli
 uv pip install scrape-cli
 
 # Or run temporarily without installing
-uvx scrape-cli --help
+uvx --from scrape-cli scrape --help
 ```
 
 ### Using pip
@@ -61,7 +62,15 @@ pip install -e .
 
 ### Using the Test HTML File
 
-In the `resources` directory you'll find a `test.html` file that you can use to test various scraping scenarios. Here are some examples:
+In the `resources` directory you'll find a `test.html` file that you can use to test various scraping scenarios.
+
+**Note**: You can also test directly from the URL without cloning the repository:
+
+```bash
+scrape -e "h1" https://raw.githubusercontent.com/aborruso/scrape-cli/refs/heads/master/resources/test.html
+```
+
+Here are some examples:
 
 1. Extract all table data:
 
@@ -207,6 +216,60 @@ scrape -te 'h1, h2, h3' resources/test.html
 
 The `-t` option automatically excludes text from `<script>` and `<style>` tags and cleans up whitespace for better readability.
 
+### JSON Output Integration
+
+You can integrate scrape-cli with [xq](https://github.com/kislyuk/yq) (part of yq) to convert HTML output to structured JSON:
+
+```bash
+# Extract and convert to JSON (requires -b for complete HTML)
+scrape -be "a.external-link" resources/test.html | xq .
+```
+
+Output:
+
+```json
+{
+  "html": {
+    "body": {
+      "a": {
+        "@href": "https://example.com",
+        "@class": "external-link",
+        "#text": "Example Link"
+      }
+    }
+  }
+}
+```
+
+Table extraction example:
+
+```bash
+scrape -be "table.data-table td" resources/test.html | xq .
+```
+
+Output:
+
+```json
+{
+  "html": {
+    "body": {
+      "td": [
+        "1",
+        "John Doe",
+        "john@example.com",
+        "2",
+        "Jane Smith",
+        "jane@example.com"
+      ]
+    }
+  }
+}
+```
+
+**Note**: The `-b` flag is mandatory to produce valid HTML with `<html>`, `<head>` and `<body>` tags.
+
+Useful for JSON-based pipelines, APIs, databases, and processing with jq/DuckDB.
+
 Some notes on the commands:
 
 - `-e` to set the query
scrape_cli-1.2.0/pyproject.toml → scrape_cli-1.2.2/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "scrape_cli"
-version = "1.2.0"
+version = "1.2.2"
 description = "It's a command-line tool to extract HTML elements using an XPath query or CSS3 selector."
 readme = "README.md"
 authors = [
@@ -14,7 +14,7 @@ classifiers = [
     "Programming Language :: Python :: 3",
     "Operating System :: OS Independent",
 ]
-requires-python = ">=3.6"
+requires-python = ">=3.8"
 dependencies = [
     "cssselect",
     "lxml",
scrape_cli-1.2.0/scrape_cli/__init__.py → scrape_cli-1.2.2/scrape_cli/__init__.py
@@ -4,7 +4,7 @@ scrape-cli - A command-line tool to extract HTML elements using XPath or CSS3 se
 
 from scrape_cli.scrape import main
 
-__version__ = "1.2.0"
+__version__ = "1.2.2"
 __author__ = "Andrea Borruso"
 __author_email__ = "aborruso@gmail.com"
 
scrape_cli-1.2.0/scrape_cli/scrape.py → scrape_cli-1.2.2/scrape_cli/scrape.py
@@ -63,13 +63,13 @@ def is_xpath(expression):
     - Expressions wrapped in parentheses that contain XPath syntax
     """
     expr = expression.strip()
-
+
     # Direct XPath patterns
     if expr.startswith('/') or expr.startswith('//'):
         return True
     if '::' in expr:
         return True
-
+
     # Handle expressions wrapped in parentheses
     if expr.startswith('(') and expr.endswith(')'):
         # Remove outer parentheses and check inner content
@@ -78,7 +78,7 @@ def is_xpath(expression):
             return True
         if '::' in inner_expr:
             return True
-
+
     # Additional XPath indicators
     # Check for XPath-specific patterns that CSS doesn't have
     if '//' in expr or expr.startswith('/'):
@@ -91,7 +91,7 @@ def is_xpath(expression):
         return True
     if re.search(r'\b(ancestor|descendant|following|preceding|parent|child)::', expr): # XPath axes
         return True
-
+
     return False
 
 def main():
@@ -128,6 +128,8 @@ def main():
     parser.add_argument('-r', '--rawinput', action='store_true', default=False,
                         help="Do not parse HTML before passing to etree (useful for CData)")
     parser.add_argument('--check-existence', dest='check_existence', action='store_true')
+    parser.add_argument('-u', '--user-agent', default=None,
+                        help="Custom User-Agent string for HTTP requests")
     args = parser.parse_args()
 
     # Check that at least one expression is provided by the user (unless using -t option)
@@ -142,7 +144,9 @@ def main():
     if args.html.startswith('http://') or args.html.startswith('https://'):
         # If the input is a URL, download the HTML content
         try:
-            response = requests.get(args.html)
+            ua = args.user_agent or "Mozilla/5.0 (compatible; scrape-cli/1.0)"
+            headers = {"User-Agent": ua}
+            response = requests.get(args.html, headers=headers, timeout=30)
             response.raise_for_status()
             inp = response.content
         except requests.RequestException as e:
@@ -189,7 +193,7 @@ def main():
             meta = re.search(r'<meta[^>]+charset=["\']?([\w-]+)', head)
             if meta:
                 return meta.group(1)
-        except:
+        except Exception:
             pass
         return None
 
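The `scrape.py` hunks above add a `-u/--user-agent` option and, for URL inputs, a default User-Agent header plus a 30-second timeout. A minimal usage sketch of the new flag; the User-Agent string and target URL here are illustrative only:

```bash
# Override the User-Agent header sent when the input is a URL (example values)
scrape -u "my-crawler/0.1" -e "//h1" "https://example.com/"
```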
scrape_cli-1.2.0/scrape_cli.egg-info/PKG-INFO → scrape_cli-1.2.2/scrape_cli.egg-info/PKG-INFO
@@ -1,24 +1,20 @@
 Metadata-Version: 2.4
 Name: scrape_cli
-Version: 1.2.0
+Version: 1.2.2
 Summary: It's a command-line tool to extract HTML elements using an XPath query or CSS3 selector.
-Home-page: https://github.com/aborruso/scrape-cli
-Author: Andrea Borruso
 Author-email: Andrea Borruso <aborruso@gmail.com>
 Project-URL: Homepage, https://github.com/aborruso/scrape-cli
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.6
+Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 Requires-Dist: cssselect
 Requires-Dist: lxml
 Requires-Dist: requests
-Dynamic: author
-Dynamic: home-page
-Dynamic: requires-python
 
 [![PyPI version](https://img.shields.io/pypi/v/scrape-cli.svg?label=PyPI%20version)](https://pypi.org/project/scrape-cli/)
 [![Python Versions](https://img.shields.io/pypi/pyversions/scrape-cli.svg)](https://pypi.org/project/scrape-cli/)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/aborruso/scrape-cli)
 
 # scrape cli
 
@@ -52,7 +48,7 @@ uv tool install scrape-cli
 uv pip install scrape-cli
 
 # Or run temporarily without installing
-uvx scrape-cli --help
+uvx --from scrape-cli scrape --help
 ```
 
 ### Using pip
@@ -80,7 +76,15 @@ pip install -e .
 
 ### Using the Test HTML File
 
-In the `resources` directory you'll find a `test.html` file that you can use to test various scraping scenarios. Here are some examples:
+In the `resources` directory you'll find a `test.html` file that you can use to test various scraping scenarios.
+
+**Note**: You can also test directly from the URL without cloning the repository:
+
+```bash
+scrape -e "h1" https://raw.githubusercontent.com/aborruso/scrape-cli/refs/heads/master/resources/test.html
+```
+
+Here are some examples:
 
 1. Extract all table data:
 
@@ -226,6 +230,60 @@ scrape -te 'h1, h2, h3' resources/test.html
 
 The `-t` option automatically excludes text from `<script>` and `<style>` tags and cleans up whitespace for better readability.
 
+### JSON Output Integration
+
+You can integrate scrape-cli with [xq](https://github.com/kislyuk/yq) (part of yq) to convert HTML output to structured JSON:
+
+```bash
+# Extract and convert to JSON (requires -b for complete HTML)
+scrape -be "a.external-link" resources/test.html | xq .
+```
+
+Output:
+
+```json
+{
+  "html": {
+    "body": {
+      "a": {
+        "@href": "https://example.com",
+        "@class": "external-link",
+        "#text": "Example Link"
+      }
+    }
+  }
+}
+```
+
+Table extraction example:
+
+```bash
+scrape -be "table.data-table td" resources/test.html | xq .
+```
+
+Output:
+
+```json
+{
+  "html": {
+    "body": {
+      "td": [
+        "1",
+        "John Doe",
+        "john@example.com",
+        "2",
+        "Jane Smith",
+        "jane@example.com"
+      ]
+    }
+  }
+}
+```
+
+**Note**: The `-b` flag is mandatory to produce valid HTML with `<html>`, `<head>` and `<body>` tags.
+
+Useful for JSON-based pipelines, APIs, databases, and processing with jq/DuckDB.
+
 Some notes on the commands:
 
 - `-e` to set the query
scrape_cli-1.2.0/scrape_cli.egg-info/SOURCES.txt → scrape_cli-1.2.2/scrape_cli.egg-info/SOURCES.txt
@@ -1,6 +1,5 @@
 README.md
 pyproject.toml
-setup.py
 scrape_cli/__init__.py
 scrape_cli/scrape.py
 scrape_cli.egg-info/PKG-INFO
@@ -8,4 +7,5 @@ scrape_cli.egg-info/SOURCES.txt
 scrape_cli.egg-info/dependency_links.txt
 scrape_cli.egg-info/entry_points.txt
 scrape_cli.egg-info/requires.txt
-scrape_cli.egg-info/top_level.txt
+scrape_cli.egg-info/top_level.txt
+tests/test_scrape.py
scrape_cli-1.2.2/tests/test_scrape.py ADDED
@@ -0,0 +1,205 @@
+import subprocess
+import sys
+import threading
+from pathlib import Path
+from http.server import BaseHTTPRequestHandler, HTTPServer
+
+ROOT = Path(__file__).resolve().parents[1]
+TEST_HTML = ROOT / "resources" / "test.html"
+sys.path.insert(0, str(ROOT))
+
+from scrape_cli.scrape import is_xpath
+
+
+def run_scrape(*args, input_data=None):
+    cmd = [sys.executable, "-m", "scrape_cli.scrape", *args]
+    return subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        cwd=ROOT,
+        input=input_data,
+    )
+
+
+def run_test_server(html_bytes):
+    class Handler(BaseHTTPRequestHandler):
+        def do_GET(self):
+            self.send_response(200)
+            self.send_header("Content-Type", "text/html; charset=utf-8")
+            self.end_headers()
+            self.wfile.write(html_bytes)
+
+        def log_message(self, format, *args):
+            return
+
+    server = HTTPServer(("127.0.0.1", 0), Handler)
+    thread = threading.Thread(target=server.serve_forever, daemon=True)
+    thread.start()
+    return server, thread
+
+
+def test_is_xpath_true_patterns():
+    candidates = [
+        "//div",
+        "/html/body/div",
+        "(//div)[1]",
+        "//a/@href",
+        "//li[2]",
+        "ancestor::div",
+        "descendant::span",
+        "//p/text()",
+    ]
+
+    for expression in candidates:
+        assert is_xpath(expression) is True
+
+
+def test_is_xpath_false_css_patterns():
+    candidates = [
+        "div.content > a.link",
+        "a[href*='/about']",
+        "input[type='email']",
+        "ul.items-list li:first-child",
+        "div.class1.class2",
+    ]
+
+    for expression in candidates:
+        assert is_xpath(expression) is False
+
+
+def test_xpath_parentheses_extracts_first_match():
+    result = run_scrape(str(TEST_HTML), "-e", "(//ul[@class='items-list']/li)[1]", "-t")
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "First item"
+
+
+def test_css_attribute_selector_is_not_misclassified_as_xpath():
+    result = run_scrape(str(TEST_HTML), "-e", ".resource-links a[href*='github.com']", "-t")
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "GitHub Repository"
+
+
+def test_check_existence_true_and_false():
+    found = run_scrape(str(TEST_HTML), "-e", "//h1", "--check-existence")
+    missing = run_scrape(str(TEST_HTML), "-e", "//this-node-does-not-exist", "--check-existence")
+
+    assert found.returncode == 0
+    assert missing.returncode == 1
+
+
+def test_encoding_meta_charset_iso_8859_1(tmp_path):
+    html = """<!doctype html>
+<html>
+<head><meta charset=\"iso-8859-1\"></head>
+<body><p>Perch\xe9</p></body>
+</html>
+""".encode("iso-8859-1")
+    sample = tmp_path / "latin1.html"
+    sample.write_bytes(html)
+
+    result = run_scrape(str(sample), "-e", "//p/text()", "-t")
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "Perché"
+
+
+def test_argument_extracts_attribute_value():
+    result = run_scrape(str(TEST_HTML), "-e", "//a[@class='external-link']", "-a", "href")
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "https://example.com"
+
+
+def test_body_flag_wraps_output_in_html_body():
+    result = run_scrape(str(TEST_HTML), "-e", "//h1", "-b")
+
+    assert result.returncode == 0
+    assert result.stdout.startswith("<!DOCTYPE html>\n<html>\n<body>\n")
+    assert result.stdout.strip().endswith("</body>\n</html>")
+    assert "<h1 id=\"main-title\">Welcome to the Test Page</h1>" in result.stdout
+
+
+def test_text_flag_without_expression_extracts_body_and_skips_script():
+    result = run_scrape(str(TEST_HTML), "-t")
+
+    assert result.returncode == 0
+    assert "Welcome to the Test Page" in result.stdout
+    assert "document.getElementById('dynamic-content')" not in result.stdout
+
+
+def test_short_check_existence_flag_x():
+    found = run_scrape(str(TEST_HTML), "-e", "//table", "-x")
+    missing = run_scrape(str(TEST_HTML), "-e", "//definitely-not-here", "-x")
+
+    assert found.returncode == 0
+    assert missing.returncode == 1
+
+
+def test_rawinput_parses_xml_without_html_parser():
+    xml_data = "<root><item>one</item><item>two</item></root>"
+    result = run_scrape("-e", "//item[2]/text()", "-r", input_data=xml_data)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "two"
+
+
+def test_stdin_input_works_when_no_html_argument():
+    html_data = "<html><body><p>stdin-ok</p></body></html>"
+    result = run_scrape("-e", "//p/text()", "-t", input_data=html_data)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "stdin-ok"
+
+
+def test_empty_stdin_returns_error():
+    result = run_scrape("-e", "//p", input_data="")
+
+    assert result.returncode == 1
+    assert "Error: No input received from stdin" in result.stdout
+
+
+def test_missing_file_returns_error():
+    result = run_scrape("resources/this-file-does-not-exist.html", "-e", "//p")
+
+    assert result.returncode == 1
+    assert "was not found" in result.stdout
+
+
+def test_missing_expression_without_text_returns_error():
+    result = run_scrape(str(TEST_HTML))
+
+    assert result.returncode == 1
+    assert "you must provide at least one XPath query or CSS3 selector" in result.stderr
+
+
+def test_incorrect_eb_order_exits_with_specific_message():
+    result = run_scrape("-eb")
+
+    assert result.returncode == 1
+    assert "Please use -be instead of -eb." in result.stderr
+
+
+def test_invalid_css_selector_fails_conversion():
+    result = run_scrape(str(TEST_HTML), "-e", "div[")
+
+    assert result.returncode == 1
+    assert "Error converting CSS selector to XPath" in result.stdout
+
+
+def test_url_input_downloads_and_extracts_text():
+    html_bytes = TEST_HTML.read_bytes()
+    server, thread = run_test_server(html_bytes)
+
+    try:
+        url = f"http://127.0.0.1:{server.server_address[1]}"
+        result = run_scrape(url, "-e", "//h1/text()", "-t")
+    finally:
+        server.shutdown()
+        server.server_close()
+        thread.join(timeout=2)
+
+    assert result.returncode == 0
+    assert result.stdout.strip() == "Welcome to the Test Page"
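The new `tests/test_scrape.py` follows pytest conventions (bare `assert` statements, the `tmp_path` fixture), though pytest itself is not declared anywhere in this diff. Assuming it is installed, a sketch of running the suite from the repository root:

```bash
# Run the new test module verbosely (assumes pytest is available)
python -m pytest tests/test_scrape.py -v
```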
scrape_cli-1.2.0/setup.py DELETED
@@ -1,37 +0,0 @@
-# setup.py
-from setuptools import setup
-from pathlib import Path
-
-# Read the README
-this_directory = Path(__file__).parent
-long_description = (this_directory / "README.md").read_text(encoding="utf-8")
-
-setup(
-    name="scrape_cli",
-    version="1.1.9",
-    description="It's a command-line tool to extract HTML elements using an XPath query or CSS3 selector.",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author="Andrea Borruso",
-    author_email="aborruso@gmail.com",
-    url="https://github.com/aborruso/scrape-cli",
-    license="MIT",
-    packages=["scrape_cli"],
-    package_dir={"scrape_cli": "scrape_cli"},
-    entry_points={
-        'console_scripts': [
-            'scrape=scrape_cli.scrape:main',
-        ],
-    },
-    install_requires=[
-        "cssselect",
-        "lxml",
-        "requests"
-    ],
-    classifiers=[
-        "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: MIT License",
-        "Operating System :: OS Independent",
-    ],
-    python_requires='>=3.6',
-)