amzpy 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
amzpy-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,92 @@
+ Metadata-Version: 2.2
+ Name: amzpy
+ Version: 0.1.0
+ Summary: A lightweight Amazon scraper library.
+ Home-page: https://github.com/theonlyanil/amzpy
+ Author: Anil Sardiwal
+ Author-email: theonlyanil@gmail.com
+ Keywords: amazon,scraper,web-scraping,product-data,e-commerce
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.6
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
+ Requires-Python: >=3.6
+ Description-Content-Type: text/markdown
+ Requires-Dist: requests
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: fake-useragent
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # AmzPy - Amazon Product Scraper
+
+ AmzPy is a lightweight Python library for scraping product information from Amazon. It provides a simple interface to fetch product details like title, price, currency, and image URL while handling anti-bot measures automatically.
+
+ ## Features
+
+ - Easy-to-use API for scraping Amazon product data
+ - Supports multiple Amazon domains (.com, .in, .co.uk, etc.)
+ - Built-in anti-bot protection
+ - Automatic retries on detection
+ - Clean and typed Python interface
+
+ ## Installation
+
+ Install using pip:
+ `pip install amzpy`
+
+ ## Usage
+
+ Here's a basic example of how to use AmzPy:
+
+ ```python
+ from amzpy import AmazonScraper
+
+ scraper = AmazonScraper()
+ product_details = scraper.get_product_details("https://www.amazon.com/dp/B0D4J2QDVY")
+ print(product_details)
+ ```
+
+ This will output the product details including title, price, image URL, and currency:
+
+ ```json
+ {
+     "title": "Product Title",
+     "price": "299",
+     "currency": "$",
+     "img_url": "https://..."
+ }
+ ```
+
+ Feel free to explore the codebase for more details and advanced features. Happy scraping!
+
+ ## Requirements
+
+ - Python 3.6+
+ - requests
+ - beautifulsoup4
+ - fake-useragent
+
+ ## License
+
+ This project is licensed under the MIT License - see the LICENSE file for details.
+
+ ## Contributing
+
+ Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute to this project.
amzpy-0.1.0/README.md ADDED
@@ -0,0 +1,56 @@
1
+ # AmzPy - Amazon Product Scraper
2
+
3
+ AmzPy is a lightweight Python library for scraping product information from Amazon. It provides a simple interface to fetch product details like title, price, currency, and image URL while handling anti-bot measures automatically.
4
+
5
+ ## Features
6
+
7
+ - Easy-to-use API for scraping Amazon product data
8
+ - Supports multiple Amazon domains (.com, .in, .co.uk, etc.)
9
+ - Built-in anti-bot protection
10
+ - Automatic retries on detection
11
+ - Clean and typed Python interface
12
+
13
+ ## Installation
14
+
15
+ Install using pip:
16
+ `pip install amzpy`
17
+
18
+ ## Usage
19
+
20
+ Here's a basic example of how to use AmzPy:
21
+
22
+ ```python
23
+ from amzpy import AmazonScraper
24
+
25
+ scraper = AmazonScraper()
26
+ product_details = scraper.get_product_details("https://www.amazon.com/dp/B0D4J2QDVY")
27
+ print(product_details)
28
+ ```
29
+
30
+ This will output the product details including title, price, image URL, and currency.
31
+
32
+ Feel free to explore the codebase for more details and advanced features. Happy scraping!
33
+
34
+ Output:
35
+ ```json
36
+ {
37
+ "title": "Product Title",
38
+ "price": "299",
39
+ "currency": "$",
40
+ "img_url": "https://..."
41
+ }
42
+ ```
43
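A slightly fuller usage sketch (an editor's illustration, not part of the packaged README): the marketplace is taken from the URL itself, `max_retries` caps the retry attempts when anti-bot measures are detected, and `None` is returned if the URL is invalid or the request fails.

```python
from amzpy import AmazonScraper

scraper = AmazonScraper()

# The domain in the URL decides the marketplace, so amazon.in, amazon.co.uk, etc. work too.
details = scraper.get_product_details("https://www.amazon.in/dp/B0D4J2QDVY", max_retries=5)

if details is None:
    print("Could not fetch the product (invalid URL or request blocked)")
else:
    print(details["title"], details["currency"], details["price"])
```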
+ ## Requirements
+
+ - Python 3.6+
+ - requests
+ - beautifulsoup4
+ - fake-useragent
+
+ ## License
+
+ This project is licensed under the MIT License - see the LICENSE file for details.
+
+ ## Contributing
+
+ Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute to this project.
amzpy-0.1.0/amzpy/__init__.py ADDED
@@ -0,0 +1,21 @@
+ """
+ AmzPy - Amazon Product Scraper
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ A lightweight Python library for scraping product information from Amazon.
+
+ Basic usage:
+     >>> from amzpy import AmazonScraper
+     >>> scraper = AmazonScraper()
+     >>> details = scraper.get_product_details("https://www.amazon.com/dp/B0D4J2QDVY")
+     >>> print(details)
+
+ :copyright: (c) 2025 by Anil Sardiwal.
+ :license: MIT, see LICENSE for more details.
+ """
+
+ from .scraper import AmazonScraper
+
+ __version__ = "0.1.0"
+ __author__ = "Anil Sardiwal"
+ __license__ = "MIT"
amzpy-0.1.0/amzpy/engine.py ADDED
@@ -0,0 +1,36 @@
+ import requests
+ from typing import Optional
+ from fake_useragent import UserAgent
+
+ class RequestEngine:
+     """Handles all HTTP requests to Amazon with anti-bot measures"""
+
+     def __init__(self):
+         self.session = requests.Session()
+         self.ua = UserAgent(browsers=['Edge', 'Chrome'])
+         self.headers = {
+             'User-Agent': self.ua.random,
+             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+             'Accept-Language': 'en-US,en;q=0.5',
+             'Connection': 'keep-alive',
+         }
+
+     def get(self, url: str) -> Optional[str]:
+         """
+         Make a GET request with anti-bot measures
+
+         Args:
+             url (str): URL to fetch
+
+         Returns:
+             str: HTML content if successful
+             None: If request fails
+         """
+         try:
+             # Update User-Agent for each request
+             self.headers['User-Agent'] = self.ua.random
+             response = self.session.get(url, headers=self.headers)
+             response.raise_for_status()
+             return response.text
+         except Exception:
+             return None
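A minimal usage sketch for the engine above (an editor's illustration under the package layout listed in SOURCES.txt, not part of the release): each call to `get()` draws a fresh User-Agent from fake-useragent, and `None` signals a failed or blocked request.

```python
from amzpy.engine import RequestEngine

engine = RequestEngine()
html = engine.get("https://www.amazon.com/dp/B0D4J2QDVY")

if html is None:
    # Failed request or anti-bot block; callers such as AmazonScraper simply bail out here.
    print("Request failed")
else:
    print(f"Fetched {len(html)} characters of HTML")
```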
amzpy-0.1.0/amzpy/parser.py ADDED
@@ -0,0 +1,59 @@
+ from bs4 import BeautifulSoup
+ from typing import Dict, Optional
+ from .engine import RequestEngine
+
+ def parse_product_page(html_content: str, url: str = None, engine: RequestEngine = None, max_retries: int = 0) -> Optional[Dict]:
+     """
+     Parse Amazon product page HTML and extract relevant information
+
+     Args:
+         html_content (str): Raw HTML content of the product page
+         url (str, optional): Product URL for retrying if needed
+         engine (RequestEngine, optional): RequestEngine instance for retries
+         max_retries (int): Number of remaining retry attempts
+
+     Returns:
+         Dict: Extracted product information (title, price, img_url, currency)
+         None: If parsing fails after all retries
+     """
+     if not html_content:
+         return None
+
+     soup = BeautifulSoup(html_content, 'html.parser')
+
+     try:
+         # Get title
+         title = soup.select_one('#productTitle')
+         title = title.text.strip() if title else None
+
+         # If title is None and we have retries left, try again
+         if title is None and max_retries > 0 and url and engine:
+             print(f"Retry attempt {max_retries} - Anti-bot measure detected")
+             new_html = engine.get(url)
+             return parse_product_page(new_html, url, engine, max_retries - 1)
+
+         # Get price
+         price_element = soup.select_one('.a-price-whole')
+         price = price_element.text.strip().replace(',', '') if price_element else None
+
+         # Get currency symbol
+         currency_element = soup.select_one('.a-price-symbol')
+         currency = currency_element.text.strip() if currency_element else None
+
+         # Get main product image
+         img_element = soup.select_one('#landingImage') or soup.select_one('#imgBlkFront')
+         img_url = img_element.get('src') if img_element else None
+
+         return {
+             "title": title,
+             "price": price,
+             "img_url": img_url,
+             "currency": currency
+         }
+     except Exception:
+         # If we have retries left, try again
+         if max_retries > 0 and url and engine:
+             print(f"Retry attempt {max_retries} - Error occurred")
+             new_html = engine.get(url)
+             return parse_product_page(new_html, url, engine, max_retries - 1)
+         return None
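For reference, a short sketch of calling the parser directly on saved HTML (illustrative; `product_page.html` is a hypothetical local file, and without a `url`/`engine` pair no retries are attempted).

```python
from amzpy.parser import parse_product_page

# Hypothetical file containing a previously downloaded Amazon product page
with open("product_page.html", encoding="utf-8") as f:
    html = f.read()

# With no url/engine supplied the function makes a single parsing pass and never retries
details = parse_product_page(html)
print(details)  # e.g. {'title': ..., 'price': ..., 'img_url': ..., 'currency': ...} or None
```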
amzpy-0.1.0/amzpy/scraper.py ADDED
@@ -0,0 +1,52 @@
+ """
+ The main scraper module for the Amazon Product API.
+ """
+
+ from typing import Dict, Optional
+ from .engine import RequestEngine
+ from .parser import parse_product_page
+ from .utils import parse_amazon_url
+
+ class AmazonScraper:
+     """Main scraper class for Amazon product data"""
+
+     def __init__(self, country_code: str = "com"):
+         """Initialize the Amazon scraper"""
+         self.base_url = f"https://www.amazon.{country_code}/"
+         self.engine = RequestEngine()
+
+     def get_product_details(self, url: str, max_retries: int = 3) -> Optional[Dict]:
+         """
+         Fetch details for a product using its Amazon URL
+
+         Args:
+             url (str): Amazon product URL
+             max_retries (int): Maximum retry attempts if anti-bot measures detected
+
+         Returns:
+             Dict: Product details including title, price, img_url, and currency
+             None: If URL is invalid or error occurs
+         """
+         parsed = parse_amazon_url(url)
+         if not parsed:
+             return None
+
+         base_url, product_id = parsed
+
+         # Construct product URL and get HTML
+         product_url = f"{base_url}dp/{product_id}"
+         html_content = self.engine.get(product_url)
+         if not html_content:
+             return None
+
+         # Parse the product page and return the data
+         return parse_product_page(html_content, product_url, self.engine, max_retries)
+
+ def main():
+     scraper = AmazonScraper()
+     url = "https://www.amazon.in/dp/B0D4J2QDVY"
+     details = scraper.get_product_details(url, max_retries=5)
+     print("Product details:", details)
+
+ if __name__ == "__main__":
+     main()
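An illustrative note on the URL handling above: `get_product_details` accepts full product URLs with slugs and `ref` parameters, because `parse_amazon_url` extracts only the domain and ASIN and the scraper rebuilds a clean `/dp/` URL before fetching. A hedged sketch (the long URL below is made up):

```python
from amzpy import AmazonScraper

scraper = AmazonScraper()

# Hypothetical "messy" URL copied from a browser; the slug and ref tail are ignored
url = "https://www.amazon.co.uk/some-product-name/dp/B0D4J2QDVY/ref=sr_1_1?keywords=example"
details = scraper.get_product_details(url)

# Internally the scraper fetches https://www.amazon.co.uk/dp/B0D4J2QDVY
print(details)
```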
amzpy-0.1.0/amzpy/utils.py ADDED
@@ -0,0 +1,38 @@
+ from typing import Tuple, Optional
+ import re
+
+ def parse_amazon_url(url: str) -> Optional[Tuple[str, str]]:
+     """
+     Parse Amazon product URL to extract base URL and product ID
+
+     Args:
+         url (str): Full Amazon product URL
+
+     Returns:
+         Tuple[str, str]: (base_url, product_id) if valid
+         None: If URL is invalid
+
+     Examples:
+         >>> parse_amazon_url("https://www.amazon.com/dp/B0D4J2QDVY")
+         ('https://www.amazon.com/', 'B0D4J2QDVY')
+         >>> parse_amazon_url("https://www.amazon.in/product-name/dp/B0D4J2QDVY/ref=...")
+         ('https://www.amazon.in/', 'B0D4J2QDVY')
+     """
+     # Clean up the URL
+     url = url.strip()
+
+     # Match Amazon domain and product ID
+     pattern = r'https?://(?:www\.)?amazon\.([a-z.]+)(?:/[^/]+)*?/(?:dp|gp/product)/([A-Z0-9]{10})'
+     match = re.search(pattern, url)
+
+     if not match:
+         return None
+
+     # Extract domain and construct base URL
+     domain = match.group(1)
+     base_url = f"https://www.amazon.{domain}/"
+
+     # Extract product ID
+     product_id = match.group(2)
+
+     return base_url, product_id
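A quick illustration of the helper above (an editor's sketch, not part of the package): the regex also matches the older /gp/product/ URL form and returns None for anything that is not an Amazon product URL.

```python
from amzpy.utils import parse_amazon_url

# Older-style /gp/product/ links are matched by the same pattern
print(parse_amazon_url("https://www.amazon.co.uk/gp/product/B0D4J2QDVY"))
# ('https://www.amazon.co.uk/', 'B0D4J2QDVY')

# Non-Amazon (or malformed) URLs yield None
print(parse_amazon_url("https://example.com/dp/B0D4J2QDVY"))
# None
```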
amzpy-0.1.0/amzpy.egg-info/PKG-INFO ADDED
@@ -0,0 +1,92 @@
+ Metadata-Version: 2.2
+ Name: amzpy
+ Version: 0.1.0
+ Summary: A lightweight Amazon scraper library.
+ Home-page: https://github.com/theonlyanil/amzpy
+ Author: Anil Sardiwal
+ Author-email: theonlyanil@gmail.com
+ Keywords: amazon,scraper,web-scraping,product-data,e-commerce
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.6
+ Classifier: Programming Language :: Python :: 3.7
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
+ Requires-Python: >=3.6
+ Description-Content-Type: text/markdown
+ Requires-Dist: requests
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: fake-useragent
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary
+
+ # AmzPy - Amazon Product Scraper
+
+ AmzPy is a lightweight Python library for scraping product information from Amazon. It provides a simple interface to fetch product details like title, price, currency, and image URL while handling anti-bot measures automatically.
+
+ ## Features
+
+ - Easy-to-use API for scraping Amazon product data
+ - Supports multiple Amazon domains (.com, .in, .co.uk, etc.)
+ - Built-in anti-bot protection
+ - Automatic retries on detection
+ - Clean and typed Python interface
+
+ ## Installation
+
+ Install using pip:
+ `pip install amzpy`
+
+ ## Usage
+
+ Here's a basic example of how to use AmzPy:
+
+ ```python
+ from amzpy import AmazonScraper
+
+ scraper = AmazonScraper()
+ product_details = scraper.get_product_details("https://www.amazon.com/dp/B0D4J2QDVY")
+ print(product_details)
+ ```
+
+ This will output the product details including title, price, image URL, and currency:
+
+ ```json
+ {
+     "title": "Product Title",
+     "price": "299",
+     "currency": "$",
+     "img_url": "https://..."
+ }
+ ```
+
+ Feel free to explore the codebase for more details and advanced features. Happy scraping!
+
+ ## Requirements
+
+ - Python 3.6+
+ - requests
+ - beautifulsoup4
+ - fake-useragent
+
+ ## License
+
+ This project is licensed under the MIT License - see the LICENSE file for details.
+
+ ## Contributing
+
+ Contributions are welcome! Please read [CONTRIBUTING.md](CONTRIBUTING.md) for details on how to contribute to this project.
amzpy-0.1.0/amzpy.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,14 @@
+ README.md
+ setup.py
+ amzpy/__init__.py
+ amzpy/engine.py
+ amzpy/parser.py
+ amzpy/scraper.py
+ amzpy/utils.py
+ amzpy.egg-info/PKG-INFO
+ amzpy.egg-info/SOURCES.txt
+ amzpy.egg-info/dependency_links.txt
+ amzpy.egg-info/requires.txt
+ amzpy.egg-info/top_level.txt
+ tests/__init__.py
+ tests/test_scraper.py
amzpy-0.1.0/amzpy.egg-info/requires.txt ADDED
@@ -0,0 +1,3 @@
+ requests
+ beautifulsoup4
+ fake-useragent
amzpy-0.1.0/amzpy.egg-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
+ amzpy
+ tests
amzpy-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
amzpy-0.1.0/setup.py ADDED
@@ -0,0 +1,35 @@
+ from setuptools import setup, find_packages
+
+ setup(
+     name="amzpy",
+     version="0.1.0",
+     description="A lightweight Amazon scraper library.",
+     long_description=open("README.md").read(),
+     long_description_content_type="text/markdown",
+     author="Anil Sardiwal",
+     author_email="theonlyanil@gmail.com",
+     url="https://github.com/theonlyanil/amzpy",
+     packages=find_packages(),
+     license_files=("LICENSE",),
+     install_requires=[
+         "requests",
+         "beautifulsoup4",
+         "fake-useragent",
+     ],
+     classifiers=[
+         "Development Status :: 3 - Alpha",
+         "Intended Audience :: Developers",
+         "License :: OSI Approved :: MIT License",
+         "Operating System :: OS Independent",
+         "Programming Language :: Python :: 3",
+         "Programming Language :: Python :: 3.6",
+         "Programming Language :: Python :: 3.7",
+         "Programming Language :: Python :: 3.8",
+         "Programming Language :: Python :: 3.9",
+         "Programming Language :: Python :: 3.10",
+         "Topic :: Software Development :: Libraries :: Python Modules",
+         "Topic :: Internet :: WWW/HTTP :: Dynamic Content",
+     ],
+     python_requires=">=3.6",
+     keywords="amazon, scraper, web-scraping, product-data, e-commerce",
+ )
amzpy-0.1.0/tests/__init__.py ADDED
File without changes
amzpy-0.1.0/tests/test_scraper.py ADDED
@@ -0,0 +1,16 @@
+ import unittest
+ from amzpy.scraper import AmazonScraper
+
+ class TestAmazonScraper(unittest.TestCase):
+     def setUp(self):
+         self.scraper = AmazonScraper()
+
+     def test_product_details(self):
+         url = "https://www.amazon.in/dp/B09JK1PZ7N"
+         result = self.scraper.get_product_details(url, max_retries=5)
+
+         self.assertIsNotNone(result["title"], "Title should be properly scraped.")
+         self.assertIsNotNone(result["price"], "Price should be properly scraped.")
+
+ if __name__ == "__main__":
+     unittest.main()