google-maps-scraper 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- google_maps_scraper-0.1.0/.gitignore +32 -0
- google_maps_scraper-0.1.0/LICENSE +21 -0
- google_maps_scraper-0.1.0/PKG-INFO +284 -0
- google_maps_scraper-0.1.0/README.md +250 -0
- google_maps_scraper-0.1.0/pyproject.toml +71 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/__init__.py +33 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/batch.py +263 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/cli.py +238 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/models.py +82 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/parser.py +546 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/scraper.py +494 -0
- google_maps_scraper-0.1.0/src/gmaps_scraper/utils.py +205 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.egg
|
|
9
|
+
.eggs/
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
venv/
|
|
13
|
+
.venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# IDE
|
|
17
|
+
.vscode/
|
|
18
|
+
.idea/
|
|
19
|
+
*.swp
|
|
20
|
+
*.swo
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
25
|
+
|
|
26
|
+
# Testing
|
|
27
|
+
.pytest_cache/
|
|
28
|
+
.coverage
|
|
29
|
+
htmlcov/
|
|
30
|
+
|
|
31
|
+
# Scraper output
|
|
32
|
+
test_data/*_output.*
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Yan-Ying Liao
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: google-maps-scraper
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Scrape Google Maps place details (rating, reviews, address, etc.) using Playwright — no API key needed
|
|
5
|
+
Project-URL: Homepage, https://github.com/noworneverev/google-maps-scraper
|
|
6
|
+
Project-URL: Repository, https://github.com/noworneverev/google-maps-scraper
|
|
7
|
+
Project-URL: Issues, https://github.com/noworneverev/google-maps-scraper/issues
|
|
8
|
+
Project-URL: Changelog, https://github.com/noworneverev/google-maps-scraper/releases
|
|
9
|
+
Author-email: Yan-Ying Liao <liao961120@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: async,google-maps,places,playwright,ratings,reviews,scraper,web-scraping
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Framework :: AsyncIO
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Requires-Dist: playwright>=1.40.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0.0
|
|
27
|
+
Requires-Dist: tqdm>=4.60.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
31
|
+
Provides-Extra: stealth
|
|
32
|
+
Requires-Dist: playwright-stealth>=1.0.6; extra == 'stealth'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# google-maps-scraper
|
|
36
|
+
|
|
37
|
+
[PyPI package](https://pypi.org/project/google-maps-scraper/)
|
|
38
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
39
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
40
|
+
|
|
41
|
+
Scrape Google Maps place details — **rating, review count, address, phone, hours, coordinates, and more** — without an API key.
|
|
42
|
+
|
|
43
|
+
Built with [Playwright](https://playwright.dev/) (Firefox) for reliable rendering and **asyncio** for high-throughput batch processing.
|
|
44
|
+
|
|
45
|
+
## Features
|
|
46
|
+
|
|
47
|
+
- 🔍 **Scrape place details** from any Google Maps URL or search query
|
|
48
|
+
- ⭐ **Extract 20+ fields** — rating, review count, address, phone, website, hours, coordinates, category, and more
|
|
49
|
+
- 📝 **Review scraping** — extract individual user reviews with ratings and text
|
|
50
|
+
- 🚀 **Async batch processing** — configurable concurrency for scraping thousands of URLs
|
|
51
|
+
- 💾 **Crash recovery** — auto-save with resume support; pick up where you left off
|
|
52
|
+
- 🌍 **Multi-language** — supports any Google Maps locale (`en`, `ja`, `zh-TW`, `ko`, ...)
|
|
53
|
+
- 🔎 **Smart search handling** — auto-clicks the first search result when a query returns multiple matches
|
|
54
|
+
- 🤖 **Headless-ready** — runs perfectly in CI/CD and headless environments
|
|
55
|
+
- 📦 **CLI + Python API** — use from the command line or import as a library
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install google-maps-scraper
|
|
61
|
+
playwright install firefox
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
> **Note:** If you are running on a server without a GUI, use `playwright install firefox --with-deps` to also install the browser's system dependencies.
|
|
65
|
+
|
|
66
|
+
### Optional: Stealth Mode
|
|
67
|
+
|
|
68
|
+
For better anti-detection, install `playwright-stealth`:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install google-maps-scraper[stealth]
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
### CLI
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Scrape a single place
|
|
80
|
+
gmaps-scraper scrape "https://www.google.com/maps/search/?api=1&query=Eiffel+Tower"
|
|
81
|
+
|
|
82
|
+
# Scrape with language setting
|
|
83
|
+
gmaps-scraper scrape "https://www.google.com/maps/search/?api=1&query=東京タワー" --lang ja
|
|
84
|
+
|
|
85
|
+
# Batch scrape from CSV
|
|
86
|
+
gmaps-scraper batch urls.csv -o results.json --concurrency 5
|
|
87
|
+
|
|
88
|
+
# Batch scrape to CSV
|
|
89
|
+
gmaps-scraper batch urls.csv -o results.csv --lang zh-TW --concurrency 3
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### Python API (Async)
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
import asyncio
|
|
96
|
+
from gmaps_scraper import GoogleMapsScraper, ScrapeConfig
|
|
97
|
+
|
|
98
|
+
async def main():
|
|
99
|
+
config = ScrapeConfig(language="en", headless=True)
|
|
100
|
+
async with GoogleMapsScraper(config) as scraper:
|
|
101
|
+
result = await scraper.scrape(
|
|
102
|
+
"https://www.google.com/maps/search/?api=1&query=Machu+Picchu"
|
|
103
|
+
)
|
|
104
|
+
if result.success:
|
|
105
|
+
print(f"Name: {result.place.name}")
|
|
106
|
+
print(f"Rating: {result.place.rating}")
|
|
107
|
+
print(f"Reviews: {result.place.review_count}")
|
|
108
|
+
print(f"Address: {result.place.address}")
|
|
109
|
+
|
|
110
|
+
asyncio.run(main())
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Python API (Sync)
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from gmaps_scraper import scrape_place
|
|
117
|
+
|
|
118
|
+
result = scrape_place("https://www.google.com/maps/search/?api=1&query=Colosseum")
|
|
119
|
+
print(result.place.name, result.place.rating)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Batch Processing
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
import asyncio
|
|
126
|
+
from gmaps_scraper import scrape_batch, ScrapeConfig
|
|
127
|
+
|
|
128
|
+
async def main():
|
|
129
|
+
urls = open("urls.txt").read().splitlines()
|
|
130
|
+
|
|
131
|
+
config = ScrapeConfig(
|
|
132
|
+
concurrency=5,
|
|
133
|
+
delay_min=1.0,
|
|
134
|
+
delay_max=3.0,
|
|
135
|
+
headless=True,
|
|
136
|
+
save_interval=50,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
results = await scrape_batch(
|
|
140
|
+
urls=urls,
|
|
141
|
+
config=config,
|
|
142
|
+
output_path="results.json",
|
|
143
|
+
resume=True, # Skip already-scraped URLs on restart
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
success = sum(1 for r in results if r.success)
|
|
147
|
+
print(f"Done: {success}/{len(results)} succeeded")
|
|
148
|
+
|
|
149
|
+
asyncio.run(main())
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## CLI Reference
|
|
153
|
+
|
|
154
|
+
### `gmaps-scraper scrape <url>`
|
|
155
|
+
|
|
156
|
+
Scrape a single Google Maps URL and output JSON.
|
|
157
|
+
|
|
158
|
+
| Option | Default | Description |
|
|
159
|
+
|---|---|---|
|
|
160
|
+
| `--lang` | — | Language code (e.g., `en`, `ja`, `zh-TW`) |
|
|
161
|
+
| `--no-headless` | — | Show the browser window (for debugging) |
|
|
162
|
+
| `--reviews` | — | Also scrape individual reviews |
|
|
163
|
+
| `--max-reviews` | `20` | Max reviews to extract |
|
|
164
|
+
| `-v, --verbose` | — | Enable debug logging |
|
|
165
|
+
|
|
166
|
+
### `gmaps-scraper batch <input> -o <output>`
|
|
167
|
+
|
|
168
|
+
Batch scrape URLs from a file. Output format is inferred from file extension (`.json` or `.csv`).
|
|
169
|
+
|
|
170
|
+
| Option | Default | Description |
|
|
171
|
+
|---|---|---|
|
|
172
|
+
| `-o, --output` | *required* | Output file path (`.json` or `.csv`) |
|
|
173
|
+
| `--concurrency` | `5` | Parallel browser tabs |
|
|
174
|
+
| `--lang` | — | Language code |
|
|
175
|
+
| `--proxy` | — | Proxy server URL (e.g., `http://proxy:8080`) |
|
|
176
|
+
| `--delay-min` | `2.0` | Min delay between requests (seconds) |
|
|
177
|
+
| `--delay-max` | `5.0` | Max delay between requests (seconds) |
|
|
178
|
+
| `--no-resume` | — | Start fresh, don't resume from existing output |
|
|
179
|
+
| `--reviews` | — | Also scrape individual reviews |
|
|
180
|
+
| `--max-reviews` | `20` | Max reviews per place |
|
|
181
|
+
| `--save-interval` | `50` | Auto-save every N results |
|
|
182
|
+
|
|
183
|
+
## Input File Format
|
|
184
|
+
|
|
185
|
+
**CSV** — the scraper looks for a column named `url`, `URL`, or `link`:
|
|
186
|
+
|
|
187
|
+
```csv
|
|
188
|
+
url,name
|
|
189
|
+
https://www.google.com/maps/search/?api=1&query=Eiffel+Tower,Eiffel Tower
|
|
190
|
+
https://www.google.com/maps/search/?api=1&query=Colosseum,Colosseum
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
**Text** — one URL per line:
|
|
194
|
+
|
|
195
|
+
```text
|
|
196
|
+
https://www.google.com/maps/search/?api=1&query=Eiffel+Tower
|
|
197
|
+
https://www.google.com/maps/search/?api=1&query=Colosseum
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Output Format
|
|
201
|
+
|
|
202
|
+
### JSON
|
|
203
|
+
|
|
204
|
+
```json
|
|
205
|
+
[
|
|
206
|
+
{
|
|
207
|
+
"input_url": "https://www.google.com/maps/search/?api=1&query=Eiffel+Tower",
|
|
208
|
+
"success": true,
|
|
209
|
+
"place": {
|
|
210
|
+
"name": "Eiffel Tower",
|
|
211
|
+
"rating": 4.7,
|
|
212
|
+
"review_count": 344856,
|
|
213
|
+
"address": "Av. Gustave Eiffel, 75007 Paris, France",
|
|
214
|
+
"phone": "+33 8 92 70 12 39",
|
|
215
|
+
"website": "https://www.toureiffel.paris/",
|
|
216
|
+
"category": "Historical landmark",
|
|
217
|
+
"latitude": 48.8583701,
|
|
218
|
+
"longitude": 2.2944813,
|
|
219
|
+
"hours": ["Monday 09:30–23:45", "..."],
|
|
220
|
+
"google_maps_url": "https://www.google.com/maps/place/...",
|
|
221
|
+
"permanently_closed": false
|
|
222
|
+
},
|
|
223
|
+
"reviews": [],
|
|
224
|
+
"scraped_at": "2025-03-06T12:00:00"
|
|
225
|
+
}
|
|
226
|
+
]
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### CSV
|
|
230
|
+
|
|
231
|
+
Flat structure with all place fields as columns. Ideal for data analysis.
|
|
232
|
+
|
|
233
|
+
## Extracted Fields
|
|
234
|
+
|
|
235
|
+
| Field | Type | Description |
|
|
236
|
+
|---|---|---|
|
|
237
|
+
| `name` | `str` | Place name |
|
|
238
|
+
| `rating` | `float` | Star rating (1.0–5.0) |
|
|
239
|
+
| `review_count` | `int` | Total number of reviews |
|
|
240
|
+
| `address` | `str` | Full address |
|
|
241
|
+
| `phone` | `str` | Phone number |
|
|
242
|
+
| `website` | `str` | Website URL |
|
|
243
|
+
| `category` | `str` | Place category (e.g., "Restaurant") |
|
|
244
|
+
| `hours` | `list[str]` | Opening hours per day |
|
|
245
|
+
| `latitude` | `float` | Latitude coordinate |
|
|
246
|
+
| `longitude` | `float` | Longitude coordinate |
|
|
247
|
+
| `plus_code` | `str` | Google Plus Code |
|
|
248
|
+
| `place_id` | `str` | Google Maps Place ID |
|
|
249
|
+
| `url` | `str` | Canonical Google Maps URL |
|
|
250
|
+
| `google_maps_url` | `str` | Direct Google Maps link |
|
|
251
|
+
| `price_level` | `str` | Price level indicator |
|
|
252
|
+
| `image_url` | `str` | Main image URL |
|
|
253
|
+
| `description` | `str` | Place description |
|
|
254
|
+
| `photos_count` | `int` | Number of photos |
|
|
255
|
+
| `permanently_closed` | `bool` | Whether permanently closed |
|
|
256
|
+
| `temporarily_closed` | `bool` | Whether temporarily closed |
|
|
257
|
+
|
|
258
|
+
## Performance Guide
|
|
259
|
+
|
|
260
|
+
| Concurrency | Est. Throughput | Time for 10K URLs | Notes |
|
|
261
|
+
|---|---|---|---|
|
|
262
|
+
| 3 | ~1,200/hr | ~8.3 hrs | Conservative, stable |
|
|
263
|
+
| 5 | ~2,000/hr | ~5.0 hrs | Default |
|
|
264
|
+
| 10 | ~4,000/hr | ~2.5 hrs | Recommended with proxy |
|
|
265
|
+
|
|
266
|
+
**Tips:**
|
|
267
|
+
|
|
268
|
+
- Use `--proxy` with rotating proxies for higher concurrency
|
|
269
|
+
- The scraper auto-saves progress; if interrupted, just re-run and it will resume
|
|
270
|
+
- For large batches in CI (e.g., GitHub Actions, which has a 6-hour job limit), split the input into chunks
|
|
271
|
+
|
|
272
|
+
## Development
|
|
273
|
+
|
|
274
|
+
```bash
|
|
275
|
+
git clone https://github.com/noworneverev/google-maps-scraper.git
|
|
276
|
+
cd google-maps-scraper
|
|
277
|
+
pip install -e ".[dev]"
|
|
278
|
+
playwright install firefox
|
|
279
|
+
pytest tests/ -v
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
## License
|
|
283
|
+
|
|
284
|
+
[MIT](LICENSE) © [Yan-Ying Liao](https://github.com/noworneverev)
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# google-maps-scraper
|
|
2
|
+
|
|
3
|
+
[PyPI package](https://pypi.org/project/google-maps-scraper/)
|
|
4
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
Scrape Google Maps place details — **rating, review count, address, phone, hours, coordinates, and more** — without an API key.
|
|
8
|
+
|
|
9
|
+
Built with [Playwright](https://playwright.dev/) (Firefox) for reliable rendering and **asyncio** for high-throughput batch processing.
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- 🔍 **Scrape place details** from any Google Maps URL or search query
|
|
14
|
+
- ⭐ **Extract 20+ fields** — rating, review count, address, phone, website, hours, coordinates, category, and more
|
|
15
|
+
- 📝 **Review scraping** — extract individual user reviews with ratings and text
|
|
16
|
+
- 🚀 **Async batch processing** — configurable concurrency for scraping thousands of URLs
|
|
17
|
+
- 💾 **Crash recovery** — auto-save with resume support; pick up where you left off
|
|
18
|
+
- 🌍 **Multi-language** — supports any Google Maps locale (`en`, `ja`, `zh-TW`, `ko`, ...)
|
|
19
|
+
- 🔎 **Smart search handling** — auto-clicks the first search result when a query returns multiple matches
|
|
20
|
+
- 🤖 **Headless-ready** — runs perfectly in CI/CD and headless environments
|
|
21
|
+
- 📦 **CLI + Python API** — use from the command line or import as a library
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install google-maps-scraper
|
|
27
|
+
playwright install firefox
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
> **Note:** If you are running on a server without a GUI, use `playwright install firefox --with-deps` to also install the browser's system dependencies.
|
|
31
|
+
|
|
32
|
+
### Optional: Stealth Mode
|
|
33
|
+
|
|
34
|
+
For better anti-detection, install `playwright-stealth`:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
pip install google-maps-scraper[stealth]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## Quick Start
|
|
41
|
+
|
|
42
|
+
### CLI
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
# Scrape a single place
|
|
46
|
+
gmaps-scraper scrape "https://www.google.com/maps/search/?api=1&query=Eiffel+Tower"
|
|
47
|
+
|
|
48
|
+
# Scrape with language setting
|
|
49
|
+
gmaps-scraper scrape "https://www.google.com/maps/search/?api=1&query=東京タワー" --lang ja
|
|
50
|
+
|
|
51
|
+
# Batch scrape from CSV
|
|
52
|
+
gmaps-scraper batch urls.csv -o results.json --concurrency 5
|
|
53
|
+
|
|
54
|
+
# Batch scrape to CSV
|
|
55
|
+
gmaps-scraper batch urls.csv -o results.csv --lang zh-TW --concurrency 3
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Python API (Async)
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
import asyncio
|
|
62
|
+
from gmaps_scraper import GoogleMapsScraper, ScrapeConfig
|
|
63
|
+
|
|
64
|
+
async def main():
|
|
65
|
+
config = ScrapeConfig(language="en", headless=True)
|
|
66
|
+
async with GoogleMapsScraper(config) as scraper:
|
|
67
|
+
result = await scraper.scrape(
|
|
68
|
+
"https://www.google.com/maps/search/?api=1&query=Machu+Picchu"
|
|
69
|
+
)
|
|
70
|
+
if result.success:
|
|
71
|
+
print(f"Name: {result.place.name}")
|
|
72
|
+
print(f"Rating: {result.place.rating}")
|
|
73
|
+
print(f"Reviews: {result.place.review_count}")
|
|
74
|
+
print(f"Address: {result.place.address}")
|
|
75
|
+
|
|
76
|
+
asyncio.run(main())
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Python API (Sync)
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
from gmaps_scraper import scrape_place
|
|
83
|
+
|
|
84
|
+
result = scrape_place("https://www.google.com/maps/search/?api=1&query=Colosseum")
|
|
85
|
+
print(result.place.name, result.place.rating)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Batch Processing
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
import asyncio
|
|
92
|
+
from gmaps_scraper import scrape_batch, ScrapeConfig
|
|
93
|
+
|
|
94
|
+
async def main():
|
|
95
|
+
urls = open("urls.txt").read().splitlines()
|
|
96
|
+
|
|
97
|
+
config = ScrapeConfig(
|
|
98
|
+
concurrency=5,
|
|
99
|
+
delay_min=1.0,
|
|
100
|
+
delay_max=3.0,
|
|
101
|
+
headless=True,
|
|
102
|
+
save_interval=50,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
results = await scrape_batch(
|
|
106
|
+
urls=urls,
|
|
107
|
+
config=config,
|
|
108
|
+
output_path="results.json",
|
|
109
|
+
resume=True, # Skip already-scraped URLs on restart
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
success = sum(1 for r in results if r.success)
|
|
113
|
+
print(f"Done: {success}/{len(results)} succeeded")
|
|
114
|
+
|
|
115
|
+
asyncio.run(main())
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
## CLI Reference
|
|
119
|
+
|
|
120
|
+
### `gmaps-scraper scrape <url>`
|
|
121
|
+
|
|
122
|
+
Scrape a single Google Maps URL and output JSON.
|
|
123
|
+
|
|
124
|
+
| Option | Default | Description |
|
|
125
|
+
|---|---|---|
|
|
126
|
+
| `--lang` | — | Language code (e.g., `en`, `ja`, `zh-TW`) |
|
|
127
|
+
| `--no-headless` | — | Show the browser window (for debugging) |
|
|
128
|
+
| `--reviews` | — | Also scrape individual reviews |
|
|
129
|
+
| `--max-reviews` | `20` | Max reviews to extract |
|
|
130
|
+
| `-v, --verbose` | — | Enable debug logging |
|
|
131
|
+
|
|
132
|
+
### `gmaps-scraper batch <input> -o <output>`
|
|
133
|
+
|
|
134
|
+
Batch scrape URLs from a file. Output format is inferred from file extension (`.json` or `.csv`).
|
|
135
|
+
|
|
136
|
+
| Option | Default | Description |
|
|
137
|
+
|---|---|---|
|
|
138
|
+
| `-o, --output` | *required* | Output file path (`.json` or `.csv`) |
|
|
139
|
+
| `--concurrency` | `5` | Parallel browser tabs |
|
|
140
|
+
| `--lang` | — | Language code |
|
|
141
|
+
| `--proxy` | — | Proxy server URL (e.g., `http://proxy:8080`) |
|
|
142
|
+
| `--delay-min` | `2.0` | Min delay between requests (seconds) |
|
|
143
|
+
| `--delay-max` | `5.0` | Max delay between requests (seconds) |
|
|
144
|
+
| `--no-resume` | — | Start fresh, don't resume from existing output |
|
|
145
|
+
| `--reviews` | — | Also scrape individual reviews |
|
|
146
|
+
| `--max-reviews` | `20` | Max reviews per place |
|
|
147
|
+
| `--save-interval` | `50` | Auto-save every N results |
|
|
148
|
+
|
|
149
|
+
## Input File Format
|
|
150
|
+
|
|
151
|
+
**CSV** — the scraper looks for a column named `url`, `URL`, or `link`:
|
|
152
|
+
|
|
153
|
+
```csv
|
|
154
|
+
url,name
|
|
155
|
+
https://www.google.com/maps/search/?api=1&query=Eiffel+Tower,Eiffel Tower
|
|
156
|
+
https://www.google.com/maps/search/?api=1&query=Colosseum,Colosseum
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
**Text** — one URL per line:
|
|
160
|
+
|
|
161
|
+
```text
|
|
162
|
+
https://www.google.com/maps/search/?api=1&query=Eiffel+Tower
|
|
163
|
+
https://www.google.com/maps/search/?api=1&query=Colosseum
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Output Format
|
|
167
|
+
|
|
168
|
+
### JSON
|
|
169
|
+
|
|
170
|
+
```json
|
|
171
|
+
[
|
|
172
|
+
{
|
|
173
|
+
"input_url": "https://www.google.com/maps/search/?api=1&query=Eiffel+Tower",
|
|
174
|
+
"success": true,
|
|
175
|
+
"place": {
|
|
176
|
+
"name": "Eiffel Tower",
|
|
177
|
+
"rating": 4.7,
|
|
178
|
+
"review_count": 344856,
|
|
179
|
+
"address": "Av. Gustave Eiffel, 75007 Paris, France",
|
|
180
|
+
"phone": "+33 8 92 70 12 39",
|
|
181
|
+
"website": "https://www.toureiffel.paris/",
|
|
182
|
+
"category": "Historical landmark",
|
|
183
|
+
"latitude": 48.8583701,
|
|
184
|
+
"longitude": 2.2944813,
|
|
185
|
+
"hours": ["Monday 09:30–23:45", "..."],
|
|
186
|
+
"google_maps_url": "https://www.google.com/maps/place/...",
|
|
187
|
+
"permanently_closed": false
|
|
188
|
+
},
|
|
189
|
+
"reviews": [],
|
|
190
|
+
"scraped_at": "2025-03-06T12:00:00"
|
|
191
|
+
}
|
|
192
|
+
]
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
### CSV
|
|
196
|
+
|
|
197
|
+
Flat structure with all place fields as columns. Ideal for data analysis.
|
|
198
|
+
|
|
199
|
+
## Extracted Fields
|
|
200
|
+
|
|
201
|
+
| Field | Type | Description |
|
|
202
|
+
|---|---|---|
|
|
203
|
+
| `name` | `str` | Place name |
|
|
204
|
+
| `rating` | `float` | Star rating (1.0–5.0) |
|
|
205
|
+
| `review_count` | `int` | Total number of reviews |
|
|
206
|
+
| `address` | `str` | Full address |
|
|
207
|
+
| `phone` | `str` | Phone number |
|
|
208
|
+
| `website` | `str` | Website URL |
|
|
209
|
+
| `category` | `str` | Place category (e.g., "Restaurant") |
|
|
210
|
+
| `hours` | `list[str]` | Opening hours per day |
|
|
211
|
+
| `latitude` | `float` | Latitude coordinate |
|
|
212
|
+
| `longitude` | `float` | Longitude coordinate |
|
|
213
|
+
| `plus_code` | `str` | Google Plus Code |
|
|
214
|
+
| `place_id` | `str` | Google Maps Place ID |
|
|
215
|
+
| `url` | `str` | Canonical Google Maps URL |
|
|
216
|
+
| `google_maps_url` | `str` | Direct Google Maps link |
|
|
217
|
+
| `price_level` | `str` | Price level indicator |
|
|
218
|
+
| `image_url` | `str` | Main image URL |
|
|
219
|
+
| `description` | `str` | Place description |
|
|
220
|
+
| `photos_count` | `int` | Number of photos |
|
|
221
|
+
| `permanently_closed` | `bool` | Whether permanently closed |
|
|
222
|
+
| `temporarily_closed` | `bool` | Whether temporarily closed |
|
|
223
|
+
|
|
224
|
+
## Performance Guide
|
|
225
|
+
|
|
226
|
+
| Concurrency | Est. Throughput | Time for 10K URLs | Notes |
|
|
227
|
+
|---|---|---|---|
|
|
228
|
+
| 3 | ~1,200/hr | ~8.3 hrs | Conservative, stable |
|
|
229
|
+
| 5 | ~2,000/hr | ~5.0 hrs | Default |
|
|
230
|
+
| 10 | ~4,000/hr | ~2.5 hrs | Recommended with proxy |
|
|
231
|
+
|
|
232
|
+
**Tips:**
|
|
233
|
+
|
|
234
|
+
- Use `--proxy` with rotating proxies for higher concurrency
|
|
235
|
+
- The scraper auto-saves progress; if interrupted, just re-run and it will resume
|
|
236
|
+
- For large batches in CI (e.g., GitHub Actions, which has a 6-hour job limit), split the input into chunks
|
|
237
|
+
|
|
238
|
+
## Development
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
git clone https://github.com/noworneverev/google-maps-scraper.git
|
|
242
|
+
cd google-maps-scraper
|
|
243
|
+
pip install -e ".[dev]"
|
|
244
|
+
playwright install firefox
|
|
245
|
+
pytest tests/ -v
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
[MIT](LICENSE) © [Yan-Ying Liao](https://github.com/noworneverev)
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "google-maps-scraper"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Scrape Google Maps place details (rating, reviews, address, etc.) using Playwright — no API key needed"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Yan-Ying Liao", email = "liao961120@gmail.com" },
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"google-maps",
|
|
17
|
+
"scraper",
|
|
18
|
+
"reviews",
|
|
19
|
+
"ratings",
|
|
20
|
+
"playwright",
|
|
21
|
+
"async",
|
|
22
|
+
"places",
|
|
23
|
+
"web-scraping",
|
|
24
|
+
]
|
|
25
|
+
classifiers = [
|
|
26
|
+
"Development Status :: 4 - Beta",
|
|
27
|
+
"Intended Audience :: Developers",
|
|
28
|
+
"License :: OSI Approved :: MIT License",
|
|
29
|
+
"Programming Language :: Python :: 3",
|
|
30
|
+
"Programming Language :: Python :: 3.10",
|
|
31
|
+
"Programming Language :: Python :: 3.11",
|
|
32
|
+
"Programming Language :: Python :: 3.12",
|
|
33
|
+
"Programming Language :: Python :: 3.13",
|
|
34
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
35
|
+
"Topic :: Internet :: WWW/HTTP :: Dynamic Content",
|
|
36
|
+
"Framework :: AsyncIO",
|
|
37
|
+
]
|
|
38
|
+
dependencies = [
|
|
39
|
+
"playwright>=1.40.0",
|
|
40
|
+
"pydantic>=2.0.0",
|
|
41
|
+
"tqdm>=4.60.0",
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
[project.optional-dependencies]
|
|
45
|
+
stealth = ["playwright-stealth>=1.0.6"]
|
|
46
|
+
dev = [
|
|
47
|
+
"pytest>=7.0.0",
|
|
48
|
+
"pytest-asyncio>=0.21.0",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[project.scripts]
|
|
52
|
+
gmaps-scraper = "gmaps_scraper.cli:main"
|
|
53
|
+
|
|
54
|
+
[project.urls]
|
|
55
|
+
Homepage = "https://github.com/noworneverev/google-maps-scraper"
|
|
56
|
+
Repository = "https://github.com/noworneverev/google-maps-scraper"
|
|
57
|
+
Issues = "https://github.com/noworneverev/google-maps-scraper/issues"
|
|
58
|
+
Changelog = "https://github.com/noworneverev/google-maps-scraper/releases"
|
|
59
|
+
|
|
60
|
+
[tool.hatch.build.targets.wheel]
|
|
61
|
+
packages = ["src/gmaps_scraper"]
|
|
62
|
+
|
|
63
|
+
[tool.hatch.build.targets.sdist]
|
|
64
|
+
exclude = [
|
|
65
|
+
"test_data/",
|
|
66
|
+
"tests/",
|
|
67
|
+
".github/",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
[tool.pytest.ini_options]
|
|
71
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Google Maps Reviews Scraper - Scrape place details without an API key.
|
|
2
|
+
|
|
3
|
+
Usage:
|
|
4
|
+
# Async API
|
|
5
|
+
async with GoogleMapsScraper() as scraper:
|
|
6
|
+
result = await scraper.scrape("https://www.google.com/maps/search/...")
|
|
7
|
+
print(result.place.name, result.place.rating)
|
|
8
|
+
|
|
9
|
+
# Sync convenience
|
|
10
|
+
from gmaps_scraper import scrape_place
|
|
11
|
+
result = scrape_place("https://www.google.com/maps/search/...")
|
|
12
|
+
|
|
13
|
+
# Batch processing
|
|
14
|
+
from gmaps_scraper import scrape_batch, ScrapeConfig
|
|
15
|
+
config = ScrapeConfig(concurrency=5)
|
|
16
|
+
results = asyncio.run(scrape_batch(urls, config))
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from gmaps_scraper.batch import scrape_batch
|
|
20
|
+
from gmaps_scraper.models import PlaceDetails, Review, ScrapeConfig, ScrapeResult
|
|
21
|
+
from gmaps_scraper.scraper import GoogleMapsScraper, scrape_place
|
|
22
|
+
|
|
23
|
+
__version__ = "0.1.0"
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"GoogleMapsScraper",
|
|
27
|
+
"scrape_place",
|
|
28
|
+
"scrape_batch",
|
|
29
|
+
"PlaceDetails",
|
|
30
|
+
"Review",
|
|
31
|
+
"ScrapeConfig",
|
|
32
|
+
"ScrapeResult",
|
|
33
|
+
]
|