pypararius 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pypararius/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ """
2
+ Pararius - Python API for Pararius.com real estate listings.
3
+
4
+ Example usage:
5
+ >>> from pypararius import Pararius
6
+ >>> p = Pararius()
7
+ >>> listing = p.get_listing('amsterdam/abc123/street')
8
+ >>> print(listing['title'], listing['price'])
9
+ Ridderspoorweg 10 1850
10
+
11
+ >>> results = p.search_listing('amsterdam', price_max=2000)
12
+ >>> for r in results[:3]:
13
+ ... print(r['title'], r['city'])
14
+ """
15
+
16
from pypararius.pararius import Pararius, ParariusAPI
from pypararius.listing import Listing

# Version string of this package.
__version__ = "2.0.0"
# Names exported by ``from pypararius import *``; all three are imported above.
__all__ = ["Pararius", "ParariusAPI", "Listing"]
pypararius/listing.py ADDED
@@ -0,0 +1,138 @@
1
+ """Listing class - represents a Pararius property listing."""
2
+
3
+ from typing import Any
4
+
5
+
6
+ class Listing:
7
+ """A Pararius property listing.
8
+
9
+ Data can be accessed as listing['key'] or listing.get('key').
10
+
11
+ Example:
12
+ >>> listing = pararius.get_listing('amsterdam/abc123/street')
13
+ >>> listing['title']
14
+ 'Ridderspoorweg 10'
15
+ >>> listing['price']
16
+ 1850
17
+ >>> listing['city']
18
+ 'Amsterdam'
19
+ """
20
+
21
+ keys_alias = {
22
+ 'name': 'title',
23
+ 'address': 'title',
24
+ 'location': 'city',
25
+ 'locality': 'city',
26
+ 'area': 'living_area',
27
+ 'size': 'living_area',
28
+ 'area_m2': 'living_area',
29
+ 'coords': 'coordinates',
30
+ 'lat': 'latitude',
31
+ 'lng': 'longitude',
32
+ 'lon': 'longitude',
33
+ 'zip': 'postcode',
34
+ 'zipcode': 'postcode',
35
+ 'postal_code': 'postcode',
36
+ 'type': 'object_type',
37
+ 'property_type': 'object_type',
38
+ 'images': 'photos',
39
+ 'pictures': 'photos',
40
+ 'media': 'photos',
41
+ 'desc': 'description',
42
+ 'text': 'description',
43
+ 'agent': 'broker',
44
+ 'realtor': 'broker',
45
+ 'makelaar': 'broker',
46
+ 'energy_rating': 'energy_label',
47
+ 'street': 'title',
48
+ }
49
+
50
+ def __init__(self, listing_id: str | int | None = None, data: dict | None = None):
51
+ self.listing_id = str(listing_id) if listing_id else None
52
+ self.data: dict[str, Any] = data or {}
53
+
54
+ def __repr__(self) -> str:
55
+ title = self.data.get('title', 'Unknown')
56
+ city = self.data.get('city', '')
57
+ return f"<Listing id:{self.listing_id} [{title}, {city}]>"
58
+
59
+ def __str__(self) -> str:
60
+ return self.__repr__()
61
+
62
+ def __contains__(self, key: str) -> bool:
63
+ return self._normalize_key(key) in self.data
64
+
65
+ def __getitem__(self, key: str) -> Any:
66
+ normalized = self._normalize_key(key)
67
+ if normalized not in self.data:
68
+ raise KeyError(key)
69
+ return self.data[normalized]
70
+
71
+ def __setitem__(self, key: str, value: Any) -> None:
72
+ self.data[self._normalize_key(key)] = value
73
+
74
+ def __bool__(self) -> bool:
75
+ return bool(self.listing_id or self.data.get('title'))
76
+
77
+ def _normalize_key(self, key: str) -> str:
78
+ """Normalize key using aliases."""
79
+ key = key.lower().replace('-', '_').replace(' ', '_')
80
+ return self.keys_alias.get(key, key)
81
+
82
+ def get(self, key: str, default: Any = None) -> Any:
83
+ """Get a value with optional default."""
84
+ try:
85
+ return self[key]
86
+ except KeyError:
87
+ return default
88
+
89
+ def keys(self) -> list[str]:
90
+ """Return all available keys."""
91
+ return list(self.data.keys())
92
+
93
+ def items(self) -> list[tuple[str, Any]]:
94
+ """Return all key-value pairs."""
95
+ return list(self.data.items())
96
+
97
+ def values(self) -> list[Any]:
98
+ """Return all values."""
99
+ return list(self.data.values())
100
+
101
+ def to_dict(self) -> dict[str, Any]:
102
+ """Return data as a plain dictionary."""
103
+ return self.data.copy()
104
+
105
+ def summary(self) -> str:
106
+ """Return a text summary of the listing."""
107
+ lines = []
108
+ title = self.data.get('title', 'Unknown')
109
+ city = self.data.get('city', '')
110
+ lines.append(f"Listing: {title}, {city}")
111
+
112
+ if price := self.data.get('price_formatted'):
113
+ lines.append(f"Price: {price}")
114
+ elif price := self.data.get('price'):
115
+ lines.append(f"Price: €{price:,}")
116
+
117
+ if area := self.data.get('living_area'):
118
+ lines.append(f"Living area: {area} m²")
119
+
120
+ if bedrooms := self.data.get('bedrooms'):
121
+ lines.append(f"Bedrooms: {bedrooms}")
122
+
123
+ if energy := self.data.get('energy_label'):
124
+ lines.append(f"Energy label: {energy}")
125
+
126
+ if url := self.data.get('url'):
127
+ lines.append(f"URL: {url}")
128
+
129
+ return '\n'.join(lines)
130
+
131
+ def getID(self) -> str | None:
132
+ """Return the listing ID."""
133
+ return self.listing_id
134
+
135
+ @property
136
+ def id(self) -> str | None:
137
+ """Alias for listing_id."""
138
+ return self.listing_id
pypararius/pararius.py ADDED
@@ -0,0 +1,238 @@
1
+ """Main Pararius API class."""
2
+
3
+ import re
4
+ from urllib.parse import urljoin
5
+
6
+ import httpx
7
+
8
+ from pypararius.listing import Listing
9
+ from pypararius.parser import parse_listing_details, parse_search_response
10
+
11
+
12
# Site root; listing detail URLs and search URLs in this module are built
# by appending paths to it.
BASE_URL = "https://www.pararius.com"

# Default headers given to the httpx.Client created by ``Pararius.client``.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Accept": "application/json, text/html",
}
20
+
21
+
22
class Pararius:
    """Main interface to Pararius API.

    Owns a lazily created ``httpx.Client`` (see :attr:`client`) and can be
    used as a context manager, in which case the client is closed on exit.

    Example:
        >>> from pypararius import Pararius
        >>> p = Pararius()
        >>> listing = p.get_listing('amsterdam/abc123/street')
        >>> print(listing['title'], listing['city'])
        Ridderspoorweg 10 Amsterdam
        >>> results = p.search_listing('amsterdam', price_max=2000)
        >>> for r in results[:3]:
        ...     print(r['title'], r['price'])
    """

    # Matches an 8-character lowercase hex id between two slashes.
    # Not referenced by the methods of this class.
    ID_PATTERN = re.compile(r"/([a-f0-9]{8})/")

    # Accepted ``interior`` values -> URL path segment.
    _INTERIOR_SEGMENTS = {
        "furnished": "furnished",
        "upholstered": "upholstered",
        "shell": "shell",
    }

    # Accepted ``sort`` values -> URL path segment ("" = no segment).
    _SORT_SEGMENTS = {
        "newest": "",  # Default
        "price_asc": "sort-price-low",
        "price_desc": "sort-price-high",
        "area_asc": "sort-floor-low",
        "area_desc": "sort-floor-high",
    }

    def __init__(self, timeout: int = 30):
        """Initialize Pararius API client.

        Args:
            timeout: Request timeout in seconds
        """
        self.timeout = timeout
        self._client: httpx.Client | None = None

    @property
    def client(self) -> httpx.Client:
        """Lazily create HTTP client."""
        if self._client is None:
            self._client = httpx.Client(
                timeout=self.timeout,
                headers=HEADERS,
                follow_redirects=True,
            )
        return self._client

    def close(self) -> None:
        """Close the HTTP client; a later request creates a new one."""
        if self._client:
            self._client.close()
            self._client = None

    def __enter__(self) -> "Pararius":
        return self

    def __exit__(self, *args) -> None:
        self.close()

    # -------------------------------------------------------------------------
    # Listing methods
    # -------------------------------------------------------------------------

    def get_listing(self, listing_id: str) -> Listing:
        """Get a listing by URL or path.

        Args:
            listing_id: Full URL, an absolute site path starting with ``/``,
                or a ``city/id/street`` path (which is placed under
                ``/apartment-for-rent/``). Surrounding whitespace is ignored.

        Returns:
            Listing object with property data

        Raises:
            ValueError: If *listing_id* contains no ``/`` (a bare listing ID
                cannot be resolved to a URL).
            LookupError: If the server answers 404.
            httpx.HTTPStatusError: For any other error status.

        Example:
            >>> p.get_listing('amsterdam/eecd88d9/ridderspoorweg')
            >>> p.get_listing('/apartment-for-rent/amsterdam/eecd88d9/ridderspoorweg')
            >>> p.get_listing('https://www.pararius.com/apartment-for-rent/amsterdam/eecd88d9/ridderspoorweg')
        """
        ref = listing_id.strip()

        if ref.lower().startswith(("http://", "https://")):
            # Full URL: use it directly.
            url = ref
        elif "/" in ref:
            # Could be 'amsterdam/abc123/street' or
            # '/apartment-for-rent/amsterdam/abc123/street'.
            if ref.startswith("/"):
                url = urljoin(BASE_URL, ref)
            else:
                url = f"{BASE_URL}/apartment-for-rent/{ref}"
        else:
            # Just an ID - there is no URL that can be derived from it.
            raise ValueError(
                f"Cannot fetch listing by ID alone. Please provide a URL or path like 'amsterdam/{ref}/street'"
            )

        response = self.client.get(url)

        if response.status_code == 404:
            raise LookupError(f"Listing {listing_id} not found")

        response.raise_for_status()
        # Pass the final URL (after redirects) so the parser sees the
        # canonical listing path.
        return parse_listing_details(response.text, str(response.url))

    def search_listing(
        self,
        location: str | list[str] | None = None,
        price_min: int | None = None,
        price_max: int | None = None,
        area_min: int | None = None,
        bedrooms: int | None = None,
        interior: str | None = None,
        sort: str | None = None,
        page: int = 0,
    ) -> list[Listing]:
        """Search for listings.

        Args:
            location: City name to search in (e.g., 'amsterdam'). When a list
                is given only its first element is used. Missing or empty
                values fall back to 'amsterdam'.
            price_min: Minimum rent price
            price_max: Maximum rent price
            area_min: Minimum living area in m²
            bedrooms: Minimum number of bedrooms
            interior: Interior type ('furnished', 'upholstered', 'shell');
                other values are ignored
            sort: Sort order - 'newest', 'price_asc', 'price_desc',
                  'area_asc', 'area_desc', or None; other values are ignored
            page: Page number (0-indexed, ~30 results per page)

        Returns:
            List of Listing objects

        Raises:
            RuntimeError: If the response status is not 200.

        Example:
            >>> p.search_listing('amsterdam', price_max=2000)
            >>> p.search_listing('rotterdam', bedrooms=2, interior='furnished')
        """
        # Normalize location: first list element, then default when the
        # result is missing or blank (so [""] no longer yields an empty
        # city segment in the URL).
        if isinstance(location, list):
            location = location[0] if location else None
        city = (location or "").strip() or "amsterdam"
        city = city.lower().replace(" ", "-")

        url = self._build_search_url(
            city=city,
            price_min=price_min,
            price_max=price_max,
            area_min=area_min,
            bedrooms=bedrooms,
            interior=interior,
            sort=sort,
            page=page + 1,  # Pararius uses 1-indexed pages
        )

        # Add XHR header to get JSON response
        headers = {"X-Requested-With": "XMLHttpRequest"}

        response = self.client.get(url, headers=headers)

        if response.status_code != 200:
            raise RuntimeError(f"Search failed (status {response.status_code})")

        data = response.json()
        return parse_search_response(data, city)

    # -------------------------------------------------------------------------
    # URL building
    # -------------------------------------------------------------------------

    def _build_search_url(
        self,
        city: str,
        price_min: int | None = None,
        price_max: int | None = None,
        area_min: int | None = None,
        bedrooms: int | None = None,
        interior: str | None = None,
        sort: str | None = None,
        page: int = 1,
    ) -> str:
        """Build the search URL with filters.

        Segments are appended in this order: price range, bedrooms, area,
        interior, page, sort. Filters that are ``None``, non-positive, or
        not a recognised value contribute no segment.
        """
        parts = [f"{BASE_URL}/apartments/{city}"]

        # Price filter.
        # NOTE(review): a missing bound is sent as 0 (e.g. "1000-0" for a
        # minimum-only search) — confirm the site treats 0 as "no limit".
        p_min = price_min or 0
        p_max = price_max or 0
        if p_min > 0 or p_max > 0:
            parts.append(f"{p_min}-{p_max}")

        # Bedrooms filter
        if bedrooms is not None and bedrooms > 0:
            parts.append(f"{bedrooms}-bedrooms")

        # Area filter
        if area_min is not None and area_min > 0:
            parts.append(f"{area_min}m2")

        # Interior filter (case-insensitive)
        if interior is not None:
            interior_segment = self._INTERIOR_SEGMENTS.get(interior.lower())
            if interior_segment:
                parts.append(interior_segment)

        # Page (must come before sort in URL)
        if page > 1:
            parts.append(f"page-{page}")

        # Sort order (case-sensitive; "newest" is the site default and
        # therefore adds nothing)
        if sort is not None:
            sort_segment = self._SORT_SEGMENTS.get(sort, "")
            if sort_segment:
                parts.append(sort_segment)

        return "/".join(parts)
235
+
236
+
237
# Convenience alias: ``ParariusAPI`` is bound to the same class object as
# ``Pararius``.
ParariusAPI = Pararius
pypararius/parser.py ADDED
@@ -0,0 +1,328 @@
1
+ """Parser utilities for Pararius HTML responses."""
2
+
3
+ import json
4
+ import re
5
+ from typing import Optional
6
+
7
+ from .listing import Listing
8
+
9
+
10
def parse_search_response(data: dict, city: str) -> list[Listing]:
    """Parse the JSON response from search endpoint into list of Listings.

    The results markup is read from ``data["components"]["results"]``.

    Args:
        data: Decoded JSON body of the search response.
        city: City slug used for the search; copied onto every listing.

    Returns:
        The parsed listings, or an empty list when the payload carries no
        results markup (key missing, JSON ``null``, or not a string).
    """
    components = data.get("components")
    results_html = components.get("results") if isinstance(components, dict) else None
    # A JSON null (or any non-string) would make the regex-based parser
    # raise TypeError; treat it like an empty results page instead.
    if not isinstance(results_html, str):
        results_html = ""
    return _parse_listings_from_html(results_html, city)
14
+
15
+
16
def _parse_listings_from_html(html: str, city: str) -> list[Listing]:
    """Collect one Listing per ``listing-search-item`` section in *html*.

    The markup is cut at every opening ``<section class="listing-search-item...">``
    tag; for each piece, the text up to the first ``</section>`` is handed
    to ``_parse_listing_block``. Pieces with no closing tag, with an empty
    body, or that parse to a falsy listing are left out.
    """
    opening_tag = r'<section\s+class="listing-search-item[^>]*>'
    # Whatever precedes the first opening tag is not a listing.
    _preamble, *chunks = re.split(opening_tag, html)

    found = []
    for chunk in chunks:
        body, closing_tag, _tail = chunk.partition("</section>")
        if not (closing_tag and body):
            continue
        parsed = _parse_listing_block(body, city)
        if parsed:
            found.append(parsed)
    return found
34
+
35
+
36
def _parse_listing_block(block: str, city: str) -> Optional[Listing]:
    """Parse a single listing block from search HTML.

    Args:
        block: Inner HTML of one search-result section.
        city: City slug of the search; stored title-cased as ``city``.

    Returns:
        A Listing with title, city, neighbourhood, price, living area,
        rooms, url and photos, or ``None`` when the block contains no
        detail-page link.
    """
    # URL and ID. Accept any "<type>-for-rent" prefix rather than only
    # "apartment-for-rent", so result cards linking to other property types
    # are not silently dropped.
    # NOTE(review): Pararius appears to use house/studio/room-for-rent
    # prefixes as well — confirm against live search markup.
    url_match = re.search(r'href="(/[a-z]+-for-rent/[^/"]+/([^/"]+)/([^"]+))"', block)
    if not url_match:
        return None

    path, listing_id, slug = url_match.groups()
    street = slug.replace("-", " ").title()
    url = f"https://www.pararius.com{path}"

    # Title from analytics data (more accurate street name)
    title_match = re.search(r'element_text&quot;:&quot;([^&]+)&quot;', block)
    if title_match:
        title = title_match.group(1)
        # Extract street from title like "Flat Ridderspoorweg": drop the
        # leading type word. Keep the URL-derived street when nothing
        # would be left after dropping it.
        words = title.split()
        if " " in title and len(words) > 1:
            street = " ".join(words[1:])

    # Price
    price = None
    price_formatted = None
    price_match = re.search(r'listing-search-item__price-main">([^<]+)</span>', block)
    if price_match:
        price_formatted = price_match.group(1).strip()
        # Extract numeric price: keep digits only
        price_nums = re.sub(r'[^\d]', '', price_formatted)
        if price_nums:
            price = int(price_nums)

    # Neighborhood
    neighbourhood = None
    sub_match = re.search(r'listing-search-item__sub-title"[^>]*>\s*([^<]+)<', block)
    if sub_match:
        neighbourhood = sub_match.group(1).strip()

    # Area
    living_area = None
    area_match = re.search(r'title="(\d+)\s*m[²2]"', block)
    if area_match:
        living_area = int(area_match.group(1))

    # Rooms
    rooms = None
    rooms_match = re.search(r'title="(\d+)\s*room', block)
    if rooms_match:
        rooms = int(rooms_match.group(1))

    # Image
    photo_url = None
    img_match = re.search(r'data-src="([^"]+)"', block)
    if img_match:
        photo_url = img_match.group(1).replace("&amp;", "&")

    listing_data = {
        "title": street,
        "city": city.title(),
        "neighbourhood": neighbourhood,
        "price": price,
        "price_formatted": price_formatted,
        "living_area": living_area,
        "rooms": rooms,
        "url": url,
        "photos": [photo_url] if photo_url else [],
        "photo_urls": [photo_url] if photo_url else [],
    }

    return Listing(listing_id=listing_id, data=listing_data)
104
+
105
+
106
def parse_listing_details(html: str, url: str) -> Listing:
    """Parse full listing details from detail page HTML.

    Basic facts (name, description, address, rooms, floor size, price) come
    from the page's JSON-LD block; features, images, agent and coordinates
    are scraped from the markup by the ``_extract_*`` helpers.

    Args:
        html: HTML of the listing detail page.
        url: URL the page was fetched from. The listing id is taken from
            its second-to-last path segment.

    Returns:
        A Listing holding the collected fields. Fields that cannot be found
        are ``None`` (or empty collections).
    """
    # Ignore query string, fragment and a trailing slash so that
    # ".../<id>/<street>/" and ".../<id>/<street>?x=1" still yield <id>.
    path = url.split("?", 1)[0].split("#", 1)[0].rstrip("/")
    segments = path.split("/")
    listing_id = segments[-2] if len(segments) >= 2 else ""

    # Property type from the "<type>-for-rent" URL prefix; "apartment"
    # when the URL does not have that shape.
    type_match = re.search(r"/([a-z]+)-for-rent/", path)
    object_type = type_match.group(1) if type_match else "apartment"

    # Extract JSON-LD
    jsonld = _extract_jsonld(html)

    # Basic info from JSON-LD
    name = jsonld.get("name", "")
    description = jsonld.get("description")
    main_image = jsonld.get("image")

    # Address (tolerate a missing or non-object value)
    addr_data = jsonld.get("address")
    if not isinstance(addr_data, dict):
        addr_data = {}
    street = addr_data.get("streetAddress", "")
    city = addr_data.get("addressLocality", "")
    postcode = addr_data.get("postalCode")
    neighbourhood = addr_data.get("addressRegion")

    # Rooms and area from JSON-LD
    rooms = _jsonld_value(jsonld.get("numberOfRooms"))
    living_area = _jsonld_value(jsonld.get("floorSize"))

    # Price
    price = None
    currency = "EUR"
    offer = jsonld.get("offers")
    if isinstance(offer, list):
        offer = offer[0] if offer else None
    if isinstance(offer, dict) and offer:
        price_raw = offer.get("price")
        if price_raw:
            try:
                price = int(float(price_raw))
            except (TypeError, ValueError):
                # Non-numeric price text: leave the price unknown.
                price = None
        currency = offer.get("priceCurrency", "EUR")

    # Features from HTML
    features = _extract_features(html)

    # All images; make sure the JSON-LD lead image is included, first.
    images = _extract_images(html)
    if main_image and main_image not in images:
        images.insert(0, main_image)

    # Agent/Broker
    broker = _extract_agent(html)

    # Coordinates
    coords = _extract_coordinates(html)

    # Extract specific features
    deposit = features.get("Deposit")
    interior = features.get("Interior")
    available = features.get("Available")
    offered_since = features.get("Offered since")
    rental_agreement = features.get("Rental agreement")
    energy_label = features.get("Energy rating")

    # Boolean features: None when the page does not mention them.
    smoking_allowed = None
    pets_allowed = None
    if "Smoking allowed" in features:
        smoking_allowed = features["Smoking allowed"].lower() in ("yes", "ja", "allowed")
    if "Pets allowed" in features:
        pets_allowed = features["Pets allowed"].lower() in ("yes", "ja", "allowed", "in consultation")

    # Bedrooms
    bedrooms = None
    if "Number of bedrooms" in features:
        try:
            bedrooms = int(features["Number of bedrooms"])
        except ValueError:
            pass

    # Price formatted
    price_formatted = f"€{price:,} per month" if price else None

    listing_data = {
        "title": name or street,
        "city": city,
        "postcode": postcode,
        "neighbourhood": neighbourhood,
        "price": price,
        "price_formatted": price_formatted,
        "currency": currency,
        "living_area": living_area,
        "rooms": rooms,
        "bedrooms": bedrooms,
        "description": description,
        "url": url,
        "photos": images,
        "photo_urls": images,
        "photo_count": len(images),
        "energy_label": energy_label,
        "offered_since": offered_since,
        "characteristics": features,
        # Rental-specific
        "deposit": deposit,
        "interior": interior,
        "available": available,
        "rental_agreement": rental_agreement,
        "smoking_allowed": smoking_allowed,
        "pets_allowed": pets_allowed,
        "offering_type": "rent",
        "object_type": object_type,
    }

    # Coordinates
    if coords:
        listing_data["latitude"] = coords[0]
        listing_data["longitude"] = coords[1]
        listing_data["coordinates"] = coords

    # Broker
    if broker:
        listing_data["broker"] = broker.get("name")
        listing_data["broker_url"] = broker.get("url")
        listing_data["broker_phone"] = broker.get("phone")

    return Listing(listing_id=listing_id, data=listing_data)


def _jsonld_value(node):
    """Return the value carried by a JSON-LD quantity node, or ``None``.

    Accepts a ``{"value": ...}`` object, a list whose first element is such
    an object (or a scalar), or a bare scalar.
    """
    if isinstance(node, list):
        node = node[0] if node else None
    if isinstance(node, dict):
        return node.get("value")
    return node or None
230
+
231
+
232
+ def _extract_jsonld(html: str) -> dict:
233
+ """Extract JSON-LD structured data from HTML."""
234
+ matches = re.findall(
235
+ r'<script type="application/ld\+json">(.*?)</script>',
236
+ html,
237
+ re.DOTALL,
238
+ )
239
+ for match in matches:
240
+ try:
241
+ data = json.loads(match)
242
+ type_val = data.get("@type", "")
243
+ if "House" in str(type_val) or "Product" in str(type_val):
244
+ return data
245
+ except json.JSONDecodeError:
246
+ continue
247
+ return {}
248
+
249
+
250
+ def _extract_features(html: str) -> dict[str, str]:
251
+ """Extract features from listing HTML."""
252
+ features = {}
253
+
254
+ # Pattern 1: <dd class="listing-features__term">Term</dd> <dd ...><span>Value</span>
255
+ pattern1 = (
256
+ r'<dd class="listing-features__term">([^<]+)</dd>\s*'
257
+ r'<dd class="listing-features__description[^"]*">\s*'
258
+ r'(?:<span class="listing-features__main-description">)?([^<]+)'
259
+ )
260
+ for term, value in re.findall(pattern1, html):
261
+ features[term.strip()] = value.strip().replace("&nbsp;", " ")
262
+
263
+ # Pattern 2: <dt ...>Term</dt> <dd ...><span>Value</span> (for some features)
264
+ pattern2 = (
265
+ r'<dt class="listing-features__term[^"]*">([^<]+)</dt>\s*'
266
+ r'<dd class="listing-features__description[^"]*">\s*'
267
+ r'(?:\s*<span class="listing-features__main-description">)?([^<]+)'
268
+ )
269
+ for term, value in re.findall(pattern2, html):
270
+ features[term.strip()] = value.strip().replace("&nbsp;", " ")
271
+
272
+ return features
273
+
274
+
275
+ def _extract_images(html: str) -> list[str]:
276
+ """Extract all listing images from HTML."""
277
+ images = set()
278
+ pattern = r'(https://casco-media-prod[^"&\s]+\.(?:jpg|png|webp))'
279
+ for img in re.findall(pattern, html):
280
+ # Prefer full-size images
281
+ if "width=600" in img or "width=" not in img:
282
+ clean_url = img.replace("&amp;", "&")
283
+ images.add(clean_url)
284
+ return list(images)[:20] # Limit to 20 images
285
+
286
+
287
+ def _extract_agent(html: str) -> Optional[dict]:
288
+ """Extract agent information from HTML."""
289
+ agent_url = None
290
+ agent_name = None
291
+ agent_phone = None
292
+
293
+ url_match = re.search(r'href="(/real-estate-agent[^"]+)"', html)
294
+ if url_match:
295
+ agent_url = f"https://www.pararius.com{url_match.group(1)}"
296
+
297
+ # Agent name is inside: <a class="agent-summary__title-link" ...>Name</a>
298
+ name_match = re.search(r'agent-summary__title-link"[^>]*>([^<]+)', html)
299
+ if name_match:
300
+ agent_name = name_match.group(1).strip()
301
+
302
+ phone_match = re.search(r'tel:([^"]+)', html)
303
+ if phone_match:
304
+ agent_phone = phone_match.group(1)
305
+
306
+ if agent_url or agent_name:
307
+ return {"name": agent_name, "url": agent_url, "phone": agent_phone}
308
+ return None
309
+
310
+
311
+ def _extract_coordinates(html: str) -> Optional[tuple[float, float]]:
312
+ """Extract map coordinates from HTML."""
313
+ # Try data-latitude/data-longitude attributes
314
+ match = re.search(r'data-latitude="([^"]+)"[^>]*data-longitude="([^"]+)"', html)
315
+ if match:
316
+ return (float(match.group(1)), float(match.group(2)))
317
+
318
+ # Try data-lat/data-lon attributes (fallback)
319
+ match = re.search(r'data-lat="([^"]+)"[^>]*data-lon="([^"]+)"', html)
320
+ if match:
321
+ return (float(match.group(1)), float(match.group(2)))
322
+
323
+ # Try JSON in script
324
+ match = re.search(r'"lat":\s*([\d.]+).*?"lon":\s*([\d.]+)', html)
325
+ if match:
326
+ return (float(match.group(1)), float(match.group(2)))
327
+
328
+ return None
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: pypararius
3
+ Version: 2.0.0
4
+ Summary: Python API wrapper for Pararius.com real estate listings
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27.0
7
+ Provides-Extra: dev
8
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
9
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
@@ -0,0 +1,7 @@
1
+ pypararius/__init__.py,sha256=q68u5npo3QIW3BV84YCIPrRyAqcydlJ4TrILELdlB3o,586
2
+ pypararius/listing.py,sha256=sxm7jl6Sod0ynW5-hcTFdZpvgIKgDXdjmQS60CnSO90,4166
3
+ pypararius/pararius.py,sha256=1673qk6Hd8nU4OKS3pbGcdczuN5BpyIbBWYnEvEHdIU,7493
4
+ pypararius/parser.py,sha256=mVtSkPCExxQMLLG5PzZXli_GUJeqmUKPpPXE4P8-DlI,10519
5
+ pypararius-2.0.0.dist-info/METADATA,sha256=QsYdxD0TbJB10bU9vH0kofGI1n8Uv9rs5w76XQsr17o,292
6
+ pypararius-2.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
7
+ pypararius-2.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any