fmslist 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fmslist/scraper.py +151 -0
- {fmslist-0.0.1.dist-info → fmslist-0.0.3.dist-info}/METADATA +2 -1
- fmslist-0.0.3.dist-info/RECORD +6 -0
- fmslist-0.0.1.dist-info/RECORD +0 -6
- {fmslist-0.0.1.dist-info → fmslist-0.0.3.dist-info}/WHEEL +0 -0
- {fmslist-0.0.1.dist-info → fmslist-0.0.3.dist-info}/licenses/LICENSE +0 -0
fmslist/scraper.py
CHANGED
@@ -0,0 +1,151 @@
|
|
1
|
+
import json
|
2
|
+
import re
|
3
|
+
import time
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from datetime import datetime
|
6
|
+
from typing import Mapping
|
7
|
+
|
8
|
+
import requests
|
9
|
+
|
10
|
+
|
11
|
+
@dataclass
class Variant:
    """One purchasable variant of a store item.

    Instances are mutable so that ``quantity`` can be filled in after the
    variant has been created.

    Attributes:
        id: Identifier of the variant.
        name: Display name of the variant.
        price: Price of the variant, kept as a string.
        available: Whether the variant is currently available.
        quantity: Stock count of the variant.
    """

    id: int
    name: str
    price: str
    available: bool
    quantity: int
|
20
|
+
|
21
|
+
|
22
|
+
@dataclass(frozen=True)
class ItemDetails:
    """Details of a single store item.

    The dataclass is frozen, so its attributes cannot be rebound after
    construction; the lists it holds (and the ``Variant`` objects inside
    ``variants``) can still be modified in place.

    Attributes:
        id: Identifier of the item.
        title: Title of the item.
        vendor: Vendor of the item.
        image_urls: URLs of the item's images.
        link: URL of the item's page.
        published_at: When the item was published.
        updated_at: When the item was last updated.
        variants: Variants of the item.
    """

    id: int
    title: str
    vendor: str
    image_urls: list[str]
    link: str
    published_at: datetime
    updated_at: datetime
    variants: list[Variant]
|
34
|
+
|
35
|
+
|
36
|
+
class FindMeStoreList:
    """Scrape the product list of Find Me Store (FMS).

    Call :meth:`fetch_items` to populate :attr:`items`, then optionally
    :meth:`fill_quantities` to add stock counts to every variant.

    Attributes:
        items: Items collected by the most recent :meth:`fetch_items` call,
            newest ``published_at`` first.
    """

    # Seconds before a stalled HTTP request is abandoned; without a timeout
    # the session would wait on an unresponsive server indefinitely.
    _REQUEST_TIMEOUT = 30
    # Seconds to pause after an HTTP 429 before retrying the same page.
    _RATE_LIMIT_DELAY = 5
    # A key that has no value at all in the search response, i.e. a colon
    # directly followed by "," or "}".
    _EMPTY_VALUE_RE = re.compile(r":\s*(,|\})")

    items: "list[ItemDetails]"

    def __init__(self):
        self._session = requests.Session()
        self._session.headers.update({"User-Agent": "MJ12bot"})
        self._base_url = "https://findmestore.thinkr.jp"
        # Created per instance: a class-level ``[]`` would be one list shared
        # (and extended) by every scraper object.
        self.items = []

    def fetch_items(self) -> None:
        """Fetch every item of the store into :attr:`items`.

        Pages are requested one after another until a page comes back empty.
        The result replaces any previously fetched items, so calling this
        method again refreshes the list instead of duplicating it. Items are
        sorted by ``published_at``, newest first.

        A ``ValueError`` raised while fetching or parsing a page is printed
        and ends the pagination; the pages fetched so far are kept. Other
        exceptions (for example network errors raised by the session) are not
        caught here.
        """
        fetched = []
        page = 1
        while True:
            try:
                # Fetch the current page of products
                batch = self._fetch_products(page)
            except ValueError as e:
                print(f"Error fetching products: {e}")
                break  # Exit the loop on error
            if not batch:
                break  # No products found, exit the loop
            fetched.extend(batch)
            page += 1

        # Sort items by publish time
        fetched.sort(key=lambda item: item.published_at, reverse=True)
        self.items = fetched

    def fill_quantities(self) -> None:
        """Fill in ``quantity`` for every variant of every item in :attr:`items`.

        Quantities are read page by page from the search API. Pagination ends
        when a page contains no products at all; a page that merely has no
        available variants does not end it, otherwise the quantities on all
        later pages would be lost. A ``ValueError`` raised while fetching or
        parsing a page is printed and ends the pagination.

        Variants without a known quantity get ``0``; quantities below ``-1``
        are clamped to ``-1``.
        """
        quantities: dict[int, int] = {}
        page = 1
        while True:
            try:
                # Fetch quantities from the search API
                products = self._fetch_search_products(page)
                if not products:
                    break
                quantities.update(self._extract_quantities(products))
                page += 1
            except ValueError as e:
                print(f"Error fetching quantities: {e}")
                break

        for item in self.items:
            for variant in item.variants:
                variant.quantity = max(quantities.get(variant.id, 0), -1)

    def _get_page(self, url: str, what: str, page: int) -> "requests.Response":
        """GET *url*, waiting and retrying for as long as the server answers 429.

        Args:
            url: Full URL to request.
            what: Short description of the resource, used in the error message.
            page: Page number being requested, used in messages only.

        Returns:
            The response once the server answers with status 200.

        Raises:
            ValueError: If the server answers with a status other than 200
                or 429.
        """
        while True:
            res = self._session.get(url, timeout=self._REQUEST_TIMEOUT)
            if res.status_code == 200:
                return res
            if res.status_code != 429:
                raise ValueError(
                    f"Failed to fetch {what} at page {page}: [{res.status_code}] {res.text}"
                )
            print(
                f"Rate limit exceeded, waiting {self._RATE_LIMIT_DELAY}s "
                f"before retrying page {page}..."
            )
            time.sleep(self._RATE_LIMIT_DELAY)

    def _fetch_search_products(self, page: int) -> list[dict]:
        """Return the decoded product entries of one search result page."""
        res = self._get_page(
            f"{self._base_url}/search?view=preorderjson&q=*&page={page}",
            "search result",
            page,
        )
        # A hacky fix for the API returning an empty "id" field: give every
        # value-less key an explicit null so the text parses as JSON.
        json_fixed = self._EMPTY_VALUE_RE.sub(r": null\1", res.text)
        return json.loads(json_fixed)

    @staticmethod
    def _extract_quantities(products: list[dict]) -> dict[int, int]:
        """Map variant IDs to inventory quantities for the available variants.

        Variants whose id or quantity is null (which the empty-value fix in
        ``_fetch_search_products`` can produce) are skipped because they
        cannot be matched to an item or compared as a number.
        """
        quantities = {}
        for product in products:
            # "variants" may be missing or null; treat both as no variants.
            for variant in product.get("variants") or []:
                if not variant["available"]:
                    continue
                variant_id = variant["id"]
                quantity = variant["inventory_quantity"]
                if variant_id is None or quantity is None:
                    continue
                quantities[variant_id] = quantity
        return quantities

    def _fetch_quantities(self, page: int) -> Mapping[int, int]:
        """Fetches the quantities from search API. Returns a mapping of variant IDs to quantities."""
        return self._extract_quantities(self._fetch_search_products(page))

    def _parse_timestamp(self, timestamp: str) -> datetime:
        """Parse a timestamp such as ``2024-01-02T03:04:05+09:00``.

        Raises:
            ValueError: If *timestamp* does not match ``%Y-%m-%dT%H:%M:%S%z``.
        """
        return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S%z")

    def _parse_product(self, product: dict) -> "ItemDetails":
        """Build an ``ItemDetails`` from one product dictionary.

        ``id``, ``title``, ``handle``, ``published_at`` and ``updated_at`` are
        required keys; ``vendor``, ``images`` and ``variants`` are optional.
        Every variant starts with a quantity of ``0``.
        """
        variants = [
            Variant(
                id=variant["id"],
                # "Default Title" is treated as "no variant name".
                name=variant["title"] if variant["title"] != "Default Title" else "",
                price=variant["price"],
                available=variant["available"],
                quantity=0,
            )
            for variant in product.get("variants", [])
        ]
        return ItemDetails(
            id=product["id"],
            title=product["title"],
            vendor=product.get("vendor", "Unknown Vendor"),
            image_urls=[image["src"] for image in product.get("images", [])],
            link=f"{self._base_url}/products/{product['handle']}",
            published_at=self._parse_timestamp(product["published_at"]),
            updated_at=self._parse_timestamp(product["updated_at"]),
            variants=variants,
        )

    def _fetch_products(self, page: int) -> "list[ItemDetails]":
        """Fetch one page (up to 250 entries) of products and parse them.

        Raises:
            ValueError: If the server answers with a status other than 200
                or 429.
        """
        res = self._get_page(
            f"{self._base_url}/products.json?limit=250&page={page}",
            "products",
            page,
        )
        products = res.json().get("products", [])
        return [self._parse_product(product) for product in products]
|
@@ -1,12 +1,13 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: fmslist
|
3
|
-
Version: 0.0.1
|
3
|
+
Version: 0.0.3
|
4
4
|
Summary: A web scraper that lists items from Findmestore.
|
5
5
|
Project-URL: Homepage, https://github.com/d4n1elchen/fmslist
|
6
6
|
Project-URL: Issues, https://github.com/d4n1elchen/fmslist/issues
|
7
7
|
License-Expression: MIT
|
8
8
|
License-File: LICENSE
|
9
9
|
Requires-Python: >=3.9
|
10
|
+
Requires-Dist: requests
|
10
11
|
Description-Content-Type: text/markdown
|
11
12
|
|
12
13
|
# fmslist
|
@@ -0,0 +1,6 @@
|
|
1
|
+
fmslist/__init__.py,sha256=LvCLbFusopU82bfaLvjmc_TnJJereamdRlo0diCSOlc,23
|
2
|
+
fmslist/scraper.py,sha256=0M3DgfGI9NpaJb7GsWWvLsL2bU-SIyUe_SMs7eAxYQc,5257
|
3
|
+
fmslist-0.0.3.dist-info/METADATA,sha256=A0nfMgto5vg9jyLd85AtFKTFG1T4K_sQOUin_LayRJY,394
|
4
|
+
fmslist-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
5
|
+
fmslist-0.0.3.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
6
|
+
fmslist-0.0.3.dist-info/RECORD,,
|
fmslist-0.0.1.dist-info/RECORD
DELETED
@@ -1,6 +0,0 @@
|
|
1
|
-
fmslist/__init__.py,sha256=LvCLbFusopU82bfaLvjmc_TnJJereamdRlo0diCSOlc,23
|
2
|
-
fmslist/scraper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
fmslist-0.0.1.dist-info/METADATA,sha256=6a0CJ6Y2UVotIePJcV5d3GhxfdJqfykpL0DMo8-eDSg,370
|
4
|
-
fmslist-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
5
|
-
fmslist-0.0.1.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
|
6
|
-
fmslist-0.0.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|