fmslist 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fmslist/scraper.py CHANGED
@@ -0,0 +1,151 @@
1
+ import json
2
+ import re
3
+ import time
4
+ from dataclasses import dataclass
5
+ from datetime import datetime
6
+ from typing import Mapping
7
+
8
+ import requests
9
+
10
+
11
@dataclass
class Variant:
    """One purchasable variant of an item.

    The dataclass is mutable, so fields such as ``quantity`` can be
    reassigned after the instance has been created.
    """

    id: int
    name: str
    price: str  # kept as a string; no numeric conversion is applied here
    available: bool
    quantity: int
20
+
21
+
22
@dataclass(frozen=True)
class ItemDetails:
    """Details of a single item together with its variants.

    ``frozen=True`` forbids rebinding the fields after construction. The
    list-valued fields are still ordinary mutable lists, so their contents
    (for example each variant's attributes) can be changed in place.
    """

    id: int
    title: str
    vendor: str
    image_urls: list[str]
    link: str
    published_at: datetime
    updated_at: datetime
    variants: list[Variant]
34
+
35
+
36
class FindMeStoreList:
    """A class to scrape the Find Me Store (FMS) list from the specified URL.

    Attributes:
        items: Items collected by the most recent ``fetch_items`` call,
            sorted by ``published_at`` in descending order. The list is
            created per instance in ``__init__``.
    """

    # Matches a JSON key whose value is missing (``"id": ,`` or ``"id": }``).
    _EMPTY_VALUE_RE = re.compile(r":\s*(,|\})")
    # Seconds to wait before retrying a request answered with HTTP 429.
    _RETRY_DELAY_SECONDS = 5

    def __init__(self):
        self._session = requests.Session()
        self._session.headers.update({"User-Agent": "MJ12bot"})
        self._base_url = "https://findmestore.thinkr.jp"
        # Per-instance list: a class-level ``[]`` default would be shared
        # (and mutated) by every scraper object.
        self.items: list[ItemDetails] = []

    def fetch_items(self) -> None:
        """Fetches all items from the FMS list.

        Pages are requested until one comes back empty. A ``ValueError``
        while fetching is printed and stops pagination; the items fetched
        up to that point are kept. ``self.items`` is replaced (not extended),
        so calling this method again does not duplicate items.
        """
        fetched = []
        page = 1
        try:
            while batch := self._fetch_products(page):
                fetched.extend(batch)
                page += 1
        except ValueError as e:
            print(f"Error fetching products: {e}")

        # Sort items by publish time, newest first
        fetched.sort(key=lambda item: item.published_at, reverse=True)
        self.items = fetched

    def fill_quantities(self) -> None:
        """Fills the quantities for each variant in the items.

        Quantities are collected from the search API page by page. Paging
        stops at the first page on which no product lists any variants, or
        when a ``ValueError`` is raised (which is printed). Variants without
        a known quantity get 0; quantities below -1 are clamped to -1.
        """
        quantities: dict[int, int] = {}
        page = 1
        try:
            while True:
                products = self._fetch_search_page(page)
                # Stop on "no variants at all", not on "no *available*
                # variants": a page of sold-out products must not end the scan.
                if not any(product.get("variants") for product in products):
                    break
                quantities.update(self._available_quantities(products))
                page += 1
        except ValueError as e:
            print(f"Error fetching quantities: {e}")

        for item in self.items:
            for variant in item.variants:
                variant.quantity = max(quantities.get(variant.id, 0), -1)

    def _request(self, url: str, what: str, page: int):
        """Issues a GET for ``url`` and returns the response once it is a 200.

        An HTTP 429 is retried after a fixed delay, with no upper bound on
        the number of attempts.

        Raises:
            ValueError: If the status code is neither 200 nor 429.
        """
        while True:
            res = self._session.get(url)
            if res.status_code == 200:
                return res
            if res.status_code != 429:
                raise ValueError(
                    f"Failed to fetch {what} at page {page}: [{res.status_code}] {res.text}"
                )
            print(
                f"Rate limit exceeded, waiting {self._RETRY_DELAY_SECONDS}s "
                f"before retrying page {page}..."
            )
            time.sleep(self._RETRY_DELAY_SECONDS)

    def _fetch_search_page(self, page: int) -> list:
        """Fetches one page of the search API and returns the decoded JSON."""
        res = self._request(
            f"{self._base_url}/search?view=preorderjson&q=*&page={page}",
            "search result",
            page,
        )
        # A hacky fix for the API returning an empty "id" field: insert
        # ``null`` wherever a key has no value so the text is valid JSON.
        return json.loads(self._EMPTY_VALUE_RE.sub(r": null\1", res.text))

    @staticmethod
    def _available_quantities(products) -> dict[int, int]:
        """Maps variant IDs to inventory quantities for available variants.

        Entries whose ID or quantity is ``None`` (which the null patching in
        ``_fetch_search_page`` can produce) are skipped.
        """
        return {
            variant["id"]: variant["inventory_quantity"]
            for product in products
            for variant in product.get("variants") or []
            if variant["available"]
            and variant["id"] is not None
            and variant["inventory_quantity"] is not None
        }

    def _fetch_quantities(self, page: int) -> Mapping[int, int]:
        """Fetches the quantities from search API. Returns a mapping of variant IDs to quantities."""
        return self._available_quantities(self._fetch_search_page(page))

    def _parse_timestamp(self, timestamp: str) -> datetime:
        """Parses a timestamp string into a datetime object."""
        return datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S%z")

    def _parse_product(self, product: dict) -> "ItemDetails":
        """Parses a product dictionary into an ItemDetails object.

        ``vendor``, ``images`` and ``variants`` are optional in ``product``;
        the other keys read here are required and raise ``KeyError`` when
        missing. Every variant starts with a quantity of 0.
        """
        variants = [
            Variant(
                id=variant["id"],
                # "Default Title" is mapped to an empty variant name.
                name=variant["title"] if variant["title"] != "Default Title" else "",
                price=variant["price"],
                available=variant["available"],
                quantity=0,
            )
            for variant in product.get("variants", [])
        ]
        return ItemDetails(
            id=product["id"],
            title=product["title"],
            vendor=product.get("vendor", "Unknown Vendor"),
            image_urls=[image["src"] for image in product.get("images", [])],
            link=f"{self._base_url}/products/{product['handle']}",
            published_at=self._parse_timestamp(product["published_at"]),
            updated_at=self._parse_timestamp(product["updated_at"]),
            variants=variants,
        )

    def _fetch_products(self, page: int) -> "list[ItemDetails]":
        """Fetches the products from the FMS list.

        Raises:
            ValueError: If the response status is neither 200 nor 429.
        """
        res = self._request(
            f"{self._base_url}/products.json?limit=250&page={page}",
            "products",
            page,
        )
        products = res.json().get("products", [])
        return [self._parse_product(product) for product in products]
@@ -1,12 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fmslist
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: A web scraper that lists items from Findmestore.
5
5
  Project-URL: Homepage, https://github.com/d4n1elchen/fmslist
6
6
  Project-URL: Issues, https://github.com/d4n1elchen/fmslist/issues
7
7
  License-Expression: MIT
8
8
  License-File: LICENSE
9
9
  Requires-Python: >=3.9
10
+ Requires-Dist: bs4
11
+ Requires-Dist: requests
10
12
  Description-Content-Type: text/markdown
11
13
 
12
14
  # fmslist
@@ -0,0 +1,6 @@
1
+ fmslist/__init__.py,sha256=LvCLbFusopU82bfaLvjmc_TnJJereamdRlo0diCSOlc,23
2
+ fmslist/scraper.py,sha256=0M3DgfGI9NpaJb7GsWWvLsL2bU-SIyUe_SMs7eAxYQc,5257
3
+ fmslist-0.0.2.dist-info/METADATA,sha256=5rvdDVw2USlkhi5jLVENdGCITrea1a72a2WPjksM2ig,413
4
+ fmslist-0.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
+ fmslist-0.0.2.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
6
+ fmslist-0.0.2.dist-info/RECORD,,
@@ -1,6 +0,0 @@
1
- fmslist/__init__.py,sha256=LvCLbFusopU82bfaLvjmc_TnJJereamdRlo0diCSOlc,23
2
- fmslist/scraper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- fmslist-0.0.1.dist-info/METADATA,sha256=6a0CJ6Y2UVotIePJcV5d3GhxfdJqfykpL0DMo8-eDSg,370
4
- fmslist-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
5
- fmslist-0.0.1.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
6
- fmslist-0.0.1.dist-info/RECORD,,