fetchxml 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fetchxml/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .client import FetchXML
2
+ from .exceptions import FetchXMLError
3
+
4
+ __all__ = ["FetchXML", "FetchXMLError"]
fetchxml/client.py ADDED
@@ -0,0 +1,70 @@
1
+ import requests
2
+ import time
3
+ from .exceptions import FetchXMLError
4
+
5
+
6
class FetchXML:
    """Session-based XML fetcher that sends browser-like headers.

    A single ``requests.Session`` is reused for every request so cookies
    set by the server persist between calls. When ``base_url`` is given,
    that URL is requested once at construction time (and again after a
    403 response) so the server can establish its session cookies.

    Args:
        base_url: Optional URL requested to bootstrap the session. It is
            also used as the default ``Referer`` header for ``fetch``.
        delay: Seconds to sleep before each fetch request, including the
            single retry that follows a 403 response.
        timeout: Timeout in seconds passed to every HTTP request.

    Raises:
        FetchXMLError: If ``base_url`` is given and the bootstrap request
            fails or does not return HTTP 200.
    """

    def __init__(self, base_url=None, delay=0.5, timeout=15):
        self.base_url = base_url
        self.delay = delay
        self.timeout = timeout
        self.session = requests.Session()
        self._init_headers()
        if base_url:
            self._bootstrap_session()

    def __enter__(self):
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the underlying session when the ``with`` block exits."""
        self.close()

    def close(self):
        """Close the underlying HTTP session and release its connections."""
        self.session.close()

    def _init_headers(self):
        """Install the default browser-like headers on the session."""
        self.session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/122.0.0.0 Safari/537.36"
            ),
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        })

    def _bootstrap_session(self):
        """Request ``base_url`` so the server can set session cookies.

        Raises:
            FetchXMLError: If the request fails at the transport level or
                the response status is not 200.
        """
        try:
            r = self.session.get(self.base_url, timeout=self.timeout)
        except requests.RequestException as exc:
            raise FetchXMLError(f"Session bootstrap failed: {exc}") from exc

        # The status check lives outside the ``try`` so this error is not
        # caught and wrapped a second time by the handler above.
        if r.status_code != 200:
            raise FetchXMLError(
                f"Failed to initialize session. Status {r.status_code}"
            )

    def _get(self, url, headers):
        """Sleep for ``delay`` seconds, then GET ``url`` through the session.

        Transport-level failures (timeouts, connection errors, ...) are
        converted to ``FetchXMLError`` so callers only need to handle one
        exception type.
        """
        time.sleep(self.delay)
        try:
            return self.session.get(
                url,
                headers=headers,
                timeout=self.timeout,
            )
        except requests.RequestException as exc:
            raise FetchXMLError(f"Request failed: {exc}") from exc

    def fetch(self, url, referer=None):
        """Fetch ``url`` and return the response body as text.

        Args:
            url: Address of the XML document to download.
            referer: Optional ``Referer`` header value. Falls back to
                ``base_url`` when omitted; no ``Referer`` is sent when
                neither is set.

        Returns:
            The response body decoded as text.

        Raises:
            FetchXMLError: On a transport failure, on a failed session
                re-bootstrap, or when the final response status is not 200.
        """
        headers = {
            "Accept": "application/xml,text/xml,*/*;q=0.1",
        }

        effective_referer = referer or self.base_url
        if effective_referer:
            headers["Referer"] = effective_referer

        response = self._get(url, headers)

        # A 403 may mean the session cookies are missing or expired:
        # refresh the session once and retry. Without a base_url there is
        # nothing to refresh, so the 403 is reported below.
        if response.status_code == 403 and self.base_url:
            self._bootstrap_session()
            response = self._get(url, headers)

        if response.status_code != 200:
            raise FetchXMLError(
                f"Failed to fetch XML. Status {response.status_code}"
            )

        return response.text
fetchxml/exceptions.py ADDED
@@ -0,0 +1,3 @@
1
class FetchXMLError(Exception):
    """Exception raised for fetchxml errors.

    The client raises it when session bootstrapping fails and when a
    fetch does not end with an HTTP 200 response. The human-readable
    reason is available through ``str(error)``.
    """
@@ -0,0 +1,356 @@
1
+ Metadata-Version: 2.4
2
+ Name: fetchxml
3
+ Version: 0.1.0
4
+ Summary: Lightweight session-based XML fetcher with browser-like behavior.
5
+ Author: Saurabh Kumar Agarwal
6
+ Project-URL: Homepage, https://github.com/yourusername/fetchxml
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: requests
11
+ Dynamic: license-file
12
+
13
+ This is the README for the open-source package **fetchxml**.
14
+
15
+ It covers installation, usage, configuration, and error handling.
16
+
17
+ ---
18
+
19
+ # 📦 fetchxml
20
+
21
+ Lightweight, session-based XML fetcher for Python.
22
+
23
+ `fetchxml` provides a clean, reusable way to fetch XML from web endpoints that require:
24
+
25
+ * Browser-like headers
26
+ * Session initialization
27
+ * Cookie handling
28
+ * Referer validation
29
+ * Basic anti-bot protection handling
30
+
31
+ It abstracts session bootstrapping and retry logic into a simple interface.
32
+
33
+ ---
34
+
35
+ ## 🚀 Why fetchxml?
36
+
37
+ Some websites block simple HTTP requests and require:
38
+
39
+ * A session cookie
40
+ * Proper User-Agent
41
+ * Referer header
42
+ * Basic browser simulation
43
+
44
+ `fetchxml` handles this automatically.
45
+
46
+ Instead of writing repetitive session logic every time, you can do:
47
+
48
+ ```python
49
+ from fetchxml import FetchXML
50
+
51
+ client = FetchXML(base_url="https://example.com")
52
+ xml = client.fetch("https://example.com/file.xml")
53
+
54
+ print(xml)
55
+ ```
56
+
57
+ ---
58
+
59
+ # 📥 Installation
60
+
61
+ ## Option 1 – Install from local project
62
+
63
+ From the project root (where `pyproject.toml` is located):
64
+
65
+ ```bash
66
+ pip install .
67
+ ```
68
+
69
+ For development mode:
70
+
71
+ ```bash
72
+ pip install -e .
73
+ ```
74
+
75
+ ---
76
+
77
+ ## Option 2 – Install from PyPI (after publishing)
78
+
79
+ ```bash
80
+ pip install fetchxml
81
+ ```
82
+
83
+ ---
84
+
85
+ # 🧠 Basic Usage
86
+
87
+ ## 1️⃣ Simple XML Fetch
88
+
89
+ ```python
90
+ from fetchxml import FetchXML
91
+
92
+ client = FetchXML()
93
+
94
+ xml = client.fetch("https://example.com/sample.xml")
95
+
96
+ print(xml[:500])
97
+ ```
98
+
99
+ Use this when the target site does NOT require session bootstrap.
100
+
101
+ ---
102
+
103
+ ## 2️⃣ Fetch XML With Session Bootstrap
104
+
105
+ Some sites require hitting their homepage first to establish cookies.
106
+
107
+ ```python
108
+ from fetchxml import FetchXML
109
+
110
+ client = FetchXML(base_url="https://example.com")
111
+
112
+ xml = client.fetch("https://example.com/sample.xml")
113
+
114
+ print(xml)
115
+ ```
116
+
117
+ `base_url` triggers automatic session initialization.
118
+
119
+ ---
120
+
121
+ ## 3️⃣ Fetch With Custom Referer
122
+
123
+ If a specific referer header is required:
124
+
125
+ ```python
126
+ xml = client.fetch(
127
+ "https://example.com/sample.xml",
128
+ referer="https://example.com/dashboard"
129
+ )
130
+ ```
131
+
132
+ ---
133
+
134
+ # ⚙️ Configuration Options
135
+
136
+ When initializing:
137
+
138
+ ```python
139
+ client = FetchXML(
140
+ base_url="https://example.com", # optional
141
+ delay=0.5, # delay between requests (seconds)
142
+ timeout=15 # request timeout (seconds)
143
+ )
144
+ ```
145
+
146
+ ### Parameters
147
+
148
+ | Parameter | Description |
149
+ | ---------- | --------------------------------------------- |
150
+ | `base_url` | URL used to bootstrap session cookies |
151
+ | `delay` | Sleep time before each request (default 0.5s) |
152
+ | `timeout` | Request timeout in seconds (default 15s) |
153
+
154
+ ---
155
+
156
+ # 🔁 Automatic Retry Behavior
157
+
158
+ If a request returns **HTTP 403**, `fetchxml` will:
159
+
160
+ 1. Attempt to re-bootstrap session (if `base_url` provided)
161
+ 2. Retry the request once
162
+
163
+ If it still fails → an exception is raised.
164
+
165
+ ---
166
+
167
+ # โ— Exception Handling
168
+
169
+ All errors raise:
170
+
171
+ ```python
172
+ FetchXMLError
173
+ ```
174
+
175
+ Import it like:
176
+
177
+ ```python
178
+ from fetchxml import FetchXMLError
179
+ ```
180
+
181
+ Example:
182
+
183
+ ```python
184
+ from fetchxml import FetchXML, FetchXMLError
185
+
186
+ client = FetchXML(base_url="https://example.com")
187
+
188
+ try:
189
+ xml = client.fetch("https://example.com/sample.xml")
190
+ print(xml)
191
+ except FetchXMLError as e:
192
+ print("Failed to fetch XML:", str(e))
193
+ ```
194
+
195
+ ---
196
+
197
+ # 🔍 What Triggers FetchXMLError?
198
+
199
+ * Session bootstrap failure
200
+ * Non-200 HTTP response
201
+ * Timeout
202
+ * Connection error
203
+ * Persistent 403 after retry
204
+
205
+ ---
206
+
207
+ # 🛡️ Rate Limiting
208
+
209
+ `delay` ensures a pause before each request:
210
+
211
+ ```python
212
+ client = FetchXML(delay=1.5)
213
+ ```
214
+
215
+ Recommended for:
216
+
217
+ * Bulk XML downloads
218
+ * Respecting server load
219
+ * Avoiding bot detection
220
+
221
+ ---
222
+
223
+ # 📁 Example: Download and Save XML
224
+
225
+ ```python
226
+ from fetchxml import FetchXML
227
+
228
+ client = FetchXML(base_url="https://example.com")
229
+
230
+ url = "https://example.com/sample.xml"
231
+ xml = client.fetch(url)
232
+
233
+ with open("sample.xml", "w", encoding="utf-8") as f:
234
+ f.write(xml)
235
+
236
+ print("Saved successfully.")
237
+ ```
238
+
239
+ ---
240
+
241
+ # 🔧 Advanced: Reusing One Client for Multiple Files
242
+
243
+ Best practice for bulk downloads:
244
+
245
+ ```python
246
+ from fetchxml import FetchXML
247
+
248
+ client = FetchXML(base_url="https://example.com")
249
+
250
+ urls = [
251
+ "https://example.com/file1.xml",
252
+ "https://example.com/file2.xml",
253
+ "https://example.com/file3.xml"
254
+ ]
255
+
256
+ for url in urls:
257
+ xml = client.fetch(url)
258
+ print(f"Downloaded {url}")
259
+ ```
260
+
261
+ This reuses the same session and cookies.
262
+
263
+ ---
264
+
265
+ # 🧪 Testing Connectivity
266
+
267
+ You can quickly test a URL:
268
+
269
+ ```python
270
+ from fetchxml import FetchXML
271
+
272
+ client = FetchXML()
273
+
274
+ try:
275
+ xml = client.fetch("https://example.com/sample.xml")
276
+ print("Success")
277
+ except Exception as e:
278
+ print("Error:", e)
279
+ ```
280
+
281
+ ---
282
+
283
+ # ๐Ÿ—๏ธ Project Structure
284
+
285
+ ```
286
+ fetchxml/
287
+ │
288
+ ├── fetchxml/
289
+ │   ├── __init__.py
290
+ │   ├── client.py
291
+ │   ├── exceptions.py
292
+ │
293
+ ├── pyproject.toml
294
+ ├── README.md
295
+ └── LICENSE
296
+ ```
297
+
298
+ ---
299
+
300
+ # 📜 License
301
+
302
+ MIT License
303
+
304
+ See `LICENSE` file for full text.
305
+
306
+ ---
307
+
308
+ # ⚠️ Disclaimer
309
+
310
+ `fetchxml` does not bypass authentication systems or CAPTCHAs.
311
+
312
+ It simply mimics normal browser session behavior using:
313
+
314
+ * Session cookies
315
+ * Proper headers
316
+ * Referer validation
317
+
318
+ Users are responsible for complying with website terms of service.
319
+
320
+ ---
321
+
322
+ # 💡 When To Use fetchxml
323
+
324
+ Use it when:
325
+
326
+ * A site blocks naive `requests.get()`
327
+ * Cookies must be initialized first
328
+ * Referer headers are required
329
+ * You want clean, reusable XML fetching logic
330
+
331
+ Do NOT use it for:
332
+
333
+ * Circumventing login walls
334
+ * Bypassing paywalls
335
+ * Evading legal restrictions
336
+
337
+ ---
338
+
339
+ # 🧩 Roadmap (Optional Future Enhancements)
340
+
341
+ * Async version
342
+ * Disk caching layer
343
+ * Proxy support
344
+ * Built-in XML validation
345
+ * Exponential backoff strategy
346
+ * Logging integration
347
+
348
+ ---
349
+
350
+ # 👤 Author
351
+
352
+ Saurabh Kumar Agarwal
353
+ 2026
354
+
355
+ ---
356
+
@@ -0,0 +1,8 @@
1
+ fetchxml/__init__.py,sha256=-OcUbUvt0puNz55MC0AR03gaDGVqKdOhF0FGa4kn6vc,110
2
+ fetchxml/client.py,sha256=77ETD2j2g-dUZldYuxsL5ZdmRl0ERffrhXX7Le8BZ2o,2139
3
+ fetchxml/exceptions.py,sha256=Rn_c9AeDuHqh969Vj-qykSPLInQQr4qQvclzpi4Nvrk,90
4
+ fetchxml-0.1.0.dist-info/licenses/LICENSE,sha256=Fe4ROp3CXeMvKjcZjK1lLTMNHrLr1htCR8DGXESgxqU,1097
5
+ fetchxml-0.1.0.dist-info/METADATA,sha256=jmQ6cMDKpdy66WNV0UlVXV1NgBBXtNldqYBZHZe0t9k,6460
6
+ fetchxml-0.1.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
7
+ fetchxml-0.1.0.dist-info/top_level.txt,sha256=BdU2_dX2Y37md0_BDfE3fp92z94jmQmOu-hDBVEQ-xc,9
8
+ fetchxml-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Saurabh Kumar Agarwal
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ fetchxml