productcategorizationapi 1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,47 @@
1
+ import requests
2
+
3
class ProductCategorizationAPI:
    """Minimal client for the ProductCategorization.com HTTP API.

    Every ``categorize_*`` method issues one request to an endpoint below
    ``base_url`` and returns the decoded JSON body of the response.
    """

    def __init__(self, api_key, timeout=60):
        """Store the credentials and connection settings.

        Args:
            api_key: API key sent with every request.
            timeout: Seconds handed to ``requests`` as the ``timeout`` of
                every HTTP call made by this client. Pass ``None`` to wait
                indefinitely.
        """
        self.api_key = api_key
        self.base_url = "https://www.productcategorization.com/api/"
        # Without a timeout ``requests`` blocks forever on a stalled server.
        self.timeout = timeout

    def categorize_text(self, text, confidence=0, expand_context=0):
        """Categorize a piece of text.

        Sends a GET request to ``ecommerce/ecommerce_category6_get.php``.

        Args:
            text: Value sent as the ``query`` parameter.
            confidence: Sent as the ``confidence`` parameter (stringified).
            expand_context: Sent as the ``expand_context`` parameter
                (stringified).

        Returns:
            The decoded JSON response body.
        """
        params = {
            "query": text,
            "api_key": self.api_key,
            "confidence": str(confidence),
            "expand_context": str(expand_context),
        }
        response = requests.get(
            self.base_url + "ecommerce/ecommerce_category6_get.php",
            params=params,
            timeout=self.timeout,
        )
        return response.json()

    def categorize_url(self, url):
        """Categorize a URL.

        Sends a form-encoded POST request to
        ``iab/iab_web_content_filtering_url.php``.

        Args:
            url: Value sent as the ``query`` field.

        Returns:
            The decoded JSON response body.
        """
        payload = {
            'query': url,
            'api_key': self.api_key,
            'data_type': 'url',
        }
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        response = requests.post(
            self.base_url + "iab/iab_web_content_filtering_url.php",
            data=payload,
            headers=headers,
            timeout=self.timeout,
        )
        return response.json()

    def categorize_image(self, image_url, text="", ip="0", login="0"):
        """Download an image and submit it for categorization.

        The image is fetched into memory from ``image_url`` and uploaded as a
        multipart POST to ``ecommerce/ecommerce_shopify_image.php``.

        Args:
            image_url: Address the image is downloaded from.
            text: Value sent as the ``text`` form field.
            ip: Value sent as the ``ip`` form field.
            login: Value sent as the ``login`` form field.

        Returns:
            The decoded JSON response body, or
            ``{'error': 'Failed to download image'}`` when the download does
            not answer with HTTP 200.
        """
        import io

        # Download image to memory
        image_response = requests.get(image_url, timeout=self.timeout)
        if image_response.status_code != 200:
            return {'error': 'Failed to download image'}

        image_file = io.BytesIO(image_response.content)
        data = {
            'ip': ip,
            'api_key': self.api_key,
            'login': login,
            'text': text,
        }
        # The upload is always labelled as a JPEG named image.jpg, whatever
        # the downloaded bytes actually contain.
        files = {
            'image': ('image.jpg', image_file, 'image/jpeg')
        }
        response = requests.post(
            self.base_url + "ecommerce/ecommerce_shopify_image.php",
            data=data,
            files=files,
            timeout=self.timeout,
        )
        return response.json()
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 arnu515
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,445 @@
1
+ Metadata-Version: 2.1
2
+ Name: productcategorizationapi
3
+ Version: 1.1
4
+ Summary: product categorization API
5
+ Home-page: https://www.productcategorization.com
6
+ Author-email: info@productcategorizationapi.com
7
+ License: UNKNOWN
8
+ Keywords: product categorization,classification,categorization
9
+ Platform: UNKNOWN
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.3
12
+ Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.0
13
+ Classifier: Environment :: GPU :: NVIDIA CUDA
14
+ Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.2
15
+ Classifier: Environment :: GPU :: NVIDIA CUDA :: 10.1
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Description-Content-Type: text/markdown
18
+
19
+ This README documents the Python client for the ProductCategorization.com API: installation, authentication, usage examples, best practices, and related services.
20
+ A reference copy of the package's `__init__.py`, which defines the `ProductCategorizationAPI` class, is included at the end.
21
+
22
+ ---
23
+
24
+ # ProductCategorization.com Python Client
25
+
26
+ [![PyPI version](https://badge.fury.io/py/productcategorizationapi.svg)](https://pypi.org/project/productcategorizationapi/)
27
+ [API Docs](https://www.productcategorization.com/api.php)
28
+
29
+ ---
30
+
31
+ ## Overview
32
+
33
+ The `productcategorizationapi` Python package provides seamless access to one of the world's most advanced product categorization APIs, powering e-commerce classification for unicorn startups, multinational enterprises, retail analytics platforms, adTech innovators, and online merchants. Whether you operate an e-commerce storefront, marketplace, or SaaS platform, this package allows you to integrate AI-powered categorization directly into your Python applications, unlocking world-class product, URL, and image classification using industry-standard taxonomies.
34
+
35
+ ## Key Features
36
+
37
+ * **Ultra-Accurate Product Categorization**
38
+ Classify product titles, descriptions, and URLs using:
39
+
40
+ * **Google Shopping Taxonomy:** Over 5,500 hierarchical categories for granular and up-to-date mapping.
41
+ * **Shopify Taxonomy:** Leverage the latest Shopify category structure with \~11,000 fine-grained categories.
42
+ * **Amazon and Other Standard Taxonomies:** Flexibility for diverse retail needs.
43
+ * **Custom Taxonomies:** Tailor classifiers to your unique vertical or proprietary taxonomy.
44
+
45
+ * **Multi-Modal Classification**
46
+
47
+ * **Text**: Classify any product-related string.
48
+ * **URL**: Categorize products directly from their web pages.
49
+ * **Image**: Obtain Shopify categories and attribute extraction directly from images (using AI vision).
50
+
51
+ * **Buyer Persona Enrichment**
52
+ Every classification returns relevant buyer personas—select from a proprietary library of over 1,800 personas to enrich your analytics, personalization, or marketing automations.
53
+ Confidence scores and expanded context available.
54
+
55
+ * **High Scalability and Reliability**
56
+ Robust API supporting high throughput (rate limits adjustable upon request), with credit-based billing for predictable scaling.
57
+
58
+ * **Plug-and-Play Python Integration**
59
+ Simple, modern, and extensible Python API client.
60
+ See [API Usage](#api-usage) for usage examples.
61
+
62
+ ---
63
+
64
+ ## Table of Contents
65
+
66
+ * [Getting Started](#getting-started)
67
+ * [Authentication](#authentication)
68
+ * [API Usage](#api-usage)
69
+
70
+ * [Text Categorization](#text-categorization)
71
+ * [URL Categorization](#url-categorization)
72
+ * [Image Categorization](#image-categorization)
73
+ * [Advanced Options](#advanced-options)
74
+
75
+ * [Buyer Personas and Confidence Scores](#buyer-personas-and-confidence-scores)
76
+ * [Context Expansion](#context-expansion)
77
+ * [Error Handling](#error-handling)
78
+ * [Best Practices](#best-practices)
79
+ * [Integration Examples](#integration-examples)
80
+ * [Contact & Support](#contact--support)
81
+ * [Related Services](#related-services)
82
+ * [References & Further Reading](#references--further-reading)
83
+
84
+ ---
85
+
86
+ ## Getting Started
87
+
88
+ Install the package via PyPI:
89
+
90
+ ```bash
91
+ pip install productcategorizationapi
92
+ ```
93
+
94
+ Or add it to your `requirements.txt` for automatic deployment.
95
+
96
+ ---
97
+
98
+ ## Authentication
99
+
100
+ All API access is secured by a personal API key.
101
+ To obtain your API key:
102
+
103
+ 1. Sign up and purchase a subscription at [www.productcategorization.com](https://www.productcategorization.com/pricing.php).
104
+ 2. Provide the API key in every request (see examples).
105
+
106
+ > **Note:** Never share your API key publicly. Store it securely as an environment variable or in your configuration files.
107
+
108
+ ---
109
+
110
+ ## API Usage
111
+
112
+ ### Text Categorization
113
+
114
+ Classify any product text (title, description, or keyword) in a single line:
115
+
116
+ ```python
117
+ from productcategorizationapi import ProductCategorizationAPI
118
+
119
+ api = ProductCategorizationAPI(api_key="your_api_key")
120
+ result = api.categorize_text("Fluorescent Highlighters 3pc Yellow")
121
+ print(result)
122
+ ```
123
+
124
+ **Sample Response:**
125
+
126
+ ```json
127
+ {
128
+ "total_credits": 100044,
129
+ "remaining_credits": 33075,
130
+ "language": "en",
131
+ "classification": "Office Supplies > Office Instruments > Writing & Drawing Instruments",
132
+ "buyer_personas": [
133
+ "Business Professional", "Office Professional", "Administrative Coordinator", ...
134
+ ],
135
+ "buyer_personas_confidence_selection": {
136
+ "Office Professional": 0.9,
137
+ "Business Professional": 0.8,
138
+ ...
139
+ },
140
+ "ID": "977",
141
+ "status": 200
142
+ }
143
+ ```
144
+
145
+ **Parameters:**
146
+
147
+ * `query` (str): Product text for categorization.
148
+ * `confidence` (optional, int): Set to `1` to include confidence scores for each persona.
149
+ * `expand_context` (optional, int): Set to `1` to auto-generate expanded context for short/ambiguous texts.
150
+
151
+ ---
152
+
153
+ ### URL Categorization
154
+
155
+ You can also classify products by URL, leveraging our AI’s ability to extract relevant text and metadata:
156
+
157
+ ```python
158
+ result = api.categorize_url("https://www.apple.com")
159
+ print(result)
160
+ ```
161
+
162
+ **Sample Python (requests):**
163
+
164
+ ```python
165
+ import requests
166
+
167
+ payload = {'query': 'www.apple.com', 'api_key': 'your_api_key', 'data_type': 'url'}
168
+ response = requests.post("https://www.productcategorization.com/api/iab/iab_web_content_filtering_url.php", data=payload)
169
+ print(response.json())
170
+ ```
171
+
172
+ ---
173
+
174
+ ### Image Categorization
175
+
176
+ Classify products using image URLs or local image files (Shopify Taxonomy + attribute extraction):
177
+
178
+ ```python
179
+ result = api.categorize_image(image_url="https://images.com/product.jpg", text="Product title")
180
+ print(result)
181
+ ```
182
+
183
+ **Example Function:**
184
+
185
+ ```python
186
+ import requests
187
+ import io
188
+
189
+ def call_api(image_url, text, api_key):
190
+ api_endpoint = 'https://www.productcategorization.com/api/ecommerce/ecommerce_shopify_image.php'
191
+ response = requests.get(image_url)
192
+ if response.status_code != 200:
193
+ return {'error': 'Failed to download image'}
194
+ image_file = io.BytesIO(response.content)
195
+ data = {'ip': '0', 'api_key': api_key, 'login': '0', 'text': text}
196
+ files = {'image': ('image.jpg', image_file, 'image/jpeg')}
197
+ response = requests.post(api_endpoint, data=data, files=files)
198
+ return response.json()
199
+ ```
200
+
201
+ ---
202
+
203
+ ## Advanced Options
204
+
205
+ ### Buyer Personas and Confidence Scores
206
+
207
+ Our AI delivers a unique set of buyer personas for every product—ideal for market analysis, targeted marketing, or persona-based analytics.
208
+ Enable confidence scoring to obtain relevance weights for each persona:
209
+
210
+ ```python
211
+ result = api.categorize_text("Eco-Friendly Notebook", confidence=1)
212
+ print(result["buyer_personas_confidence_selection"])
213
+ ```
214
+
215
+ ### Context Expansion
216
+
217
+ For short or ambiguous inputs, enable `expand_context=1` to let our AI generate an enhanced description for improved classification accuracy:
218
+
219
+ ```python
220
+ result = api.categorize_text("3pc Yellow Highlighters", expand_context=1)
221
+ print(result["expanded_context"])
222
+ ```
223
+
224
+ ---
225
+
226
+ ## Error Handling
227
+
228
+ All API responses include a `status` code for programmatic error handling:
229
+
230
+ | Status | Meaning |
231
+ | ------ | ---------------------------------------- |
232
+ | 200 | Request was successful |
233
+ | 400 | Request malformed (check parameters) |
234
+ | 401 | Invalid API key (check or purchase key) |
235
+ | 403 | Quota exhausted (upgrade or add credits) |
236
+
237
+ Example error handling in Python:
238
+
239
+ ```python
240
+ if result["status"] != 200:
241
+ print(f"API Error: {result.get('message', 'Unknown error')}")
242
+ ```
243
+
244
+ ---
245
+
246
+ ## Best Practices
247
+
248
+ * **Monitor Remaining Credits:** Every response includes `total_credits` and `remaining_credits`. Plan your usage to avoid interruptions.
249
+ * **Respect Rate Limits:** Default is 60 requests per minute. Contact support for higher needs.
250
+ * **Secure Your API Key:** Do not embed directly in code if publishing open-source.
251
+ * **Use Context Expansion When Needed:** For short/ambiguous product titles, enable `expand_context`.
252
+ * **Batch Requests:** For large datasets, consider batching requests and handling quota gracefully.
253
+
254
+ ---
255
+
256
+ ## Integration Examples
257
+
258
+ ### Python Example
259
+
260
+ ```python
261
+ from productcategorizationapi import ProductCategorizationAPI
262
+
263
+ api = ProductCategorizationAPI(api_key="your_api_key")
264
+ result = api.categorize_text("Fluorescent Highlighters 3pc Yellow")
265
+ print(result["classification"])
266
+ ```
267
+
268
+ ### JavaScript Example
269
+
270
+ ```javascript
271
+ const apiBaseUrl = "https://www.productcategorization.com/api/ecommerce/ecommerce_category6_get.php?";
272
+ const apiKey = "your_api_key";
273
+ const queryText = "Fluorescent Highlighters 3pc Yellow";
274
+ const encodedQueryText = encodeURIComponent(queryText);
275
+ const finalUrl = `${apiBaseUrl}query=${encodedQueryText}&api_key=${apiKey}`;
276
+
277
+ fetch(finalUrl)
278
+ .then(response => response.json())
279
+ .then(data => console.log(data));
280
+ ```
281
+
282
+ ### Ruby Example
283
+
284
+ ```ruby
285
+ require 'uri'
286
+ require 'net/http'
287
+
288
+ api_base_url = "https://www.productcategorization.com/api/ecommerce/ecommerce_category6_get.php"
289
+ api_key = "your_api_key"
290
+ query_text = "Fluorescent Highlighters 3pc Yellow"
291
+
292
+ encoded_query = URI.encode_www_form_component(query_text)
293
+ url = URI("#{api_base_url}?query=#{encoded_query}&api_key=#{api_key}")
294
+
295
+ response = Net::HTTP.get(url)
296
+ puts response
297
+ ```
298
+
299
+ ### C# Example
300
+
301
+ ```csharp
302
+ using System;
303
+ using System.Net.Http;
304
+ using System.Threading.Tasks;
305
+
306
+ class Program {
307
+ static async Task Main(string[] args) {
308
+ var apiBaseUrl = "https://www.productcategorization.com/api/ecommerce/ecommerce_category6_get.php?";
309
+ var apiKey = "your_api_key";
310
+ var queryText = "Fluorescent Highlighters 3pc Yellow";
311
+ var encodedQueryText = Uri.EscapeDataString(queryText);
312
+ var finalUrl = $"{apiBaseUrl}query={encodedQueryText}&api_key={apiKey}";
313
+
314
+ using (HttpClient client = new HttpClient()) {
315
+ var response = await client.GetStringAsync(finalUrl);
316
+ Console.WriteLine(response);
317
+ }
318
+ }
319
+ }
320
+ ```
321
+
322
+ ---
323
+
324
+ ## Contact & Support
325
+
326
+ Need a higher rate limit, a custom classifier, or additional support?
327
+ Visit [Contact](https://www.productcategorization.com/contact), or email support via your account dashboard.
328
+
329
+ ---
330
+
331
+ ## Related Services
332
+
333
+ Leverage our broader suite of AI-powered APIs to cover every aspect of your business’s data intelligence and privacy needs:
334
+
335
+ * **[Comment Moderation API](https://www.contentmoderationapi.net) – comment moderation api:**
336
+ Safeguard your community, app, or platform with industry-leading AI moderation for comments and user-generated content. Detect profanity, hate speech, spam, and toxicity in real time.
337
+
338
+ * **[Live Video Anonymization](https://www.anomyizationapi.com) – live video anonymization:**
339
+ Protect privacy with automatic anonymization of faces and sensitive objects in live video streams, supporting GDPR compliance and safeguarding user identities.
340
+
341
+ * **[Text Redaction API](https://www.redactionapi.net) – text redaction api:**
342
+ Redact personal data, financial information, or any sensitive fields from documents at scale using our high-precision redaction API.
343
+
344
+ * **[Company Enrichment Data](https://www.companydataapi.com) – company enrichment data:**
345
+ Instantly enhance your CRM, sales, or analytics platform with up-to-date company profiles, firmographics, and contact data.
346
+
347
+ * **[Domain Categorization Data](https://www.urlcategorizationdatabase.com) – domain categorization data:**
348
+ Access the world’s largest database of categorized domains for cybersecurity, web filtering, and content safety.
349
+
350
+ * **[AI Contract Analysis](https://www.aicontractreviewtool.com) – ai contract analysis:**
351
+ Revolutionize contract review workflows with advanced AI-driven contract analysis, risk detection, and compliance assessment.
352
+
353
+ Our APIs integrate seamlessly with your product workflows, providing reliable, scalable, and secure endpoints for your business logic.
354
+
355
+ ---
356
+
357
+ ## References & Further Reading
358
+
359
+ For best-in-class taxonomy, AI, and categorization research, explore:
360
+
361
+ * [Stanford AI Lab](https://ai.stanford.edu)
362
+ * [MIT CSAIL](https://www.csail.mit.edu)
363
+ * [Berkeley AI Research](https://bair.berkeley.edu)
364
+ * [Oxford Internet Institute](https://www.oii.ox.ac.uk)
365
+ * [UCL Centre for Artificial Intelligence](https://www.ucl.ac.uk/ai)
366
+ * [Google AI Blog](https://ai.googleblog.com/)
367
+ * [Microsoft Research](https://www.microsoft.com/en-us/research/)
368
+ * [arXiv Machine Learning](https://arxiv.org/list/cs.LG/recent)
369
+
370
+ For taxonomy standards and e-commerce data:
371
+
372
+ * [Google Shopping Taxonomy](https://support.google.com/merchants/answer/6324436)
373
+ * [Shopify Product Taxonomy](https://github.com/Shopify/product-taxonomy)
374
+
375
+ ---
376
+
377
+ ## License
378
+
379
+ This library is distributed under the MIT License.
380
+
381
+ ---
382
+
383
+ ## Disclaimer
384
+
385
+ This project is unaffiliated with Google, Shopify, or Amazon.
386
+ All trademarks are property of their respective owners.
387
+
388
+ ---
389
+
390
+ # `__init__.py` Example
391
+
392
+ ```python
393
+ import requests
394
+
395
+ class ProductCategorizationAPI:
396
+ def __init__(self, api_key):
397
+ self.api_key = api_key
398
+ self.base_url = "https://www.productcategorization.com/api/"
399
+
400
+ def categorize_text(self, text, confidence=0, expand_context=0):
401
+ params = {
402
+ "query": text,
403
+ "api_key": self.api_key,
404
+ "confidence": str(confidence),
405
+ "expand_context": str(expand_context)
406
+ }
407
+ response = requests.get(self.base_url + "ecommerce/ecommerce_category6_get.php", params=params)
408
+ return response.json()
409
+
410
+ def categorize_url(self, url):
411
+ payload = {
412
+ 'query': url,
413
+ 'api_key': self.api_key,
414
+ 'data_type': 'url'
415
+ }
416
+ headers = {
417
+ 'Content-Type': 'application/x-www-form-urlencoded'
418
+ }
419
+ response = requests.post(self.base_url + "iab/iab_web_content_filtering_url.php", data=payload, headers=headers)
420
+ return response.json()
421
+
422
+ def categorize_image(self, image_url, text="", ip="0", login="0"):
423
+ # Download image to memory
424
+ image_response = requests.get(image_url)
425
+ if image_response.status_code != 200:
426
+ return {'error': 'Failed to download image'}
427
+ import io
428
+ image_file = io.BytesIO(image_response.content)
429
+ data = {
430
+ 'ip': ip,
431
+ 'api_key': self.api_key,
432
+ 'login': login,
433
+ 'text': text
434
+ }
435
+ files = {
436
+ 'image': ('image.jpg', image_file, 'image/jpeg')
437
+ }
438
+ response = requests.post(self.base_url + "ecommerce/ecommerce_shopify_image.php", data=data, files=files)
439
+ return response.json()
440
+ ```
441
+
442
+ ---
443
+
444
+
445
+
@@ -0,0 +1,8 @@
1
+ productcategorizationapi/__init__.py,sha256=du2lXFJfeHR5uNtiLfvzCep0xKLUi1LaFKI_v6cuwYc,1734
2
+ test/__init__.py,sha256=wTdBA0QbwpUvhKcdWegp481T0tvA6XcAb9BkdSDkzvE,2869
3
+ websiteclassificationapi/__init__.py,sha256=wTdBA0QbwpUvhKcdWegp481T0tvA6XcAb9BkdSDkzvE,2869
4
+ productcategorizationapi-1.1.dist-info/LICENSE,sha256=7Fog1crdAQKBoXwk300JwPHIMJV-386OAuvTrd-N8hs,1083
5
+ productcategorizationapi-1.1.dist-info/METADATA,sha256=pI0w_SQWKyYwURZngqBbTXrHnkrAWcV0oubQXK4jyv8,15421
6
+ productcategorizationapi-1.1.dist-info/WHEEL,sha256=D1Wh14kWDxPnrM-5t_6UCB-UuQNrEODtRa3vF4OsvQY,97
7
+ productcategorizationapi-1.1.dist-info/top_level.txt,sha256=7rteRm_SKjI0LumUZysikOpG7fGMo63zBnHZ1yQDg30,30
8
+ productcategorizationapi-1.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.35.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ productcategorizationapi
2
+ test
test/__init__.py ADDED
@@ -0,0 +1,62 @@
1
+ """
2
+ Python wrapper for website categorization API (service of www.websitecategorizationapi.com)
3
+
4
+ Explanation of available classifiers:
5
+
6
+ `category_type` should be set to either iab1 (Tier 1 categorization) or iab2 (Tier 2 categorization) for general websites, or to ecommerce1, ecommerce2 or ecommerce3 for E-commerce or product websites.
7
+
8
+ IAB Tier 1 categorization returns probabilities of text being classified as one of 29 possible categories.
9
+
10
+ IAB Tier 2 categorization returns probabilities of text being classified as one of 447 possible categories.
11
+
12
+ Ecommerce Tier 1 categorization returns probabilities of text being classified as one of 21 possible categories.
13
+
14
+ Ecommerce Tier 2 website categorization returns probabilities of text being classified as one of 182 possible categories.
15
+
16
+ Ecommerce Tier 3 website categorization returns probabilities of text being classified as one of 1113 possible categories.
17
+ """
18
+
19
+ import requests
20
+ import json
21
+
22
class websiteclassificationapi:
    """Static helper for the URL endpoints of www.websitecategorizationapi.com."""

    # Endpoint that serves each supported ``category_type`` value.
    _ENDPOINTS = {
        'iab1': "https://www.websitecategorizationapi.com/api/iab/iab_category1_url.php",
        'iab2': "https://www.websitecategorizationapi.com/api/iab/iab_category2_url.php",
        'ecommerce1': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category1.php",
        'ecommerce2': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category2.php",
        'ecommerce3': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category3.php",
    }

    def __init__(self):
        pass

    @staticmethod
    def get_categorization(url, api_key, category_type, timeout=60):
        """Return the first category the API reports for a website.

        Args:
            url: URL of the website. ``http://`` is prepended when the value
                does not already start with ``http://`` or ``https://``.
            api_key: API key sent with the request.
            category_type: ``iab1`` or ``iab2`` for general websites;
                ``ecommerce1``, ``ecommerce2`` or ``ecommerce3`` for
                E-commerce or product websites.
            timeout: Seconds to wait for the HTTP response; ``None`` waits
                indefinitely.

        Returns:
            ``data['classification'][0]['category']`` from the decoded JSON
            response; the string ``'url could not be loaded'`` when the
            response does not have that shape; ``None`` (after printing the
            reason) when ``category_type`` is unknown or the request or JSON
            decoding fails.
        """
        url_api = websiteclassificationapi._ENDPOINTS.get(category_type)
        if url_api is None:
            # Report the real problem rather than failing later on an
            # unbound endpoint variable; still best-effort, so no raise.
            print("Unknown category_type: %r" % (category_type,))
            return None

        # Only add a scheme when none is present. Testing
        # "'http://' not in url or 'https://' not in url" is true for every
        # ordinary URL and would corrupt inputs that already carry a scheme.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'http://' + url

        try:
            # A dict payload lets requests form-encode every field,
            # including the API key.
            payload = {'query': url, 'api_key': api_key, 'data_type': 'url'}
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded'
            }
            response = requests.request(
                "POST", url_api, headers=headers, data=payload, timeout=timeout
            )
            data = json.loads(response.text)
        except Exception as e:
            # Best-effort contract: network and decoding errors are printed,
            # not propagated.
            print(e)
            return None

        try:
            return data['classification'][0]['category']
        except (KeyError, IndexError, TypeError):
            return 'url could not be loaded'
@@ -0,0 +1,62 @@
1
+ """
2
+ Python wrapper for website categorization API (service of www.websitecategorizationapi.com)
3
+
4
+ Explanation of available classifiers:
5
+
6
+ `category_type` should be set to either iab1 (Tier 1 categorization) or iab2 (Tier 2 categorization) for general websites, or to ecommerce1, ecommerce2 or ecommerce3 for E-commerce or product websites.
7
+
8
+ IAB Tier 1 categorization returns probabilities of text being classified as one of 29 possible categories.
9
+
10
+ IAB Tier 2 categorization returns probabilities of text being classified as one of 447 possible categories.
11
+
12
+ Ecommerce Tier 1 categorization returns probabilities of text being classified as one of 21 possible categories.
13
+
14
+ Ecommerce Tier 2 website categorization returns probabilities of text being classified as one of 182 possible categories.
15
+
16
+ Ecommerce Tier 3 website categorization returns probabilities of text being classified as one of 1113 possible categories.
17
+ """
18
+
19
+ import requests
20
+ import json
21
+
22
class websiteclassificationapi:
    """Static helper for the URL endpoints of www.websitecategorizationapi.com."""

    # Endpoint that serves each supported ``category_type`` value.
    _ENDPOINTS = {
        'iab1': "https://www.websitecategorizationapi.com/api/iab/iab_category1_url.php",
        'iab2': "https://www.websitecategorizationapi.com/api/iab/iab_category2_url.php",
        'ecommerce1': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category1.php",
        'ecommerce2': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category2.php",
        'ecommerce3': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category3.php",
    }

    def __init__(self):
        pass

    @staticmethod
    def get_categorization(url, api_key, category_type, timeout=60):
        """Return the first category the API reports for a website.

        Args:
            url: URL of the website. ``http://`` is prepended when the value
                does not already start with ``http://`` or ``https://``.
            api_key: API key sent with the request.
            category_type: ``iab1`` or ``iab2`` for general websites;
                ``ecommerce1``, ``ecommerce2`` or ``ecommerce3`` for
                E-commerce or product websites.
            timeout: Seconds to wait for the HTTP response; ``None`` waits
                indefinitely.

        Returns:
            ``data['classification'][0]['category']`` from the decoded JSON
            response; the string ``'url could not be loaded'`` when the
            response does not have that shape; ``None`` (after printing the
            reason) when ``category_type`` is unknown or the request or JSON
            decoding fails.
        """
        url_api = websiteclassificationapi._ENDPOINTS.get(category_type)
        if url_api is None:
            # Report the real problem rather than failing later on an
            # unbound endpoint variable; still best-effort, so no raise.
            print("Unknown category_type: %r" % (category_type,))
            return None

        # Only add a scheme when none is present. Testing
        # "'http://' not in url or 'https://' not in url" is true for every
        # ordinary URL and would corrupt inputs that already carry a scheme.
        if not url.lower().startswith(('http://', 'https://')):
            url = 'http://' + url

        try:
            # A dict payload lets requests form-encode every field,
            # including the API key.
            payload = {'query': url, 'api_key': api_key, 'data_type': 'url'}
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded'
            }
            response = requests.request(
                "POST", url_api, headers=headers, data=payload, timeout=timeout
            )
            data = json.loads(response.text)
        except Exception as e:
            # Best-effort contract: network and decoding errors are printed,
            # not propagated.
            print(e)
            return None

        try:
            return data['classification'][0]['category']
        except (KeyError, IndexError, TypeError):
            return 'url could not be loaded'