productcategorizationapi 1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- productcategorizationapi/__init__.py +47 -0
- productcategorizationapi-1.1.dist-info/LICENSE +21 -0
- productcategorizationapi-1.1.dist-info/METADATA +445 -0
- productcategorizationapi-1.1.dist-info/RECORD +8 -0
- productcategorizationapi-1.1.dist-info/WHEEL +5 -0
- productcategorizationapi-1.1.dist-info/top_level.txt +2 -0
- test/__init__.py +62 -0
- websiteclassificationapi/__init__.py +62 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
|
|
3
|
+
class ProductCategorizationAPI:
    """Small client for the productcategorization.com HTTP API.

    Each ``categorize_*`` method performs blocking HTTP calls through
    ``requests`` and returns the result of ``response.json()`` for the API
    response. Errors raised by ``requests`` (connection failures, timeouts)
    and by ``response.json()`` (body is not valid JSON) are not caught here
    and propagate to the caller.
    """

    # Class-level fallback so instances created without running __init__
    # (e.g. by a subclass that overrides it) still have a timeout.
    timeout = 30

    def __init__(self, api_key, timeout=30):
        """Store the API key and connection settings.

        Args:
            api_key: API key that is sent with every request.
            timeout: Seconds passed as ``timeout=`` to every ``requests``
                call. ``requests`` applies no timeout by default, so without
                this a stalled server would block the caller forever. Pass
                ``None`` to restore the previous wait-forever behaviour.
        """
        self.api_key = api_key
        self.base_url = "https://www.productcategorization.com/api/"
        self.timeout = timeout

    def categorize_text(self, text, confidence=0, expand_context=0):
        """Categorize a piece of product text.

        Args:
            text: Product text; sent as the ``query`` parameter.
            confidence: Sent as the ``confidence`` parameter after ``str()``.
            expand_context: Sent as the ``expand_context`` parameter after
                ``str()``.

        Returns:
            The decoded JSON body of the API response.
        """
        params = {
            "query": text,
            "api_key": self.api_key,
            "confidence": str(confidence),
            "expand_context": str(expand_context),
        }
        response = requests.get(
            self.base_url + "ecommerce/ecommerce_category6_get.php",
            params=params,
            timeout=self.timeout,
        )
        return response.json()

    def categorize_url(self, url):
        """Categorize the page behind ``url``.

        Args:
            url: Address to classify; sent as the ``query`` form field with
                ``data_type`` set to ``'url'``.

        Returns:
            The decoded JSON body of the API response.
        """
        payload = {
            'query': url,
            'api_key': self.api_key,
            'data_type': 'url',
        }
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        response = requests.post(
            self.base_url + "iab/iab_web_content_filtering_url.php",
            data=payload,
            headers=headers,
            timeout=self.timeout,
        )
        return response.json()

    def categorize_image(self, image_url, text="", ip="0", login="0"):
        """Download an image and submit it for categorization.

        Args:
            image_url: Address the image is downloaded from.
            text: Sent as the ``text`` form field.
            ip: Sent as the ``ip`` form field.
            login: Sent as the ``login`` form field.

        Returns:
            ``{'error': 'Failed to download image'}`` when the download does
            not answer with HTTP 200; otherwise the decoded JSON body of the
            API response.
        """
        # Imported locally so this method does not rely on a module-level
        # ``import io``.
        import io

        # Download image to memory
        image_response = requests.get(image_url, timeout=self.timeout)
        if image_response.status_code != 200:
            return {'error': 'Failed to download image'}

        image_file = io.BytesIO(image_response.content)
        data = {
            'ip': ip,
            'api_key': self.api_key,
            'login': login,
            'text': text,
        }
        # NOTE(review): the upload is always labelled image.jpg / image/jpeg,
        # whatever the downloaded file actually is - confirm the endpoint
        # does not depend on the declared type.
        files = {
            'image': ('image.jpg', image_file, 'image/jpeg'),
        }
        response = requests.post(
            self.base_url + "ecommerce/ecommerce_shopify_image.php",
            data=data,
            files=files,
            timeout=self.timeout,
        )
        return response.json()
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2021 arnu515
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: productcategorizationapi
|
|
3
|
+
Version: 1.1
|
|
4
|
+
Summary: product categorization API
|
|
5
|
+
Home-page: https://www.productcategorization.com
|
|
6
|
+
Author-email: info@productcategorizationapi.com
|
|
7
|
+
License: UNKNOWN
|
|
8
|
+
Keywords: product categorization,classification,categorization
|
|
9
|
+
Platform: UNKNOWN
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.3
|
|
12
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.0
|
|
13
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
14
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA :: 11.2
|
|
15
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA :: 10.1
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
This README documents the `productcategorizationapi` Python package. It covers installation, authentication, code examples, best practices, and related services, with links to external research and reference material.
|
|
20
|
+
The package's `__init__.py`, which defines the `ProductCategorizationAPI` class, is reproduced at the end of this document.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
# ProductCategorization.com Python Client
|
|
25
|
+
|
|
26
|
+
[PyPI package](https://pypi.org/project/productcategorizationapi/)
|
|
27
|
+
[API documentation](https://www.productcategorization.com/api.php)
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Overview
|
|
32
|
+
|
|
33
|
+
The `productcategorizationapi` Python package provides seamless access to one of the world's most advanced product categorization APIs, powering e-commerce classification for unicorn startups, multinational enterprises, retail analytics platforms, adTech innovators, and online merchants. Whether you operate an e-commerce storefront, marketplace, or SaaS platform, this package allows you to integrate AI-powered categorization directly into your Python applications, unlocking world-class product, URL, and image classification using industry-standard taxonomies.
|
|
34
|
+
|
|
35
|
+
## Key Features
|
|
36
|
+
|
|
37
|
+
* **Ultra-Accurate Product Categorization**
|
|
38
|
+
Classify product titles, descriptions, and URLs using:
|
|
39
|
+
|
|
40
|
+
* **Google Shopping Taxonomy:** Over 5,500 hierarchical categories for granular and up-to-date mapping.
|
|
41
|
+
* **Shopify Taxonomy:** Leverage the latest Shopify category structure with \~11,000 fine-grained categories.
|
|
42
|
+
* **Amazon and Other Standard Taxonomies:** Flexibility for diverse retail needs.
|
|
43
|
+
* **Custom Taxonomies:** Tailor classifiers to your unique vertical or proprietary taxonomy.
|
|
44
|
+
|
|
45
|
+
* **Multi-Modal Classification**
|
|
46
|
+
|
|
47
|
+
* **Text**: Classify any product-related string.
|
|
48
|
+
* **URL**: Categorize products directly from their web pages.
|
|
49
|
+
* **Image**: Obtain Shopify categories and attribute extraction directly from images (using AI vision).
|
|
50
|
+
|
|
51
|
+
* **Buyer Persona Enrichment**
|
|
52
|
+
Every classification returns relevant buyer personas—select from a proprietary library of over 1,800 personas to enrich your analytics, personalization, or marketing automations.
|
|
53
|
+
Confidence scores and expanded context available.
|
|
54
|
+
|
|
55
|
+
* **High Scalability and Reliability**
|
|
56
|
+
Robust API supporting high throughput (rate limits adjustable upon request), with credit-based billing for predictable scaling.
|
|
57
|
+
|
|
58
|
+
* **Plug-and-Play Python Integration**
|
|
59
|
+
Simple, modern, and extensible Python API client.
|
|
60
|
+
See [Getting Started](#getting-started) and [API Usage](#api-usage) for usage examples.
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Table of Contents
|
|
65
|
+
|
|
66
|
+
* [Getting Started](#getting-started)
|
|
67
|
+
* [Authentication](#authentication)
|
|
68
|
+
* [API Usage](#api-usage)
|
|
69
|
+
|
|
70
|
+
* [Text Categorization](#text-categorization)
|
|
71
|
+
* [URL Categorization](#url-categorization)
|
|
72
|
+
* [Image Categorization](#image-categorization)
|
|
73
|
+
* [Advanced Options](#advanced-options)
|
|
74
|
+
|
|
75
|
+
* [Buyer Personas and Confidence Scores](#buyer-personas-and-confidence-scores)
|
|
76
|
+
* [Context Expansion](#context-expansion)
|
|
77
|
+
* [Error Handling](#error-handling)
|
|
78
|
+
* [Best Practices](#best-practices)
|
|
79
|
+
* [Integration Examples](#integration-examples)
|
|
80
|
+
* [Contact & Support](#contact--support)
|
|
81
|
+
* [Related Services](#related-services)
|
|
82
|
+
* [References & Further Reading](#references--further-reading)
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Getting Started
|
|
87
|
+
|
|
88
|
+
Install the package via PyPI:
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install productcategorizationapi
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Or add it to your `requirements.txt` for automatic deployment.
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Authentication
|
|
99
|
+
|
|
100
|
+
All API access is secured by a personal API key.
|
|
101
|
+
To obtain your API key:
|
|
102
|
+
|
|
103
|
+
1. Sign up and purchase a subscription at [www.productcategorization.com](https://www.productcategorization.com/pricing.php).
|
|
104
|
+
2. Provide the API key in every request (see examples).
|
|
105
|
+
|
|
106
|
+
> **Note:** Never share your API key publicly. Store it securely as an environment variable or in your configuration files.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## API Usage
|
|
111
|
+
|
|
112
|
+
### Text Categorization
|
|
113
|
+
|
|
114
|
+
Classify any product text (title, description, or keyword) in a single line:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from productcategorizationapi import ProductCategorizationAPI
|
|
118
|
+
|
|
119
|
+
api = ProductCategorizationAPI(api_key="your_api_key")
|
|
120
|
+
result = api.categorize_text("Fluorescent Highlighters 3pc Yellow")
|
|
121
|
+
print(result)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
**Sample Response:**
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"total_credits": 100044,
|
|
129
|
+
"remaining_credits": 33075,
|
|
130
|
+
"language": "en",
|
|
131
|
+
"classification": "Office Supplies > Office Instruments > Writing & Drawing Instruments",
|
|
132
|
+
"buyer_personas": [
|
|
133
|
+
"Business Professional", "Office Professional", "Administrative Coordinator", ...
|
|
134
|
+
],
|
|
135
|
+
"buyer_personas_confidence_selection": {
|
|
136
|
+
"Office Professional": 0.9,
|
|
137
|
+
"Business Professional": 0.8,
|
|
138
|
+
...
|
|
139
|
+
},
|
|
140
|
+
"ID": "977",
|
|
141
|
+
"status": 200
|
|
142
|
+
}
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
**Parameters:**
|
|
146
|
+
|
|
147
|
+
* `query` (str): Product text for categorization (passed as the `text` argument of `categorize_text`).
|
|
148
|
+
* `confidence` (optional, int): Set to `1` to include confidence scores for each persona.
|
|
149
|
+
* `expand_context` (optional, int): Set to `1` to auto-generate expanded context for short/ambiguous texts.
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
### URL Categorization
|
|
154
|
+
|
|
155
|
+
You can also classify products by URL, leveraging our AI’s ability to extract relevant text and metadata:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
result = api.categorize_url("https://www.apple.com")
|
|
159
|
+
print(result)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
**Sample Python (requests):**
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
import requests
|
|
166
|
+
|
|
167
|
+
payload = {'query': 'www.apple.com', 'api_key': 'your_api_key', 'data_type': 'url'}
|
|
168
|
+
response = requests.post("https://www.productcategorization.com/api/iab/iab_web_content_filtering_url.php", data=payload)
|
|
169
|
+
print(response.json())
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
### Image Categorization
|
|
175
|
+
|
|
176
|
+
Classify products using image URLs or local image files (Shopify Taxonomy + attribute extraction):
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
result = api.categorize_image(image_url="https://images.com/product.jpg", text="Product title")
|
|
180
|
+
print(result)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
**Example Function:**
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
import requests
|
|
187
|
+
import io
|
|
188
|
+
|
|
189
|
+
def call_api(image_url, text, api_key):
|
|
190
|
+
api_endpoint = 'https://www.productcategorization.com/api/ecommerce/ecommerce_shopify_image.php'
|
|
191
|
+
response = requests.get(image_url)
|
|
192
|
+
if response.status_code != 200:
|
|
193
|
+
return {'error': 'Failed to download image'}
|
|
194
|
+
image_file = io.BytesIO(response.content)
|
|
195
|
+
data = {'ip': '0', 'api_key': api_key, 'login': '0', 'text': text}
|
|
196
|
+
files = {'image': ('image.jpg', image_file, 'image/jpeg')}
|
|
197
|
+
response = requests.post(api_endpoint, data=data, files=files)
|
|
198
|
+
return response.json()
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Advanced Options
|
|
204
|
+
|
|
205
|
+
### Buyer Personas and Confidence Scores
|
|
206
|
+
|
|
207
|
+
Our AI delivers a unique set of buyer personas for every product—ideal for market analysis, targeted marketing, or persona-based analytics.
|
|
208
|
+
Enable confidence scoring to obtain relevance weights for each persona:
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
result = api.categorize_text("Eco-Friendly Notebook", confidence=1)
|
|
212
|
+
print(result["buyer_personas_confidence_selection"])
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Context Expansion
|
|
216
|
+
|
|
217
|
+
For short or ambiguous inputs, enable `expand_context=1` to let our AI generate an enhanced description for improved classification accuracy:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
result = api.categorize_text("3pc Yellow Highlighters", expand_context=1)
|
|
221
|
+
print(result["expanded_context"])
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
---
|
|
225
|
+
|
|
226
|
+
## Error Handling
|
|
227
|
+
|
|
228
|
+
All API responses include a `status` code for programmatic error handling:
|
|
229
|
+
|
|
230
|
+
| Status | Meaning |
|
|
231
|
+
| ------ | ---------------------------------------- |
|
|
232
|
+
| 200 | Request was successful |
|
|
233
|
+
| 400 | Request malformed (check parameters) |
|
|
234
|
+
| 401 | Invalid API key (check or purchase key) |
|
|
235
|
+
| 403 | Quota exhausted (upgrade or add credits) |
|
|
236
|
+
|
|
237
|
+
Example error handling in Python:
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
if result["status"] != 200:
|
|
241
|
+
print(f"API Error: {result.get('message', 'Unknown error')}")
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## Best Practices
|
|
247
|
+
|
|
248
|
+
* **Monitor Remaining Credits:** Every response includes `total_credits` and `remaining_credits`. Plan your usage to avoid interruptions.
|
|
249
|
+
* **Respect Rate Limits:** Default is 60 requests per minute. Contact support for higher needs.
|
|
250
|
+
* **Secure Your API Key:** Do not embed directly in code if publishing open-source.
|
|
251
|
+
* **Use Context Expansion When Needed:** For short/ambiguous product titles, enable `expand_context`.
|
|
252
|
+
* **Batch Requests:** For large datasets, consider batching requests and handling quota gracefully.
|
|
253
|
+
|
|
254
|
+
---
|
|
255
|
+
|
|
256
|
+
## Integration Examples
|
|
257
|
+
|
|
258
|
+
### Python Example
|
|
259
|
+
|
|
260
|
+
```python
|
|
261
|
+
from productcategorizationapi import ProductCategorizationAPI
|
|
262
|
+
|
|
263
|
+
api = ProductCategorizationAPI(api_key="your_api_key")
|
|
264
|
+
result = api.categorize_text("Fluorescent Highlighters 3pc Yellow")
|
|
265
|
+
print(result["classification"])
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### JavaScript Example
|
|
269
|
+
|
|
270
|
+
```javascript
|
|
271
|
+
const apiBaseUrl = "https://www.productcategorization.com/api/ecommerce/ecommerce_category6_get.php?";
|
|
272
|
+
const apiKey = "your_api_key";
|
|
273
|
+
const queryText = "Fluorescent Highlighters 3pc Yellow";
|
|
274
|
+
const encodedQueryText = encodeURIComponent(queryText);
|
|
275
|
+
const finalUrl = `${apiBaseUrl}query=${encodedQueryText}&api_key=${apiKey}`;
|
|
276
|
+
|
|
277
|
+
fetch(finalUrl)
|
|
278
|
+
.then(response => response.json())
|
|
279
|
+
.then(data => console.log(data));
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Ruby Example
|
|
283
|
+
|
|
284
|
+
```ruby
|
|
285
|
+
require 'uri'
|
|
286
|
+
require 'net/http'
|
|
287
|
+
|
|
288
|
+
api_base_url = "https://www.productcategorization.com/api/ecommerce/ecommerce_category6_get.php"
|
|
289
|
+
api_key = "your_api_key"
|
|
290
|
+
query_text = "Fluorescent Highlighters 3pc Yellow"
|
|
291
|
+
|
|
292
|
+
encoded_query = URI.encode_www_form_component(query_text)
|
|
293
|
+
url = URI("#{api_base_url}?query=#{encoded_query}&api_key=#{api_key}")
|
|
294
|
+
|
|
295
|
+
response = Net::HTTP.get(url)
|
|
296
|
+
puts response
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### C# Example
|
|
300
|
+
|
|
301
|
+
```csharp
|
|
302
|
+
using System;
|
|
303
|
+
using System.Net.Http;
|
|
304
|
+
using System.Threading.Tasks;
|
|
305
|
+
|
|
306
|
+
class Program {
|
|
307
|
+
static async Task Main(string[] args) {
|
|
308
|
+
var apiBaseUrl = "https://www.productcategorization.com/api/ecommerce/ecommerce_category6_get.php?";
|
|
309
|
+
var apiKey = "your_api_key";
|
|
310
|
+
var queryText = "Fluorescent Highlighters 3pc Yellow";
|
|
311
|
+
var encodedQueryText = Uri.EscapeDataString(queryText);
|
|
312
|
+
var finalUrl = $"{apiBaseUrl}query={encodedQueryText}&api_key={apiKey}";
|
|
313
|
+
|
|
314
|
+
using (HttpClient client = new HttpClient()) {
|
|
315
|
+
var response = await client.GetStringAsync(finalUrl);
|
|
316
|
+
Console.WriteLine(response);
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## Contact & Support
|
|
325
|
+
|
|
326
|
+
Need a higher rate limit, a custom classifier, or additional support?
|
|
327
|
+
Visit [Contact](https://www.productcategorization.com/contact), or email support via your account dashboard.
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## Related Services
|
|
332
|
+
|
|
333
|
+
Leverage our broader suite of AI-powered APIs to cover every aspect of your business’s data intelligence and privacy needs:
|
|
334
|
+
|
|
335
|
+
* **[Comment Moderation API](https://www.contentmoderationapi.net) – comment moderation api:**
|
|
336
|
+
Safeguard your community, app, or platform with industry-leading AI moderation for comments and user-generated content. Detect profanity, hate speech, spam, and toxicity in real time.
|
|
337
|
+
|
|
338
|
+
* **[Live Video Anonymization](https://www.anomyizationapi.com) – live video anonymization:**
|
|
339
|
+
Protect privacy with automatic anonymization of faces and sensitive objects in live video streams, supporting GDPR compliance and safeguarding user identities.
|
|
340
|
+
|
|
341
|
+
* **[Text Redaction API](https://www.redactionapi.net) – text redaction api:**
|
|
342
|
+
Redact personal data, financial information, or any sensitive fields from documents at scale using our high-precision redaction API.
|
|
343
|
+
|
|
344
|
+
* **[Company Enrichment Data](https://www.companydataapi.com) – company enrichment data:**
|
|
345
|
+
Instantly enhance your CRM, sales, or analytics platform with up-to-date company profiles, firmographics, and contact data.
|
|
346
|
+
|
|
347
|
+
* **[Domain Categorization Data](https://www.urlcategorizationdatabase.com) – domain categorization data:**
|
|
348
|
+
Access the world’s largest database of categorized domains for cybersecurity, web filtering, and content safety.
|
|
349
|
+
|
|
350
|
+
* **[AI Contract Analysis](https://www.aicontractreviewtool.com) – ai contract analysis:**
|
|
351
|
+
Revolutionize contract review workflows with advanced AI-driven contract analysis, risk detection, and compliance assessment.
|
|
352
|
+
|
|
353
|
+
Our APIs integrate seamlessly with your product workflows, providing reliable, scalable, and secure endpoints for your business logic.
|
|
354
|
+
|
|
355
|
+
---
|
|
356
|
+
|
|
357
|
+
## References & Further Reading
|
|
358
|
+
|
|
359
|
+
For best-in-class taxonomy, AI, and categorization research, explore:
|
|
360
|
+
|
|
361
|
+
* [Stanford AI Lab](https://ai.stanford.edu)
|
|
362
|
+
* [MIT CSAIL](https://www.csail.mit.edu)
|
|
363
|
+
* [Berkeley AI Research](https://bair.berkeley.edu)
|
|
364
|
+
* [Oxford Internet Institute](https://www.oii.ox.ac.uk)
|
|
365
|
+
* [UCL Centre for Artificial Intelligence](https://www.ucl.ac.uk/ai)
|
|
366
|
+
* [Google AI Blog](https://ai.googleblog.com/)
|
|
367
|
+
* [Microsoft Research](https://www.microsoft.com/en-us/research/)
|
|
368
|
+
* [arXiv Machine Learning](https://arxiv.org/list/cs.LG/recent)
|
|
369
|
+
|
|
370
|
+
For taxonomy standards and e-commerce data:
|
|
371
|
+
|
|
372
|
+
* [Google Shopping Taxonomy](https://support.google.com/merchants/answer/6324436)
|
|
373
|
+
* [Shopify Product Taxonomy](https://github.com/Shopify/product-taxonomy)
|
|
374
|
+
|
|
375
|
+
---
|
|
376
|
+
|
|
377
|
+
## License
|
|
378
|
+
|
|
379
|
+
This library is distributed under the MIT License.
|
|
380
|
+
|
|
381
|
+
---
|
|
382
|
+
|
|
383
|
+
## Disclaimer
|
|
384
|
+
|
|
385
|
+
This project is unaffiliated with Google, Shopify, or Amazon.
|
|
386
|
+
All trademarks are property of their respective owners.
|
|
387
|
+
|
|
388
|
+
---
|
|
389
|
+
|
|
390
|
+
# `__init__.py` Example
|
|
391
|
+
|
|
392
|
+
```python
|
|
393
|
+
import requests
|
|
394
|
+
|
|
395
|
+
class ProductCategorizationAPI:
|
|
396
|
+
def __init__(self, api_key):
|
|
397
|
+
self.api_key = api_key
|
|
398
|
+
self.base_url = "https://www.productcategorization.com/api/"
|
|
399
|
+
|
|
400
|
+
def categorize_text(self, text, confidence=0, expand_context=0):
|
|
401
|
+
params = {
|
|
402
|
+
"query": text,
|
|
403
|
+
"api_key": self.api_key,
|
|
404
|
+
"confidence": str(confidence),
|
|
405
|
+
"expand_context": str(expand_context)
|
|
406
|
+
}
|
|
407
|
+
response = requests.get(self.base_url + "ecommerce/ecommerce_category6_get.php", params=params)
|
|
408
|
+
return response.json()
|
|
409
|
+
|
|
410
|
+
def categorize_url(self, url):
|
|
411
|
+
payload = {
|
|
412
|
+
'query': url,
|
|
413
|
+
'api_key': self.api_key,
|
|
414
|
+
'data_type': 'url'
|
|
415
|
+
}
|
|
416
|
+
headers = {
|
|
417
|
+
'Content-Type': 'application/x-www-form-urlencoded'
|
|
418
|
+
}
|
|
419
|
+
response = requests.post(self.base_url + "iab/iab_web_content_filtering_url.php", data=payload, headers=headers)
|
|
420
|
+
return response.json()
|
|
421
|
+
|
|
422
|
+
def categorize_image(self, image_url, text="", ip="0", login="0"):
|
|
423
|
+
# Download image to memory
|
|
424
|
+
image_response = requests.get(image_url)
|
|
425
|
+
if image_response.status_code != 200:
|
|
426
|
+
return {'error': 'Failed to download image'}
|
|
427
|
+
import io
|
|
428
|
+
image_file = io.BytesIO(image_response.content)
|
|
429
|
+
data = {
|
|
430
|
+
'ip': ip,
|
|
431
|
+
'api_key': self.api_key,
|
|
432
|
+
'login': login,
|
|
433
|
+
'text': text
|
|
434
|
+
}
|
|
435
|
+
files = {
|
|
436
|
+
'image': ('image.jpg', image_file, 'image/jpeg')
|
|
437
|
+
}
|
|
438
|
+
response = requests.post(self.base_url + "ecommerce/ecommerce_shopify_image.php", data=data, files=files)
|
|
439
|
+
return response.json()
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
---
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
productcategorizationapi/__init__.py,sha256=du2lXFJfeHR5uNtiLfvzCep0xKLUi1LaFKI_v6cuwYc,1734
|
|
2
|
+
test/__init__.py,sha256=wTdBA0QbwpUvhKcdWegp481T0tvA6XcAb9BkdSDkzvE,2869
|
|
3
|
+
websiteclassificationapi/__init__.py,sha256=wTdBA0QbwpUvhKcdWegp481T0tvA6XcAb9BkdSDkzvE,2869
|
|
4
|
+
productcategorizationapi-1.1.dist-info/LICENSE,sha256=7Fog1crdAQKBoXwk300JwPHIMJV-386OAuvTrd-N8hs,1083
|
|
5
|
+
productcategorizationapi-1.1.dist-info/METADATA,sha256=pI0w_SQWKyYwURZngqBbTXrHnkrAWcV0oubQXK4jyv8,15421
|
|
6
|
+
productcategorizationapi-1.1.dist-info/WHEEL,sha256=D1Wh14kWDxPnrM-5t_6UCB-UuQNrEODtRa3vF4OsvQY,97
|
|
7
|
+
productcategorizationapi-1.1.dist-info/top_level.txt,sha256=7rteRm_SKjI0LumUZysikOpG7fGMo63zBnHZ1yQDg30,30
|
|
8
|
+
productcategorizationapi-1.1.dist-info/RECORD,,
|
test/__init__.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python wrapper for website categorization API (service of www.websitecategorizationapi.com)
|
|
3
|
+
|
|
4
|
+
Explanation of available classifiers:
|
|
5
|
+
|
|
6
|
+
The `category_type` argument should be set to either iab1 (Tier 1 categorization) or iab2 (Tier 2 categorization) for general websites, or to ecommerce1, ecommerce2 or ecommerce3 for E-commerce or product websites.
|
|
7
|
+
|
|
8
|
+
IAB Tier 1 categorization returns probabilities of text being classified as one of 29 possible categories.
|
|
9
|
+
|
|
10
|
+
IAB Tier 2 categorization returns probabilities of text being classified as one of 447 possible categories.
|
|
11
|
+
|
|
12
|
+
Ecommerce Tier 1 categorization returns probabilities of text being classified as one of 21 possible categories.
|
|
13
|
+
|
|
14
|
+
Ecommerce Tier 2 website categorization returns probabilities of text being classified as one of 182 possible categories.
|
|
15
|
+
|
|
16
|
+
Ecommerce Tier 3 website categorization returns probabilities of text being classified as one of 1113 possible categories.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import requests
|
|
20
|
+
import json
|
|
21
|
+
|
|
22
|
+
class websiteclassificationapi:
    """Wrapper around the websitecategorizationapi.com URL endpoints."""

    # Endpoint for each supported ``category_type`` value.
    _ENDPOINTS = {
        'iab1': "https://www.websitecategorizationapi.com/api/iab/iab_category1_url.php",
        'iab2': "https://www.websitecategorizationapi.com/api/iab/iab_category2_url.php",
        'ecommerce1': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category1.php",
        'ecommerce2': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category2.php",
        'ecommerce3': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category3.php",
    }

    # Seconds to wait for the API; ``requests`` itself never times out.
    _TIMEOUT = 30

    def __init__(self):
        pass

    @staticmethod
    def _normalize_url(url):
        """Return ``url`` with ``http://`` prepended unless it has a scheme.

        The previous check combined two ``not in`` tests with ``or``, which
        is true for every input, so URLs that already started with
        ``http://`` or ``https://`` were prefixed a second time.
        """
        if url.lower().startswith(('http://', 'https://')):
            return url
        return 'http://' + url

    @staticmethod
    def get_categorization(url, api_key, category_type):
        """
        url = URL of website
        api_key = api key
        category_type = iab1 or iab2 for general websites, ecommerce1, ecommerce2 and ecommerce3 for E-commerce or product websites

        Returns the ``category`` of the first entry in the response's
        ``classification`` list, or the string 'url could not be loaded'
        when the response does not have that shape.

        Returns None (after printing the reason) when ``category_type`` is
        not one of the supported values, when the HTTP request fails, or
        when the response body is not valid JSON. None was already the
        result in these cases, so existing callers are unaffected.
        """
        url_api = websiteclassificationapi._ENDPOINTS.get(category_type)
        if url_api is None:
            # Previously this surfaced only as a swallowed UnboundLocalError.
            print("Unsupported category_type: %r" % (category_type,))
            return None

        # A dict payload lets requests form-encode every field, including
        # api_key, which used to be concatenated into the body unescaped.
        payload = {
            'query': websiteclassificationapi._normalize_url(url),
            'api_key': api_key,
            'data_type': 'url',
        }
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        try:
            response = requests.request(
                "POST",
                url_api,
                headers=headers,
                data=payload,
                timeout=websiteclassificationapi._TIMEOUT,
            )
            data = json.loads(response.text)
        except (requests.RequestException, ValueError) as e:
            # Best-effort contract kept from the original: report and
            # return None instead of raising.
            print(e)
            return None

        try:
            return data['classification'][0]['category']
        except (KeyError, IndexError, TypeError):
            return 'url could not be loaded'
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Python wrapper for website categorization API (service of www.websitecategorizationapi.com)
|
|
3
|
+
|
|
4
|
+
Explanation of available classifiers:
|
|
5
|
+
|
|
6
|
+
The `category_type` argument should be set to either iab1 (Tier 1 categorization) or iab2 (Tier 2 categorization) for general websites, or to ecommerce1, ecommerce2 or ecommerce3 for E-commerce or product websites.
|
|
7
|
+
|
|
8
|
+
IAB Tier 1 categorization returns probabilities of text being classified as one of 29 possible categories.
|
|
9
|
+
|
|
10
|
+
IAB Tier 2 categorization returns probabilities of text being classified as one of 447 possible categories.
|
|
11
|
+
|
|
12
|
+
Ecommerce Tier 1 categorization returns probabilities of text being classified as one of 21 possible categories.
|
|
13
|
+
|
|
14
|
+
Ecommerce Tier 2 website categorization returns probabilities of text being classified as one of 182 possible categories.
|
|
15
|
+
|
|
16
|
+
Ecommerce Tier 3 website categorization returns probabilities of text being classified as one of 1113 possible categories.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import requests
|
|
20
|
+
import json
|
|
21
|
+
|
|
22
|
+
class websiteclassificationapi:
    """Wrapper around the websitecategorizationapi.com URL endpoints."""

    # Endpoint for each supported ``category_type`` value.
    _ENDPOINTS = {
        'iab1': "https://www.websitecategorizationapi.com/api/iab/iab_category1_url.php",
        'iab2': "https://www.websitecategorizationapi.com/api/iab/iab_category2_url.php",
        'ecommerce1': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category1.php",
        'ecommerce2': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category2.php",
        'ecommerce3': "https://www.websitecategorizationapi.com/api/iab/gpt/gpt_category3.php",
    }

    # Seconds to wait for the API; ``requests`` itself never times out.
    _TIMEOUT = 30

    def __init__(self):
        pass

    @staticmethod
    def _normalize_url(url):
        """Return ``url`` with ``http://`` prepended unless it has a scheme.

        The previous check combined two ``not in`` tests with ``or``, which
        is true for every input, so URLs that already started with
        ``http://`` or ``https://`` were prefixed a second time.
        """
        if url.lower().startswith(('http://', 'https://')):
            return url
        return 'http://' + url

    @staticmethod
    def get_categorization(url, api_key, category_type):
        """
        url = URL of website
        api_key = api key
        category_type = iab1 or iab2 for general websites, ecommerce1, ecommerce2 and ecommerce3 for E-commerce or product websites

        Returns the ``category`` of the first entry in the response's
        ``classification`` list, or the string 'url could not be loaded'
        when the response does not have that shape.

        Returns None (after printing the reason) when ``category_type`` is
        not one of the supported values, when the HTTP request fails, or
        when the response body is not valid JSON. None was already the
        result in these cases, so existing callers are unaffected.
        """
        url_api = websiteclassificationapi._ENDPOINTS.get(category_type)
        if url_api is None:
            # Previously this surfaced only as a swallowed UnboundLocalError.
            print("Unsupported category_type: %r" % (category_type,))
            return None

        # A dict payload lets requests form-encode every field, including
        # api_key, which used to be concatenated into the body unescaped.
        payload = {
            'query': websiteclassificationapi._normalize_url(url),
            'api_key': api_key,
            'data_type': 'url',
        }
        headers = {
            'Content-Type': 'application/x-www-form-urlencoded',
        }
        try:
            response = requests.request(
                "POST",
                url_api,
                headers=headers,
                data=payload,
                timeout=websiteclassificationapi._TIMEOUT,
            )
            data = json.loads(response.text)
        except (requests.RequestException, ValueError) as e:
            # Best-effort contract kept from the original: report and
            # return None instead of raising.
            print(e)
            return None

        try:
            return data['classification'][0]['category']
        except (KeyError, IndexError, TypeError):
            return 'url could not be loaded'
|