nosible 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nosible-0.2.4/src/nosible.egg-info → nosible-0.2.6}/PKG-INFO +40 -41
- {nosible-0.2.4 → nosible-0.2.6}/README.md +39 -40
- {nosible-0.2.4 → nosible-0.2.6}/pyproject.toml +1 -1
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/result.py +122 -11
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/result_set.py +42 -29
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/search.py +82 -22
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/search_set.py +26 -26
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/snippet.py +2 -2
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/snippet_set.py +2 -2
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/classes/web_page.py +11 -56
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/nosible_client.py +360 -84
- {nosible-0.2.4 → nosible-0.2.6/src/nosible.egg-info}/PKG-INFO +40 -41
- {nosible-0.2.4 → nosible-0.2.6}/tests/test_01_nosible.py +14 -14
- {nosible-0.2.4 → nosible-0.2.6}/tests/test_02_results.py +11 -11
- {nosible-0.2.4 → nosible-0.2.6}/tests/test_03_search_searchset.py +6 -6
- {nosible-0.2.4 → nosible-0.2.6}/LICENSE +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/setup.cfg +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/setup.py +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/__init__.py +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/utils/json_tools.py +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible/utils/rate_limiter.py +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible.egg-info/SOURCES.txt +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible.egg-info/dependency_links.txt +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible.egg-info/requires.txt +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/src/nosible.egg-info/top_level.txt +0 -0
- {nosible-0.2.4 → nosible-0.2.6}/tests/test_04_snippets.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nosible
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Python client for the NOSIBLE Search API
|
|
5
5
|
Home-page: https://github.com/NosibleAI/nosible
|
|
6
6
|
Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
|
|
@@ -132,11 +132,11 @@ os.environ["LLM_API_KEY"] = "sk-..."
|
|
|
132
132
|
|
|
133
133
|
### 🎯 Core Workflows
|
|
134
134
|
|
|
135
|
-
| I need | Method
|
|
136
|
-
|
|
137
|
-
| Single query, up to 100 results | `search`
|
|
138
|
-
| Multiple queries in parallel | `searches`
|
|
139
|
-
| Thousands of results (100–10k) | `bulk_search`
|
|
135
|
+
| I need | Method | Use case |
|
|
136
|
+
|---------------------------------|-----------------|-------------------------|
|
|
137
|
+
| Single query, up to 100 results | `fast_search` | Interactive lookups |
|
|
138
|
+
| Multiple queries in parallel | `fast_searches` | Dashboards, comparisons |
|
|
139
|
+
| Thousands of results (1k–10k) | `bulk_search` | Analytics, offline jobs |
|
|
140
140
|
|
|
141
141
|
|
|
142
142
|
### 🚀 Examples
|
|
@@ -172,17 +172,16 @@ with Nosible(
|
|
|
172
172
|
llm_api_key="sk-...",
|
|
173
173
|
openai_base_url="https://api.openrouter.ai/v1"
|
|
174
174
|
) as client:
|
|
175
|
-
results = client.search(
|
|
175
|
+
results = client.fast_search(
|
|
176
176
|
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
177
177
|
n_results=20,
|
|
178
|
+
language="en",
|
|
178
179
|
publish_start="2020-06-01",
|
|
179
180
|
publish_end="2025-06-30",
|
|
180
|
-
include_netlocs=["nytimes.com", "techcrunch.com"],
|
|
181
|
-
exclude_netlocs=["example.com"],
|
|
182
181
|
visited_start="2023-06-01",
|
|
183
182
|
visited_end="2025-06-29",
|
|
184
|
-
|
|
185
|
-
|
|
183
|
+
include_netlocs=["nytimes.com", "techcrunch.com"],
|
|
184
|
+
exclude_netlocs=["example.com"],
|
|
186
185
|
include_companies=["/m/04sv4"], # Microsoft's GKID
|
|
187
186
|
exclude_companies=["/m/045c7b"] # Google GKID
|
|
188
187
|
)
|
|
@@ -203,7 +202,7 @@ with Nosible(
|
|
|
203
202
|
```python
|
|
204
203
|
# Example of using your own expansions
|
|
205
204
|
with Nosible() as nos:
|
|
206
|
-
results = nos.search(
|
|
205
|
+
results = nos.fast_search(
|
|
207
206
|
question="How have the Trump tariffs impacted the US economy?",
|
|
208
207
|
expansions=[
|
|
209
208
|
"What are the consequences of Trump's 2018 steel and aluminum tariffs on American manufacturers?",
|
|
@@ -232,7 +231,7 @@ Allows you to run multiple searches concurrently and `yields` the results as the
|
|
|
232
231
|
from nosible import Nosible
|
|
233
232
|
|
|
234
233
|
with Nosible(nosible_api_key="basic|abcd1234...", llm_api_key="sk-...") as client:
|
|
235
|
-
for batch in client.searches(
|
|
234
|
+
for batch in client.fast_searches(
|
|
236
235
|
questions=[
|
|
237
236
|
"What are the terms of the partnership between Microsoft and OpenAI?",
|
|
238
237
|
"What exclusivity or non-compete clauses are included in their partnership?"
|
|
@@ -249,7 +248,7 @@ Bulk search enables you to retrieve a large number of results in a single reques
|
|
|
249
248
|
|
|
250
249
|
- Use the `bulk_search` method when you need more than 1,000 results for a single query.
|
|
251
250
|
- You can request between **1,000 and 10,000** results per query.
|
|
252
|
-
- All parameters available in the standard `search` method—such as `expansions`, `include_companies`,
|
|
251
|
+
- All parameters available in the standard `search` method—such as `expansions`, `include_companies`, and more—are also supported in `bulk_search`.
|
|
253
252
|
- A bulk search for 10,000 results typically completes in about 30 seconds or less.
|
|
254
253
|
|
|
255
254
|
```python
|
|
@@ -272,11 +271,11 @@ Add two ResultSets together:
|
|
|
272
271
|
from nosible import Nosible
|
|
273
272
|
|
|
274
273
|
with Nosible(nosible_api_key="basic|abcd1234...") as client:
|
|
275
|
-
r1 = client.search(
|
|
274
|
+
r1 = client.fast_search(
|
|
276
275
|
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
277
276
|
n_results=5
|
|
278
277
|
)
|
|
279
|
-
r2 = client.search(
|
|
278
|
+
r2 = client.fast_search(
|
|
280
279
|
question="How is research governance and decision-making structured between Google and DeepMind?",
|
|
281
280
|
n_results=5
|
|
282
281
|
)
|
|
@@ -300,7 +299,7 @@ with Nosible(nosible_api_key="basic|abcd1234...") as client:
|
|
|
300
299
|
include_netlocs=["arxiv.org", "bbc.com"],
|
|
301
300
|
certain=True
|
|
302
301
|
)
|
|
303
|
-
results = client.search(search=search)
|
|
302
|
+
results = client.fast_search(search=search)
|
|
304
303
|
print([r for r in results])
|
|
305
304
|
```
|
|
306
305
|
|
|
@@ -317,7 +316,7 @@ This fetches a sentiment score for each search result.
|
|
|
317
316
|
from nosible import Nosible
|
|
318
317
|
|
|
319
318
|
with Nosible(nosible_api_key="basic|abcd1234...", llm_api_key="sk-...") as client:
|
|
320
|
-
results = client.search(
|
|
319
|
+
results = client.fast_search(
|
|
321
320
|
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
322
321
|
n_results=1
|
|
323
322
|
)
|
|
@@ -333,29 +332,29 @@ Supported formats for saving and loading:
|
|
|
333
332
|
from nosible import Nosible, ResultSet
|
|
334
333
|
|
|
335
334
|
with Nosible(nosible_api_key="basic|abcd1234...") as client:
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
335
|
+
combined = client.fast_search(
|
|
336
|
+
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
337
|
+
n_results=5
|
|
338
|
+
) + client.fast_search(
|
|
339
|
+
question="How is research governance and decision-making structured between Google and DeepMind?",
|
|
340
|
+
n_results=5
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
# Save
|
|
344
|
+
combined.write_csv("all_news.csv")
|
|
345
|
+
combined.write_json("all_news.json")
|
|
346
|
+
combined.write_parquet("all_news.parquet")
|
|
347
|
+
combined.write_ipc("all_news.arrow")
|
|
348
|
+
combined.write_duckdb("all_news.duckdb", table_name="news")
|
|
349
|
+
combined.write_ndjson("all_news.ndjson")
|
|
350
|
+
|
|
351
|
+
# Load
|
|
352
|
+
rs_csv = ResultSet.read_csv("all_news.csv")
|
|
353
|
+
rs_json = ResultSet.read_json("all_news.json")
|
|
354
|
+
rs_parq = ResultSet.read_parquet("all_news.parquet")
|
|
355
|
+
rs_arrow = ResultSet.read_ipc("all_news.arrow")
|
|
356
|
+
rs_duckdb = ResultSet.read_duckdb("all_news.duckdb")
|
|
357
|
+
rs_ndjson = ResultSet.read_ndjson("all_news.ndjson")
|
|
359
358
|
```
|
|
360
359
|
|
|
361
360
|
#### More Examples
|
|
@@ -89,11 +89,11 @@ os.environ["LLM_API_KEY"] = "sk-..."
|
|
|
89
89
|
|
|
90
90
|
### 🎯 Core Workflows
|
|
91
91
|
|
|
92
|
-
| I need | Method
|
|
93
|
-
|
|
94
|
-
| Single query, up to 100 results | `search`
|
|
95
|
-
| Multiple queries in parallel | `searches`
|
|
96
|
-
| Thousands of results (100–10k) | `bulk_search`
|
|
92
|
+
| I need | Method | Use case |
|
|
93
|
+
|---------------------------------|-----------------|-------------------------|
|
|
94
|
+
| Single query, up to 100 results | `fast_search` | Interactive lookups |
|
|
95
|
+
| Multiple queries in parallel | `fast_searches` | Dashboards, comparisons |
|
|
96
|
+
| Thousands of results (1k–10k) | `bulk_search` | Analytics, offline jobs |
|
|
97
97
|
|
|
98
98
|
|
|
99
99
|
### 🚀 Examples
|
|
@@ -129,17 +129,16 @@ with Nosible(
|
|
|
129
129
|
llm_api_key="sk-...",
|
|
130
130
|
openai_base_url="https://api.openrouter.ai/v1"
|
|
131
131
|
) as client:
|
|
132
|
-
results = client.search(
|
|
132
|
+
results = client.fast_search(
|
|
133
133
|
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
134
134
|
n_results=20,
|
|
135
|
+
language="en",
|
|
135
136
|
publish_start="2020-06-01",
|
|
136
137
|
publish_end="2025-06-30",
|
|
137
|
-
include_netlocs=["nytimes.com", "techcrunch.com"],
|
|
138
|
-
exclude_netlocs=["example.com"],
|
|
139
138
|
visited_start="2023-06-01",
|
|
140
139
|
visited_end="2025-06-29",
|
|
141
|
-
|
|
142
|
-
|
|
140
|
+
include_netlocs=["nytimes.com", "techcrunch.com"],
|
|
141
|
+
exclude_netlocs=["example.com"],
|
|
143
142
|
include_companies=["/m/04sv4"], # Microsoft's GKID
|
|
144
143
|
exclude_companies=["/m/045c7b"] # Google GKID
|
|
145
144
|
)
|
|
@@ -160,7 +159,7 @@ with Nosible(
|
|
|
160
159
|
```python
|
|
161
160
|
# Example of using your own expansions
|
|
162
161
|
with Nosible() as nos:
|
|
163
|
-
results = nos.search(
|
|
162
|
+
results = nos.fast_search(
|
|
164
163
|
question="How have the Trump tariffs impacted the US economy?",
|
|
165
164
|
expansions=[
|
|
166
165
|
"What are the consequences of Trump's 2018 steel and aluminum tariffs on American manufacturers?",
|
|
@@ -189,7 +188,7 @@ Allows you to run multiple searches concurrently and `yields` the results as the
|
|
|
189
188
|
from nosible import Nosible
|
|
190
189
|
|
|
191
190
|
with Nosible(nosible_api_key="basic|abcd1234...", llm_api_key="sk-...") as client:
|
|
192
|
-
for batch in client.searches(
|
|
191
|
+
for batch in client.fast_searches(
|
|
193
192
|
questions=[
|
|
194
193
|
"What are the terms of the partnership between Microsoft and OpenAI?",
|
|
195
194
|
"What exclusivity or non-compete clauses are included in their partnership?"
|
|
@@ -206,7 +205,7 @@ Bulk search enables you to retrieve a large number of results in a single reques
|
|
|
206
205
|
|
|
207
206
|
- Use the `bulk_search` method when you need more than 1,000 results for a single query.
|
|
208
207
|
- You can request between **1,000 and 10,000** results per query.
|
|
209
|
-
- All parameters available in the standard `search` method—such as `expansions`, `include_companies`,
|
|
208
|
+
- All parameters available in the standard `search` method—such as `expansions`, `include_companies`, and more—are also supported in `bulk_search`.
|
|
210
209
|
- A bulk search for 10,000 results typically completes in about 30 seconds or less.
|
|
211
210
|
|
|
212
211
|
```python
|
|
@@ -229,11 +228,11 @@ Add two ResultSets together:
|
|
|
229
228
|
from nosible import Nosible
|
|
230
229
|
|
|
231
230
|
with Nosible(nosible_api_key="basic|abcd1234...") as client:
|
|
232
|
-
r1 = client.search(
|
|
231
|
+
r1 = client.fast_search(
|
|
233
232
|
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
234
233
|
n_results=5
|
|
235
234
|
)
|
|
236
|
-
r2 = client.search(
|
|
235
|
+
r2 = client.fast_search(
|
|
237
236
|
question="How is research governance and decision-making structured between Google and DeepMind?",
|
|
238
237
|
n_results=5
|
|
239
238
|
)
|
|
@@ -257,7 +256,7 @@ with Nosible(nosible_api_key="basic|abcd1234...") as client:
|
|
|
257
256
|
include_netlocs=["arxiv.org", "bbc.com"],
|
|
258
257
|
certain=True
|
|
259
258
|
)
|
|
260
|
-
results = client.search(search=search)
|
|
259
|
+
results = client.fast_search(search=search)
|
|
261
260
|
print([r for r in results])
|
|
262
261
|
```
|
|
263
262
|
|
|
@@ -274,7 +273,7 @@ This fetches a sentiment score for each search result.
|
|
|
274
273
|
from nosible import Nosible
|
|
275
274
|
|
|
276
275
|
with Nosible(nosible_api_key="basic|abcd1234...", llm_api_key="sk-...") as client:
|
|
277
|
-
results = client.search(
|
|
276
|
+
results = client.fast_search(
|
|
278
277
|
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
279
278
|
n_results=1
|
|
280
279
|
)
|
|
@@ -290,29 +289,29 @@ Supported formats for saving and loading:
|
|
|
290
289
|
from nosible import Nosible, ResultSet
|
|
291
290
|
|
|
292
291
|
with Nosible(nosible_api_key="basic|abcd1234...") as client:
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
292
|
+
combined = client.fast_search(
|
|
293
|
+
question="What are the terms of the partnership between Microsoft and OpenAI?",
|
|
294
|
+
n_results=5
|
|
295
|
+
) + client.fast_search(
|
|
296
|
+
question="How is research governance and decision-making structured between Google and DeepMind?",
|
|
297
|
+
n_results=5
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# Save
|
|
301
|
+
combined.write_csv("all_news.csv")
|
|
302
|
+
combined.write_json("all_news.json")
|
|
303
|
+
combined.write_parquet("all_news.parquet")
|
|
304
|
+
combined.write_ipc("all_news.arrow")
|
|
305
|
+
combined.write_duckdb("all_news.duckdb", table_name="news")
|
|
306
|
+
combined.write_ndjson("all_news.ndjson")
|
|
307
|
+
|
|
308
|
+
# Load
|
|
309
|
+
rs_csv = ResultSet.read_csv("all_news.csv")
|
|
310
|
+
rs_json = ResultSet.read_json("all_news.json")
|
|
311
|
+
rs_parq = ResultSet.read_parquet("all_news.parquet")
|
|
312
|
+
rs_arrow = ResultSet.read_ipc("all_news.arrow")
|
|
313
|
+
rs_duckdb = ResultSet.read_duckdb("all_news.duckdb")
|
|
314
|
+
rs_ndjson = ResultSet.read_ndjson("all_news.ndjson")
|
|
316
315
|
```
|
|
317
316
|
|
|
318
317
|
#### More Examples
|
|
@@ -10,6 +10,7 @@ if TYPE_CHECKING:
|
|
|
10
10
|
from nosible.classes.result_set import ResultSet
|
|
11
11
|
else:
|
|
12
12
|
ResultSet = None
|
|
13
|
+
import warnings
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
@dataclass(init=True, repr=True, eq=True, frozen=False)
|
|
@@ -39,6 +40,32 @@ class Result:
|
|
|
39
40
|
The language code of the content (e.g., 'en' for English).
|
|
40
41
|
similarity : float, optional
|
|
41
42
|
Similarity score with respect to a query or reference.
|
|
43
|
+
brand_safety : str, optional
|
|
44
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
45
|
+
language : str, optional
|
|
46
|
+
Language code to use in search (ISO 639-1 language code).
|
|
47
|
+
continent : str, optional
|
|
48
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
49
|
+
region : str, optional
|
|
50
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
51
|
+
country : str, optional
|
|
52
|
+
Country the results must come from.
|
|
53
|
+
sector : str, optional
|
|
54
|
+
GICS Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
55
|
+
industry_group : str, optional
|
|
56
|
+
GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
57
|
+
industry : str, optional
|
|
58
|
+
GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
59
|
+
sub_industry : str, optional
|
|
60
|
+
GICS Sub-industry classification of the content's subject.
|
|
61
|
+
iab_tier_1 : str, optional
|
|
62
|
+
IAB Tier 1 category for the content.
|
|
63
|
+
iab_tier_2 : str, optional
|
|
64
|
+
IAB Tier 2 category for the content.
|
|
65
|
+
iab_tier_3 : str, optional
|
|
66
|
+
IAB Tier 3 category for the content.
|
|
67
|
+
iab_tier_4 : str, optional
|
|
68
|
+
IAB Tier 4 category for the content.
|
|
42
69
|
|
|
43
70
|
Examples
|
|
44
71
|
--------
|
|
@@ -84,6 +111,30 @@ class Result:
|
|
|
84
111
|
"""Similarity score with respect to a query or reference."""
|
|
85
112
|
url_hash: str | None = None
|
|
86
113
|
"""A hash of the URL for quick comparisons."""
|
|
114
|
+
brand_safety: str | None = None
|
|
115
|
+
"""Whether it is safe, sensitive, or unsafe to advertise on this content."""
|
|
116
|
+
continent: str | None = None
|
|
117
|
+
"""Continent the results must come from (e.g., "Europe", "Asia")."""
|
|
118
|
+
region: str | None = None
|
|
119
|
+
"""Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean")."""
|
|
120
|
+
country: str | None = None
|
|
121
|
+
"""Country the results must come from."""
|
|
122
|
+
sector: str | None = None
|
|
123
|
+
"""GICS Sector the results must relate to (e.g., "Energy", "Information Technology")."""
|
|
124
|
+
industry_group: str | None = None
|
|
125
|
+
"""GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance")."""
|
|
126
|
+
industry: str | None = None
|
|
127
|
+
"""GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines")."""
|
|
128
|
+
sub_industry: str | None = None
|
|
129
|
+
"""GICS Sub-industry classification of the content's subject."""
|
|
130
|
+
iab_tier_1: str | None = None
|
|
131
|
+
"""IAB Tier 1 category for the content."""
|
|
132
|
+
iab_tier_2: str | None = None
|
|
133
|
+
"""IAB Tier 2 category for the content."""
|
|
134
|
+
iab_tier_3: str | None = None
|
|
135
|
+
"""IAB Tier 3 category for the content."""
|
|
136
|
+
iab_tier_4: str | None = None
|
|
137
|
+
"""IAB Tier 4 category for the content."""
|
|
87
138
|
|
|
88
139
|
def __str__(self) -> str:
|
|
89
140
|
"""
|
|
@@ -335,17 +386,30 @@ class Result:
|
|
|
335
386
|
algorithm: str = "hybrid-3",
|
|
336
387
|
publish_start: str = None,
|
|
337
388
|
publish_end: str = None,
|
|
338
|
-
include_netlocs: list = None,
|
|
339
|
-
exclude_netlocs: list = None,
|
|
340
389
|
visited_start: str = None,
|
|
341
390
|
visited_end: str = None,
|
|
342
391
|
certain: bool = None,
|
|
343
|
-
|
|
344
|
-
|
|
392
|
+
include_netlocs: list = None,
|
|
393
|
+
exclude_netlocs: list = None,
|
|
345
394
|
include_companies: list = None,
|
|
346
395
|
exclude_companies: list = None,
|
|
347
396
|
include_docs: list = None,
|
|
348
397
|
exclude_docs: list = None,
|
|
398
|
+
brand_safety: str = None,
|
|
399
|
+
language: str = None,
|
|
400
|
+
continent: str = None,
|
|
401
|
+
region: str = None,
|
|
402
|
+
country: str = None,
|
|
403
|
+
sector: str = None,
|
|
404
|
+
industry_group: str = None,
|
|
405
|
+
industry: str = None,
|
|
406
|
+
sub_industry: str = None,
|
|
407
|
+
iab_tier_1: str = None,
|
|
408
|
+
iab_tier_2: str = None,
|
|
409
|
+
iab_tier_3: str = None,
|
|
410
|
+
iab_tier_4: str = None,
|
|
411
|
+
instruction: str = None,
|
|
412
|
+
*args, **kwargs
|
|
349
413
|
) -> ResultSet:
|
|
350
414
|
"""
|
|
351
415
|
Find similar search results based on the content or metadata of this Result.
|
|
@@ -381,10 +445,6 @@ class Result:
|
|
|
381
445
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
382
446
|
exclude_netlocs : list of str, optional
|
|
383
447
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
384
|
-
include_languages : list of str, optional
|
|
385
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
386
|
-
exclude_languages : list of str, optional
|
|
387
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
388
448
|
include_companies : list of str, optional
|
|
389
449
|
Google KG IDs of public companies to require (Max: 50).
|
|
390
450
|
exclude_companies : list of str, optional
|
|
@@ -393,6 +453,34 @@ class Result:
|
|
|
393
453
|
URL hashes of docs to include (Max: 50).
|
|
394
454
|
exclude_docs : list of str, optional
|
|
395
455
|
URL hashes of docs to exclude (Max: 50).
|
|
456
|
+
brand_safety : str, optional
|
|
457
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
458
|
+
language : str, optional
|
|
459
|
+
Language code to use in search (ISO 639-1 language code).
|
|
460
|
+
continent : str, optional
|
|
461
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
462
|
+
region : str, optional
|
|
463
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
464
|
+
country : str, optional
|
|
465
|
+
Country the results must come from.
|
|
466
|
+
sector : str, optional
|
|
467
|
+
GICS Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
468
|
+
industry_group : str, optional
|
|
469
|
+
GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
470
|
+
industry : str, optional
|
|
471
|
+
GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
472
|
+
sub_industry : str, optional
|
|
473
|
+
GICS Sub-industry classification of the content's subject.
|
|
474
|
+
iab_tier_1 : str, optional
|
|
475
|
+
IAB Tier 1 category for the content.
|
|
476
|
+
iab_tier_2 : str, optional
|
|
477
|
+
IAB Tier 2 category for the content.
|
|
478
|
+
iab_tier_3 : str, optional
|
|
479
|
+
IAB Tier 3 category for the content.
|
|
480
|
+
iab_tier_4 : str, optional
|
|
481
|
+
IAB Tier 4 category for the content.
|
|
482
|
+
instruction : str, optional
|
|
483
|
+
Instruction to use with the search query.
|
|
396
484
|
|
|
397
485
|
Returns
|
|
398
486
|
-------
|
|
@@ -413,6 +501,17 @@ class Result:
|
|
|
413
501
|
... result = Result(url="https://example.com", title="Example Domain") # doctest: +SKIP
|
|
414
502
|
... similar_results = result.similar(client=nos) # doctest: +SKIP
|
|
415
503
|
"""
|
|
504
|
+
if "include_languages" in kwargs:
|
|
505
|
+
warnings.warn(
|
|
506
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
507
|
+
"Please use the parameter 'language' instead.",
|
|
508
|
+
)
|
|
509
|
+
if "exclude_languages" in kwargs:
|
|
510
|
+
warnings.warn(
|
|
511
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
512
|
+
"Please use the parameter 'language' instead.",
|
|
513
|
+
)
|
|
514
|
+
|
|
416
515
|
if client is None:
|
|
417
516
|
raise ValueError("A Nosible client instance must be provided as 'client'.")
|
|
418
517
|
if not self.url:
|
|
@@ -436,14 +535,26 @@ class Result:
|
|
|
436
535
|
visited_start=visited_start,
|
|
437
536
|
visited_end=visited_end,
|
|
438
537
|
certain=certain,
|
|
439
|
-
include_languages=include_languages,
|
|
440
|
-
exclude_languages=exclude_languages,
|
|
441
538
|
include_companies=include_companies,
|
|
442
539
|
exclude_companies=exclude_companies,
|
|
443
540
|
include_docs=include_docs,
|
|
444
541
|
exclude_docs=exclude_docs,
|
|
542
|
+
brand_safety=brand_safety,
|
|
543
|
+
language=language,
|
|
544
|
+
continent=continent,
|
|
545
|
+
region=region,
|
|
546
|
+
country=country,
|
|
547
|
+
sector=sector,
|
|
548
|
+
industry_group=industry_group,
|
|
549
|
+
industry=industry,
|
|
550
|
+
sub_industry=sub_industry,
|
|
551
|
+
iab_tier_1=iab_tier_1,
|
|
552
|
+
iab_tier_2=iab_tier_2,
|
|
553
|
+
iab_tier_3=iab_tier_3,
|
|
554
|
+
iab_tier_4=iab_tier_4,
|
|
555
|
+
instruction=instruction,
|
|
445
556
|
)
|
|
446
|
-
return client.search(search=s)
|
|
557
|
+
return client.fast_search(search=s)
|
|
447
558
|
except Exception as e:
|
|
448
559
|
raise RuntimeError(f"Failed to find similar results for title '{self.title}': {e}") from e
|
|
449
560
|
|