nosible 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nosible/classes/result.py +118 -11
- nosible/classes/result_set.py +42 -29
- nosible/classes/search.py +77 -22
- nosible/classes/search_set.py +26 -26
- nosible/classes/snippet.py +2 -2
- nosible/classes/snippet_set.py +2 -2
- nosible/classes/web_page.py +11 -56
- nosible/nosible_client.py +341 -84
- {nosible-0.2.4.dist-info → nosible-0.2.5.dist-info}/METADATA +35 -36
- nosible-0.2.5.dist-info/RECORD +16 -0
- nosible-0.2.4.dist-info/RECORD +0 -16
- {nosible-0.2.4.dist-info → nosible-0.2.5.dist-info}/WHEEL +0 -0
- {nosible-0.2.4.dist-info → nosible-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {nosible-0.2.4.dist-info → nosible-0.2.5.dist-info}/top_level.txt +0 -0
nosible/nosible_client.py
CHANGED
|
@@ -11,6 +11,7 @@ from collections.abc import Iterator
|
|
|
11
11
|
from concurrent.futures import ThreadPoolExecutor
|
|
12
12
|
from datetime import datetime
|
|
13
13
|
from typing import Optional, Union
|
|
14
|
+
import warnings
|
|
14
15
|
|
|
15
16
|
import httpx
|
|
16
17
|
from tenacity import (
|
|
@@ -73,10 +74,6 @@ class Nosible:
|
|
|
73
74
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
74
75
|
exclude_netlocs : list of str, optional
|
|
75
76
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
76
|
-
include_languages : list of str, optional
|
|
77
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
78
|
-
exclude_languages : list of str, optional
|
|
79
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
80
77
|
include_companies : list of str, optional
|
|
81
78
|
Google KG IDs of public companies to require (Max: 50).
|
|
82
79
|
exclude_companies : list of str, optional
|
|
@@ -85,6 +82,32 @@ class Nosible:
|
|
|
85
82
|
URL hashes of docs to include (Max: 50).
|
|
86
83
|
exclude_docs : list of str, optional
|
|
87
84
|
URL hashes of docs to exclude (Max: 50).
|
|
85
|
+
brand_safety : str, optional
|
|
86
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
87
|
+
language : str, optional
|
|
88
|
+
Language code to use in search (ISO 639-1 language code).
|
|
89
|
+
continent : str, optional
|
|
90
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
91
|
+
region : str, optional
|
|
92
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
93
|
+
country : str, optional
|
|
94
|
+
Country the results must come from.
|
|
95
|
+
sector : str, optional
|
|
96
|
+
Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
97
|
+
industry_group : str, optional
|
|
98
|
+
Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
99
|
+
industry : str, optional
|
|
100
|
+
Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
101
|
+
sub_industry : str, optional
|
|
102
|
+
Sub-industry classification of the content's subject.
|
|
103
|
+
iab_tier_1 : str, optional
|
|
104
|
+
IAB Tier 1 category for the content.
|
|
105
|
+
iab_tier_2 : str, optional
|
|
106
|
+
IAB Tier 2 category for the content.
|
|
107
|
+
iab_tier_3 : str, optional
|
|
108
|
+
IAB Tier 3 category for the content.
|
|
109
|
+
iab_tier_4 : str, optional
|
|
110
|
+
IAB Tier 4 category for the content.
|
|
88
111
|
|
|
89
112
|
Notes
|
|
90
113
|
-----
|
|
@@ -99,7 +122,7 @@ class Nosible:
|
|
|
99
122
|
--------
|
|
100
123
|
>>> from nosible import Nosible # doctest: +SKIP
|
|
101
124
|
>>> nos = Nosible(nosible_api_key="your_api_key_here") # doctest: +SKIP
|
|
102
|
-
>>> search = nos.
|
|
125
|
+
>>> search = nos.fast_search(question="What is Nosible?", n_results=5) # doctest: +SKIP
|
|
103
126
|
"""
|
|
104
127
|
|
|
105
128
|
def __init__(
|
|
@@ -119,13 +142,37 @@ class Nosible:
|
|
|
119
142
|
visited_start: str = None,
|
|
120
143
|
visited_end: str = None,
|
|
121
144
|
certain: bool = None,
|
|
122
|
-
include_languages: list = None,
|
|
123
|
-
exclude_languages: list = None,
|
|
124
145
|
include_companies: list = None,
|
|
125
146
|
exclude_companies: list = None,
|
|
126
147
|
include_docs: list = None,
|
|
127
148
|
exclude_docs: list = None,
|
|
149
|
+
brand_safety: str = None,
|
|
150
|
+
language: str = None,
|
|
151
|
+
continent: str = None,
|
|
152
|
+
region: str = None,
|
|
153
|
+
country: str = None,
|
|
154
|
+
sector: str = None,
|
|
155
|
+
industry_group: str = None,
|
|
156
|
+
industry: str = None,
|
|
157
|
+
sub_industry: str = None,
|
|
158
|
+
iab_tier_1: str = None,
|
|
159
|
+
iab_tier_2: str = None,
|
|
160
|
+
iab_tier_3: str = None,
|
|
161
|
+
iab_tier_4: str = None,
|
|
162
|
+
*args, **kwargs
|
|
128
163
|
) -> None:
|
|
164
|
+
|
|
165
|
+
if "include_languages" in kwargs:
|
|
166
|
+
warnings.warn(
|
|
167
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
168
|
+
"Please use the parameter 'language' instead.",
|
|
169
|
+
)
|
|
170
|
+
if "exclude_languages" in kwargs:
|
|
171
|
+
warnings.warn(
|
|
172
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
173
|
+
"Please use the parameter 'language' instead.",
|
|
174
|
+
)
|
|
175
|
+
|
|
129
176
|
# API Keys
|
|
130
177
|
if nosible_api_key is not None:
|
|
131
178
|
self.nosible_api_key = nosible_api_key
|
|
@@ -192,14 +239,25 @@ class Nosible:
|
|
|
192
239
|
self.visited_start = visited_start
|
|
193
240
|
self.visited_end = visited_end
|
|
194
241
|
self.certain = certain
|
|
195
|
-
self.include_languages = include_languages
|
|
196
|
-
self.exclude_languages = exclude_languages
|
|
197
242
|
self.include_companies = include_companies
|
|
198
243
|
self.exclude_companies = exclude_companies
|
|
199
244
|
self.exclude_docs = exclude_docs
|
|
200
245
|
self.include_docs = include_docs
|
|
201
|
-
|
|
202
|
-
|
|
246
|
+
self.brand_safety = brand_safety
|
|
247
|
+
self.language = language
|
|
248
|
+
self.continent = continent
|
|
249
|
+
self.region = region
|
|
250
|
+
self.country = country
|
|
251
|
+
self.sector = sector
|
|
252
|
+
self.industry_group = industry_group
|
|
253
|
+
self.industry = industry
|
|
254
|
+
self.sub_industry = sub_industry
|
|
255
|
+
self.iab_tier_1 = iab_tier_1
|
|
256
|
+
self.iab_tier_2 = iab_tier_2
|
|
257
|
+
self.iab_tier_3 = iab_tier_3
|
|
258
|
+
self.iab_tier_4 = iab_tier_4
|
|
259
|
+
|
|
260
|
+
def fast_search(
|
|
203
261
|
self,
|
|
204
262
|
search: Search = None,
|
|
205
263
|
question: str = None,
|
|
@@ -220,12 +278,24 @@ class Nosible:
|
|
|
220
278
|
visited_start: str = None,
|
|
221
279
|
visited_end: str = None,
|
|
222
280
|
certain: bool = None,
|
|
223
|
-
include_languages: list = None,
|
|
224
|
-
exclude_languages: list = None,
|
|
225
281
|
include_companies: list = None,
|
|
226
282
|
exclude_companies: list = None,
|
|
227
283
|
include_docs: list = None,
|
|
228
284
|
exclude_docs: list = None,
|
|
285
|
+
brand_safety: str = None,
|
|
286
|
+
language: str = None,
|
|
287
|
+
continent: str = None,
|
|
288
|
+
region: str = None,
|
|
289
|
+
country: str = None,
|
|
290
|
+
sector: str = None,
|
|
291
|
+
industry_group: str = None,
|
|
292
|
+
industry: str = None,
|
|
293
|
+
sub_industry: str = None,
|
|
294
|
+
iab_tier_1: str = None,
|
|
295
|
+
iab_tier_2: str = None,
|
|
296
|
+
iab_tier_3: str = None,
|
|
297
|
+
iab_tier_4: str = None,
|
|
298
|
+
*args, **kwargs
|
|
229
299
|
) -> ResultSet:
|
|
230
300
|
"""
|
|
231
301
|
Run a single search query.
|
|
@@ -273,10 +343,6 @@ class Nosible:
|
|
|
273
343
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
274
344
|
exclude_netlocs : list of str, optional
|
|
275
345
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
276
|
-
include_languages : list of str, optional
|
|
277
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
278
|
-
exclude_languages : list of str, optional
|
|
279
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
280
346
|
include_companies : list of str, optional
|
|
281
347
|
Google KG IDs of public companies to require (Max: 50).
|
|
282
348
|
exclude_companies : list of str, optional
|
|
@@ -285,6 +351,32 @@ class Nosible:
|
|
|
285
351
|
URL hashes of docs to include (Max: 50).
|
|
286
352
|
exclude_docs : list of str, optional
|
|
287
353
|
URL hashes of docs to exclude (Max: 50).
|
|
354
|
+
brand_safety : str, optional
|
|
355
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
356
|
+
language : str, optional
|
|
357
|
+
Language code to use in search (ISO 639-1 language code).
|
|
358
|
+
continent : str, optional
|
|
359
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
360
|
+
region : str, optional
|
|
361
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
362
|
+
country : str, optional
|
|
363
|
+
Country the results must come from.
|
|
364
|
+
sector : str, optional
|
|
365
|
+
Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
366
|
+
industry_group : str, optional
|
|
367
|
+
Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
368
|
+
industry : str, optional
|
|
369
|
+
Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
370
|
+
sub_industry : str, optional
|
|
371
|
+
Sub-industry classification of the content's subject.
|
|
372
|
+
iab_tier_1 : str, optional
|
|
373
|
+
IAB Tier 1 category for the content.
|
|
374
|
+
iab_tier_2 : str, optional
|
|
375
|
+
IAB Tier 2 category for the content.
|
|
376
|
+
iab_tier_3 : str, optional
|
|
377
|
+
IAB Tier 3 category for the content.
|
|
378
|
+
iab_tier_4 : str, optional
|
|
379
|
+
IAB Tier 4 category for the content.
|
|
288
380
|
|
|
289
381
|
Returns
|
|
290
382
|
-------
|
|
@@ -314,27 +406,38 @@ class Nosible:
|
|
|
314
406
|
>>> from nosible import Nosible
|
|
315
407
|
>>> s = Search(question="Hedge funds seek to expand into private credit", n_results=10)
|
|
316
408
|
>>> with Nosible() as nos:
|
|
317
|
-
... results = nos.
|
|
409
|
+
... results = nos.fast_search(search=s)
|
|
318
410
|
... print(isinstance(results, ResultSet))
|
|
319
411
|
... print(len(results))
|
|
320
412
|
True
|
|
321
413
|
10
|
|
322
414
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
323
|
-
>>> nos.
|
|
415
|
+
>>> nos.fast_search() # doctest: +ELLIPSIS
|
|
324
416
|
Traceback (most recent call last):
|
|
325
417
|
...
|
|
326
418
|
TypeError: Specify exactly one of 'question' or 'search'.
|
|
327
419
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
328
|
-
>>> nos.
|
|
420
|
+
>>> nos.fast_search(question="foo", search=s) # doctest: +ELLIPSIS
|
|
329
421
|
Traceback (most recent call last):
|
|
330
422
|
...
|
|
331
423
|
TypeError: Specify exactly one of 'question' or 'search'.
|
|
332
424
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
333
|
-
>>> nos.
|
|
425
|
+
>>> nos.fast_search(question="foo", n_results=101) # doctest: +ELLIPSIS
|
|
334
426
|
Traceback (most recent call last):
|
|
335
427
|
...
|
|
336
428
|
ValueError: Search can not have more than 100 results - Use bulk search instead.
|
|
337
429
|
"""
|
|
430
|
+
if "include_languages" in kwargs:
|
|
431
|
+
warnings.warn(
|
|
432
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
433
|
+
"Please use the parameter 'language' instead.",
|
|
434
|
+
)
|
|
435
|
+
if "exclude_languages" in kwargs:
|
|
436
|
+
warnings.warn(
|
|
437
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
438
|
+
"Please use the parameter 'language' instead.",
|
|
439
|
+
)
|
|
440
|
+
|
|
338
441
|
if (question is None and search is None) or (question is not None and search is not None):
|
|
339
442
|
raise TypeError("Specify exactly one of 'question' or 'search'.")
|
|
340
443
|
|
|
@@ -357,25 +460,36 @@ class Nosible:
|
|
|
357
460
|
visited_start=visited_start,
|
|
358
461
|
visited_end=visited_end,
|
|
359
462
|
certain=certain,
|
|
360
|
-
include_languages=include_languages,
|
|
361
|
-
exclude_languages=exclude_languages,
|
|
362
463
|
include_companies=include_companies,
|
|
363
464
|
exclude_companies=exclude_companies,
|
|
364
465
|
include_docs=include_docs,
|
|
365
466
|
exclude_docs=exclude_docs,
|
|
467
|
+
brand_safety=brand_safety,
|
|
468
|
+
language=language,
|
|
469
|
+
continent=continent,
|
|
470
|
+
region=region,
|
|
471
|
+
country=country,
|
|
472
|
+
sector=sector,
|
|
473
|
+
industry_group=industry_group,
|
|
474
|
+
industry=industry,
|
|
475
|
+
sub_industry=sub_industry,
|
|
476
|
+
iab_tier_1=iab_tier_1,
|
|
477
|
+
iab_tier_2=iab_tier_2,
|
|
478
|
+
iab_tier_3=iab_tier_3,
|
|
479
|
+
iab_tier_4=iab_tier_4,
|
|
366
480
|
)
|
|
367
481
|
|
|
368
482
|
future = self._executor.submit(self._search_single, search_obj)
|
|
369
483
|
try:
|
|
370
484
|
return future.result()
|
|
371
485
|
except ValueError:
|
|
372
|
-
# Propagate our own
|
|
486
|
+
# Propagate our own "too many results" error directly.
|
|
373
487
|
raise
|
|
374
488
|
except Exception as e:
|
|
375
489
|
self.logger.warning(f"Search for {search_obj.question!r} failed: {e}")
|
|
376
490
|
raise RuntimeError(f"Search for {search_obj.question!r} failed") from e
|
|
377
491
|
|
|
378
|
-
def
|
|
492
|
+
def fast_searches(
|
|
379
493
|
self,
|
|
380
494
|
*,
|
|
381
495
|
searches: Union[SearchSet, list[Search]] = None,
|
|
@@ -397,12 +511,24 @@ class Nosible:
|
|
|
397
511
|
visited_start: str = None,
|
|
398
512
|
visited_end: str = None,
|
|
399
513
|
certain: bool = None,
|
|
400
|
-
include_languages: list = None,
|
|
401
|
-
exclude_languages: list = None,
|
|
402
514
|
include_companies: list = None,
|
|
403
515
|
exclude_companies: list = None,
|
|
404
516
|
include_docs: list = None,
|
|
405
517
|
exclude_docs: list = None,
|
|
518
|
+
brand_safety: str = None,
|
|
519
|
+
language: str = None,
|
|
520
|
+
continent: str = None,
|
|
521
|
+
region: str = None,
|
|
522
|
+
country: str = None,
|
|
523
|
+
sector: str = None,
|
|
524
|
+
industry_group: str = None,
|
|
525
|
+
industry: str = None,
|
|
526
|
+
sub_industry: str = None,
|
|
527
|
+
iab_tier_1: str = None,
|
|
528
|
+
iab_tier_2: str = None,
|
|
529
|
+
iab_tier_3: str = None,
|
|
530
|
+
iab_tier_4: str = None,
|
|
531
|
+
**kwargs
|
|
406
532
|
) -> Iterator[ResultSet]:
|
|
407
533
|
"""
|
|
408
534
|
Run multiple searches concurrently and yield results.
|
|
@@ -447,9 +573,6 @@ class Nosible:
|
|
|
447
573
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
448
574
|
exclude_netlocs : list of str, optional
|
|
449
575
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
450
|
-
include_languages : list of str, optional
|
|
451
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
452
|
-
exclude_languages : list of str, optional
|
|
453
576
|
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
454
577
|
include_companies : list of str, optional
|
|
455
578
|
Google KG IDs of public companies to require (Max: 50).
|
|
@@ -459,6 +582,32 @@ class Nosible:
|
|
|
459
582
|
URL hashes of docs to include (Max: 50).
|
|
460
583
|
exclude_docs : list of str, optional
|
|
461
584
|
URL hashes of docs to exclude (Max: 50).
|
|
585
|
+
brand_safety : str, optional
|
|
586
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
587
|
+
language : str, optional
|
|
588
|
+
Language code to use in search (ISO 639-1 language code).
|
|
589
|
+
continent : str, optional
|
|
590
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
591
|
+
region : str, optional
|
|
592
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
593
|
+
country : str, optional
|
|
594
|
+
Country the results must come from.
|
|
595
|
+
sector : str, optional
|
|
596
|
+
GICS Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
597
|
+
industry_group : str, optional
|
|
598
|
+
GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
599
|
+
industry : str, optional
|
|
600
|
+
GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
601
|
+
sub_industry : str, optional
|
|
602
|
+
GICS Sub-industry classification of the content's subject.
|
|
603
|
+
iab_tier_1 : str, optional
|
|
604
|
+
IAB Tier 1 category for the content.
|
|
605
|
+
iab_tier_2 : str, optional
|
|
606
|
+
IAB Tier 2 category for the content.
|
|
607
|
+
iab_tier_3 : str, optional
|
|
608
|
+
IAB Tier 3 category for the content.
|
|
609
|
+
iab_tier_4 : str, optional
|
|
610
|
+
IAB Tier 4 category for the content.
|
|
462
611
|
|
|
463
612
|
Returns
|
|
464
613
|
-------
|
|
@@ -489,7 +638,7 @@ class Nosible:
|
|
|
489
638
|
... ]
|
|
490
639
|
... )
|
|
491
640
|
>>> with Nosible() as nos:
|
|
492
|
-
... results_list = list(nos.
|
|
641
|
+
... results_list = list(nos.fast_searches(searches=queries))
|
|
493
642
|
>>> print(len(results_list))
|
|
494
643
|
2
|
|
495
644
|
>>> for r in results_list:
|
|
@@ -498,7 +647,7 @@ class Nosible:
|
|
|
498
647
|
True True
|
|
499
648
|
>>> with Nosible() as nos:
|
|
500
649
|
... results_list_str = list(
|
|
501
|
-
... nos.
|
|
650
|
+
... nos.fast_searches(
|
|
502
651
|
... questions=[
|
|
503
652
|
... "What are the terms of the partnership between Microsoft and OpenAI?",
|
|
504
653
|
... "What are the terms of the partnership between Volkswagen and Uber?",
|
|
@@ -506,17 +655,28 @@ class Nosible:
|
|
|
506
655
|
... )
|
|
507
656
|
... )
|
|
508
657
|
>>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +ELLIPSIS
|
|
509
|
-
>>> nos.
|
|
658
|
+
>>> nos.fast_searches() # doctest: +ELLIPSIS
|
|
510
659
|
Traceback (most recent call last):
|
|
511
660
|
...
|
|
512
661
|
TypeError: Specify exactly one of 'questions' or 'searches'.
|
|
513
662
|
>>> from nosible import Nosible
|
|
514
663
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
515
|
-
>>> nos.
|
|
664
|
+
>>> nos.fast_searches(questions=["A"], searches=SearchSet(searches=["A"])) # doctest: +ELLIPSIS
|
|
516
665
|
Traceback (most recent call last):
|
|
517
666
|
...
|
|
518
667
|
TypeError: Specify exactly one of 'questions' or 'searches'.
|
|
519
668
|
"""
|
|
669
|
+
if "include_languages" in kwargs:
|
|
670
|
+
warnings.warn(
|
|
671
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
672
|
+
"Please use the parameter 'language' instead.",
|
|
673
|
+
)
|
|
674
|
+
if "exclude_languages" in kwargs:
|
|
675
|
+
warnings.warn(
|
|
676
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
677
|
+
"Please use the parameter 'language' instead.",
|
|
678
|
+
)
|
|
679
|
+
|
|
520
680
|
if (questions is None and searches is None) or (questions is not None and searches is not None):
|
|
521
681
|
raise TypeError("Specify exactly one of 'questions' or 'searches'.")
|
|
522
682
|
|
|
@@ -543,12 +703,23 @@ class Nosible:
|
|
|
543
703
|
visited_start=visited_start,
|
|
544
704
|
visited_end=visited_end,
|
|
545
705
|
certain=certain,
|
|
546
|
-
include_languages=include_languages,
|
|
547
|
-
exclude_languages=exclude_languages,
|
|
548
706
|
include_companies=include_companies,
|
|
549
707
|
exclude_companies=exclude_companies,
|
|
550
708
|
include_docs=include_docs,
|
|
551
709
|
exclude_docs=exclude_docs,
|
|
710
|
+
brand_safety=brand_safety,
|
|
711
|
+
language=language,
|
|
712
|
+
continent=continent,
|
|
713
|
+
region=region,
|
|
714
|
+
country=country,
|
|
715
|
+
sector=sector,
|
|
716
|
+
industry_group=industry_group,
|
|
717
|
+
industry=industry,
|
|
718
|
+
sub_industry=sub_industry,
|
|
719
|
+
iab_tier_1=iab_tier_1,
|
|
720
|
+
iab_tier_2=iab_tier_2,
|
|
721
|
+
iab_tier_3=iab_tier_3,
|
|
722
|
+
iab_tier_4=iab_tier_4,
|
|
552
723
|
)
|
|
553
724
|
|
|
554
725
|
futures = [self._executor.submit(self._search_single, s) for s in searches_list]
|
|
@@ -562,6 +733,7 @@ class Nosible:
|
|
|
562
733
|
|
|
563
734
|
return _run_generator()
|
|
564
735
|
|
|
736
|
+
|
|
565
737
|
@_rate_limited("fast")
|
|
566
738
|
def _search_single(self, search_obj: Search) -> ResultSet:
|
|
567
739
|
"""
|
|
@@ -590,7 +762,7 @@ class Nosible:
|
|
|
590
762
|
>>> from nosible import Nosible
|
|
591
763
|
>>> s = Search(question="Nvidia insiders dump more than $1 billion in stock", n_results=200)
|
|
592
764
|
>>> with Nosible() as nos:
|
|
593
|
-
... results = nos.
|
|
765
|
+
... results = nos.fast_search(search=s) # doctest: +ELLIPSIS
|
|
594
766
|
Traceback (most recent call last):
|
|
595
767
|
...
|
|
596
768
|
ValueError: Search can not have more than 100 results - Use bulk search instead.
|
|
@@ -618,18 +790,31 @@ class Nosible:
|
|
|
618
790
|
visited_start = search_obj.visited_start if search_obj.visited_start is not None else self.visited_start
|
|
619
791
|
visited_end = search_obj.visited_end if search_obj.visited_end is not None else self.visited_end
|
|
620
792
|
certain = search_obj.certain if search_obj.certain is not None else self.certain
|
|
621
|
-
include_languages = (
|
|
622
|
-
search_obj.include_languages if search_obj.include_languages is not None else self.include_languages
|
|
623
|
-
)
|
|
624
|
-
exclude_languages = (
|
|
625
|
-
search_obj.exclude_languages if search_obj.exclude_languages is not None else self.exclude_languages
|
|
626
|
-
)
|
|
627
793
|
include_companies = (
|
|
628
794
|
search_obj.include_companies if search_obj.include_companies is not None else self.include_companies
|
|
629
795
|
)
|
|
630
796
|
exclude_companies = (
|
|
631
797
|
search_obj.exclude_companies if search_obj.exclude_companies is not None else self.exclude_companies
|
|
632
798
|
)
|
|
799
|
+
include_docs = search_obj.include_docs if search_obj.include_docs is not None else self.include_docs
|
|
800
|
+
exclude_docs = search_obj.exclude_docs if search_obj.exclude_docs is not None else self.exclude_docs
|
|
801
|
+
brand_safety = search_obj.brand_safety if search_obj.brand_safety is not None else self.brand_safety
|
|
802
|
+
language = search_obj.language if search_obj.language is not None else self.language
|
|
803
|
+
continent = search_obj.continent if search_obj.continent is not None else self.continent
|
|
804
|
+
region = search_obj.region if search_obj.region is not None else self.region
|
|
805
|
+
country = search_obj.country if search_obj.country is not None else self.country
|
|
806
|
+
sector = search_obj.sector if search_obj.sector is not None else self.sector
|
|
807
|
+
industry_group = search_obj.industry_group if search_obj.industry_group is not None else self.industry_group
|
|
808
|
+
industry = search_obj.industry if search_obj.industry is not None else self.industry
|
|
809
|
+
sub_industry = search_obj.sub_industry if search_obj.sub_industry is not None else self.sub_industry
|
|
810
|
+
iab_tier_1 = search_obj.iab_tier_1 if search_obj.iab_tier_1 is not None else self.iab_tier_1
|
|
811
|
+
iab_tier_2 = search_obj.iab_tier_2 if search_obj.iab_tier_2 is not None else self.iab_tier_2
|
|
812
|
+
iab_tier_3 = search_obj.iab_tier_3 if search_obj.iab_tier_3 is not None else self.iab_tier_3
|
|
813
|
+
iab_tier_4 = search_obj.iab_tier_4 if search_obj.iab_tier_4 is not None else self.iab_tier_4
|
|
814
|
+
|
|
815
|
+
must_include = must_include if must_include is not None else []
|
|
816
|
+
must_exclude = must_exclude if must_exclude is not None else []
|
|
817
|
+
min_similarity = min_similarity if min_similarity is not None else 0
|
|
633
818
|
|
|
634
819
|
if not (0.0 <= min_similarity <= 1.0):
|
|
635
820
|
raise ValueError(f"Invalid min_simalarity: {min_similarity}. Must be [0,1].")
|
|
@@ -650,10 +835,10 @@ class Nosible:
|
|
|
650
835
|
visited_start=visited_start,
|
|
651
836
|
visited_end=visited_end,
|
|
652
837
|
certain=certain,
|
|
653
|
-
include_languages=include_languages,
|
|
654
|
-
exclude_languages=exclude_languages,
|
|
655
838
|
include_companies=include_companies,
|
|
656
839
|
exclude_companies=exclude_companies,
|
|
840
|
+
include_docs=include_docs,
|
|
841
|
+
exclude_docs=exclude_docs,
|
|
657
842
|
)
|
|
658
843
|
|
|
659
844
|
# Enforce limits
|
|
@@ -674,6 +859,24 @@ class Nosible:
|
|
|
674
859
|
"must_include": must_include,
|
|
675
860
|
"must_exclude": must_exclude,
|
|
676
861
|
}
|
|
862
|
+
optional = {
|
|
863
|
+
"brand_safety":brand_safety,
|
|
864
|
+
"language": language,
|
|
865
|
+
"continent": continent,
|
|
866
|
+
"region": region,
|
|
867
|
+
"country": country,
|
|
868
|
+
"sector": sector,
|
|
869
|
+
"industry_group": industry_group,
|
|
870
|
+
"industry": industry,
|
|
871
|
+
"sub_industry": sub_industry,
|
|
872
|
+
"iab_tier_1": iab_tier_1,
|
|
873
|
+
"iab_tier_2": iab_tier_2,
|
|
874
|
+
"iab_tier_3": iab_tier_3,
|
|
875
|
+
"iab_tier_4": iab_tier_4,
|
|
876
|
+
}
|
|
877
|
+
for key, val in optional.items():
|
|
878
|
+
if val is not None:
|
|
879
|
+
payload[key] = val
|
|
677
880
|
|
|
678
881
|
resp = self._post(url="https://www.nosible.ai/search/v1/fast-search", payload=payload)
|
|
679
882
|
resp.raise_for_status()
|
|
@@ -739,18 +942,30 @@ class Nosible:
|
|
|
739
942
|
autogenerate_expansions: bool = False,
|
|
740
943
|
publish_start: str = None,
|
|
741
944
|
publish_end: str = None,
|
|
742
|
-
include_netlocs: list = None,
|
|
743
|
-
exclude_netlocs: list = None,
|
|
744
945
|
visited_start: str = None,
|
|
745
946
|
visited_end: str = None,
|
|
746
947
|
certain: bool = None,
|
|
747
|
-
|
|
748
|
-
|
|
948
|
+
include_netlocs: list = None,
|
|
949
|
+
exclude_netlocs: list = None,
|
|
749
950
|
include_companies: list = None,
|
|
750
951
|
exclude_companies: list = None,
|
|
751
952
|
include_docs: list = None,
|
|
752
953
|
exclude_docs: list = None,
|
|
954
|
+
brand_safety: str = None,
|
|
955
|
+
language: str = None,
|
|
956
|
+
continent: str = None,
|
|
957
|
+
region: str = None,
|
|
958
|
+
country: str = None,
|
|
959
|
+
sector: str = None,
|
|
960
|
+
industry_group: str = None,
|
|
961
|
+
industry: str = None,
|
|
962
|
+
sub_industry: str = None,
|
|
963
|
+
iab_tier_1: str = None,
|
|
964
|
+
iab_tier_2: str = None,
|
|
965
|
+
iab_tier_3: str = None,
|
|
966
|
+
iab_tier_4: str = None,
|
|
753
967
|
verbose: bool = False,
|
|
968
|
+
**kwargs,
|
|
754
969
|
) -> ResultSet:
|
|
755
970
|
"""
|
|
756
971
|
Perform a bulk (slow) search query (1,000–10,000 results) against the Nosible API.
|
|
@@ -795,10 +1010,6 @@ class Nosible:
|
|
|
795
1010
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
796
1011
|
exclude_netlocs : list of str, optional
|
|
797
1012
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
798
|
-
include_languages : list of str, optional
|
|
799
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
800
|
-
exclude_languages : list of str, optional
|
|
801
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
802
1013
|
include_companies : list of str, optional
|
|
803
1014
|
Google KG IDs of public companies to require (Max: 50).
|
|
804
1015
|
exclude_companies : list of str, optional
|
|
@@ -807,6 +1018,32 @@ class Nosible:
|
|
|
807
1018
|
URL hashes of docs to include (Max: 50).
|
|
808
1019
|
exclude_docs : list of str, optional
|
|
809
1020
|
URL hashes of docs to exclude (Max: 50).
|
|
1021
|
+
brand_safety : str, optional
|
|
1022
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
1023
|
+
language : str, optional
|
|
1024
|
+
Language code to use in search (ISO 639-1 language code).
|
|
1025
|
+
continent : str, optional
|
|
1026
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
1027
|
+
region : str, optional
|
|
1028
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
1029
|
+
country : str, optional
|
|
1030
|
+
Country the results must come from.
|
|
1031
|
+
sector : str, optional
|
|
1032
|
+
Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
1033
|
+
industry_group : str, optional
|
|
1034
|
+
Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
1035
|
+
industry : str, optional
|
|
1036
|
+
Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
1037
|
+
sub_industry : str, optional
|
|
1038
|
+
Sub-industry classification of the content's subject.
|
|
1039
|
+
iab_tier_1 : str, optional
|
|
1040
|
+
IAB Tier 1 category for the content.
|
|
1041
|
+
iab_tier_2 : str, optional
|
|
1042
|
+
IAB Tier 2 category for the content.
|
|
1043
|
+
iab_tier_3 : str, optional
|
|
1044
|
+
IAB Tier 3 category for the content.
|
|
1045
|
+
iab_tier_4 : str, optional
|
|
1046
|
+
IAB Tier 4 category for the content.
|
|
810
1047
|
verbose : bool, optional
|
|
811
1048
|
Show verbose output; bulk search will print more information.
|
|
812
1049
|
|
|
@@ -872,6 +1109,17 @@ class Nosible:
|
|
|
872
1109
|
...
|
|
873
1110
|
ValueError: Bulk search cannot have more than 10000 results per query.
|
|
874
1111
|
"""
|
|
1112
|
+
if "include_languages" in kwargs:
|
|
1113
|
+
warnings.warn(
|
|
1114
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
1115
|
+
"Please use the parameter 'language' instead.",
|
|
1116
|
+
)
|
|
1117
|
+
if "exclude_languages" in kwargs:
|
|
1118
|
+
warnings.warn(
|
|
1119
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
1120
|
+
"Please use the parameter 'language' instead.",
|
|
1121
|
+
)
|
|
1122
|
+
|
|
875
1123
|
from cryptography.fernet import Fernet
|
|
876
1124
|
|
|
877
1125
|
previous_level = self.logger.level
|
|
@@ -894,11 +1142,8 @@ class Nosible:
|
|
|
894
1142
|
n_contextify = search.n_contextify if search.n_contextify is not None else n_contextify
|
|
895
1143
|
algorithm = search.algorithm if search.algorithm is not None else algorithm
|
|
896
1144
|
min_similarity = search.min_similarity if search.min_similarity is not None else min_similarity
|
|
897
|
-
min_similarity = min_similarity if min_similarity is not None else 0
|
|
898
1145
|
must_include = search.must_include if search.must_include is not None else must_include
|
|
899
|
-
must_include = must_include if must_include is not None else []
|
|
900
1146
|
must_exclude = search.must_exclude if search.must_exclude is not None else must_exclude
|
|
901
|
-
must_exclude = must_exclude if must_exclude is not None else []
|
|
902
1147
|
autogenerate_expansions = (
|
|
903
1148
|
search.autogenerate_expansions
|
|
904
1149
|
if search.autogenerate_expansions is not None
|
|
@@ -911,12 +1156,23 @@ class Nosible:
|
|
|
911
1156
|
visited_start = search.visited_start if search.visited_start is not None else visited_start
|
|
912
1157
|
visited_end = search.visited_end if search.visited_end is not None else visited_end
|
|
913
1158
|
certain = search.certain if search.certain is not None else certain
|
|
914
|
-
include_languages = search.include_languages if search.include_languages is not None else include_languages
|
|
915
|
-
exclude_languages = search.exclude_languages if search.exclude_languages is not None else exclude_languages
|
|
916
1159
|
include_companies = search.include_companies if search.include_companies is not None else include_companies
|
|
917
1160
|
exclude_companies = search.exclude_companies if search.exclude_companies is not None else exclude_companies
|
|
918
|
-
include_docs = search.include_docs if search.include_docs is not None else include_docs
|
|
919
|
-
exclude_docs = search.exclude_docs if search.exclude_docs is not None else exclude_docs
|
|
1161
|
+
include_docs = search.include_docs if search.include_docs is not None else self.include_docs
|
|
1162
|
+
exclude_docs = search.exclude_docs if search.exclude_docs is not None else self.exclude_docs
|
|
1163
|
+
brand_safety = search.brand_safety if search.brand_safety is not None else self.brand_safety
|
|
1164
|
+
language = search.language if search.language is not None else self.language
|
|
1165
|
+
continent = search.continent if search.continent is not None else self.continent
|
|
1166
|
+
region = search.region if search.region is not None else self.region
|
|
1167
|
+
country = search.country if search.country is not None else self.country
|
|
1168
|
+
sector = search.sector if search.sector is not None else self.sector
|
|
1169
|
+
industry_group = search.industry_group if search.industry_group is not None else self.industry_group
|
|
1170
|
+
industry = search.industry if search.industry is not None else self.industry
|
|
1171
|
+
sub_industry = search.sub_industry if search.sub_industry is not None else self.sub_industry
|
|
1172
|
+
iab_tier_1 = search.iab_tier_1 if search.iab_tier_1 is not None else self.iab_tier_1
|
|
1173
|
+
iab_tier_2 = search.iab_tier_2 if search.iab_tier_2 is not None else self.iab_tier_2
|
|
1174
|
+
iab_tier_3 = search.iab_tier_3 if search.iab_tier_3 is not None else self.iab_tier_3
|
|
1175
|
+
iab_tier_4 = search.iab_tier_4 if search.iab_tier_4 is not None else self.iab_tier_4
|
|
920
1176
|
|
|
921
1177
|
# Default expansions and filters
|
|
922
1178
|
if expansions is None:
|
|
@@ -936,13 +1192,11 @@ class Nosible:
|
|
|
936
1192
|
sql_filter = self._format_sql(
|
|
937
1193
|
publish_start=publish_start if publish_start is not None else self.publish_start,
|
|
938
1194
|
publish_end=publish_end if publish_end is not None else self.publish_end,
|
|
939
|
-
include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
|
|
940
|
-
exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
|
|
941
1195
|
visited_start=visited_start if visited_start is not None else self.visited_start,
|
|
942
1196
|
visited_end=visited_end if visited_end is not None else self.visited_end,
|
|
943
1197
|
certain=certain if certain is not None else self.certain,
|
|
944
|
-
|
|
945
|
-
|
|
1198
|
+
include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
|
|
1199
|
+
exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
|
|
946
1200
|
include_companies=include_companies if include_companies is not None else self.include_companies,
|
|
947
1201
|
exclude_companies=exclude_companies if exclude_companies is not None else self.exclude_companies,
|
|
948
1202
|
include_docs=include_docs if include_docs is not None else self.include_docs,
|
|
@@ -979,6 +1233,25 @@ class Nosible:
|
|
|
979
1233
|
"must_include": must_include,
|
|
980
1234
|
"must_exclude": must_exclude,
|
|
981
1235
|
}
|
|
1236
|
+
optional = {
|
|
1237
|
+
"brand_safety": brand_safety,
|
|
1238
|
+
"language": language,
|
|
1239
|
+
"continent": continent,
|
|
1240
|
+
"region": region,
|
|
1241
|
+
"country": country,
|
|
1242
|
+
"sector": sector,
|
|
1243
|
+
"industry_group": industry_group,
|
|
1244
|
+
"industry": industry,
|
|
1245
|
+
"sub_industry": sub_industry,
|
|
1246
|
+
"iab_tier_1": iab_tier_1,
|
|
1247
|
+
"iab_tier_2": iab_tier_2,
|
|
1248
|
+
"iab_tier_3": iab_tier_3,
|
|
1249
|
+
"iab_tier_4": iab_tier_4,
|
|
1250
|
+
}
|
|
1251
|
+
for key, val in optional.items():
|
|
1252
|
+
if val is not None:
|
|
1253
|
+
payload[key] = val
|
|
1254
|
+
|
|
982
1255
|
resp = self._post(url="https://www.nosible.ai/search/v1/slow-search", payload=payload)
|
|
983
1256
|
try:
|
|
984
1257
|
resp.raise_for_status()
|
|
@@ -1019,7 +1292,7 @@ class Nosible:
|
|
|
1019
1292
|
show_context: bool = True,
|
|
1020
1293
|
) -> str:
|
|
1021
1294
|
"""
|
|
1022
|
-
RAG-style question answering: retrieve top `n_results` via `.
|
|
1295
|
+
RAG-style question answering: retrieve top `n_results` via `.fast_search()`
|
|
1023
1296
|
then answer `query` using those documents as context.
|
|
1024
1297
|
|
|
1025
1298
|
Parameters
|
|
@@ -1068,7 +1341,7 @@ class Nosible:
|
|
|
1068
1341
|
raise ValueError("An LLM API key is required for answer().")
|
|
1069
1342
|
|
|
1070
1343
|
# Retrieve top documents
|
|
1071
|
-
results = self.
|
|
1344
|
+
results = self.fast_search(question=query, n_results=n_results, min_similarity=min_similarity)
|
|
1072
1345
|
|
|
1073
1346
|
# Build RAG context
|
|
1074
1347
|
context = ""
|
|
@@ -1778,13 +2051,11 @@ class Nosible:
|
|
|
1778
2051
|
self,
|
|
1779
2052
|
publish_start: str = None,
|
|
1780
2053
|
publish_end: str = None,
|
|
1781
|
-
include_netlocs: list = None,
|
|
1782
|
-
exclude_netlocs: list = None,
|
|
1783
2054
|
visited_start: str = None,
|
|
1784
2055
|
visited_end: str = None,
|
|
1785
2056
|
certain: bool = None,
|
|
1786
|
-
|
|
1787
|
-
|
|
2057
|
+
include_netlocs: list = None,
|
|
2058
|
+
exclude_netlocs: list = None,
|
|
1788
2059
|
include_companies: list = None,
|
|
1789
2060
|
exclude_companies: list = None,
|
|
1790
2061
|
include_docs: list = None,
|
|
@@ -1809,10 +2080,6 @@ class Nosible:
|
|
|
1809
2080
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
1810
2081
|
exclude_netlocs : list of str, optional
|
|
1811
2082
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
1812
|
-
include_languages : list of str, optional
|
|
1813
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
1814
|
-
exclude_languages : list of str, optional
|
|
1815
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
1816
2083
|
include_companies : list of str, optional
|
|
1817
2084
|
Google KG IDs of public companies to require (Max: 50).
|
|
1818
2085
|
exclude_companies : list of str, optional
|
|
@@ -1845,8 +2112,6 @@ class Nosible:
|
|
|
1845
2112
|
for name, value in [
|
|
1846
2113
|
("include_netlocs", include_netlocs),
|
|
1847
2114
|
("exclude_netlocs", exclude_netlocs),
|
|
1848
|
-
("include_languages", include_languages),
|
|
1849
|
-
("exclude_languages", exclude_languages),
|
|
1850
2115
|
("include_companies", include_companies),
|
|
1851
2116
|
("exclude_companies", exclude_companies),
|
|
1852
2117
|
("include_docs", include_docs),
|
|
@@ -1918,14 +2183,6 @@ class Nosible:
|
|
|
1918
2183
|
f"(company_1 NOT IN ({company_list}) AND company_2 NOT IN ({company_list}) AND company_3 NOT IN ({company_list}))"
|
|
1919
2184
|
)
|
|
1920
2185
|
|
|
1921
|
-
# Include / exclude languages
|
|
1922
|
-
if include_languages:
|
|
1923
|
-
langs = ", ".join(f"'{lang}-{lang}'" for lang in include_languages)
|
|
1924
|
-
clauses.append(f"language IN ({langs})")
|
|
1925
|
-
if exclude_languages:
|
|
1926
|
-
langs = ", ".join(f"'{lang}-{lang}'" for lang in exclude_languages)
|
|
1927
|
-
clauses.append(f"language NOT IN ({langs})")
|
|
1928
|
-
|
|
1929
2186
|
if include_docs:
|
|
1930
2187
|
# Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
|
|
1931
2188
|
doc_hashes = ", ".join(f"'{doc}'" for doc in include_docs)
|