nosible 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nosible/classes/result.py +122 -11
- nosible/classes/result_set.py +42 -29
- nosible/classes/search.py +82 -22
- nosible/classes/search_set.py +26 -26
- nosible/classes/snippet.py +2 -2
- nosible/classes/snippet_set.py +2 -2
- nosible/classes/web_page.py +11 -56
- nosible/nosible_client.py +360 -84
- {nosible-0.2.4.dist-info → nosible-0.2.6.dist-info}/METADATA +40 -41
- nosible-0.2.6.dist-info/RECORD +16 -0
- nosible-0.2.4.dist-info/RECORD +0 -16
- {nosible-0.2.4.dist-info → nosible-0.2.6.dist-info}/WHEEL +0 -0
- {nosible-0.2.4.dist-info → nosible-0.2.6.dist-info}/licenses/LICENSE +0 -0
- {nosible-0.2.4.dist-info → nosible-0.2.6.dist-info}/top_level.txt +0 -0
nosible/nosible_client.py
CHANGED
|
@@ -11,6 +11,7 @@ from collections.abc import Iterator
|
|
|
11
11
|
from concurrent.futures import ThreadPoolExecutor
|
|
12
12
|
from datetime import datetime
|
|
13
13
|
from typing import Optional, Union
|
|
14
|
+
import warnings
|
|
14
15
|
|
|
15
16
|
import httpx
|
|
16
17
|
from tenacity import (
|
|
@@ -73,10 +74,6 @@ class Nosible:
|
|
|
73
74
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
74
75
|
exclude_netlocs : list of str, optional
|
|
75
76
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
76
|
-
include_languages : list of str, optional
|
|
77
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
78
|
-
exclude_languages : list of str, optional
|
|
79
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
80
77
|
include_companies : list of str, optional
|
|
81
78
|
Google KG IDs of public companies to require (Max: 50).
|
|
82
79
|
exclude_companies : list of str, optional
|
|
@@ -85,6 +82,34 @@ class Nosible:
|
|
|
85
82
|
URL hashes of docs to include (Max: 50).
|
|
86
83
|
exclude_docs : list of str, optional
|
|
87
84
|
URL hashes of docs to exclude (Max: 50).
|
|
85
|
+
brand_safety : str, optional
|
|
86
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
87
|
+
language : str, optional
|
|
88
|
+
Language code to use in search (ISO 639-1 language code).
|
|
89
|
+
continent : str, optional
|
|
90
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
91
|
+
region : str, optional
|
|
92
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
93
|
+
country : str, optional
|
|
94
|
+
Country the results must come from.
|
|
95
|
+
sector : str, optional
|
|
96
|
+
Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
97
|
+
industry_group : str, optional
|
|
98
|
+
Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
99
|
+
industry : str, optional
|
|
100
|
+
Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
101
|
+
sub_industry : str, optional
|
|
102
|
+
Sub-industry classification of the content's subject.
|
|
103
|
+
iab_tier_1 : str, optional
|
|
104
|
+
IAB Tier 1 category for the content.
|
|
105
|
+
iab_tier_2 : str, optional
|
|
106
|
+
IAB Tier 2 category for the content.
|
|
107
|
+
iab_tier_3 : str, optional
|
|
108
|
+
IAB Tier 3 category for the content.
|
|
109
|
+
iab_tier_4 : str, optional
|
|
110
|
+
IAB Tier 4 category for the content.
|
|
111
|
+
instruction : str, optional
|
|
112
|
+
Instruction to use with the search query.
|
|
88
113
|
|
|
89
114
|
Notes
|
|
90
115
|
-----
|
|
@@ -99,7 +124,7 @@ class Nosible:
|
|
|
99
124
|
--------
|
|
100
125
|
>>> from nosible import Nosible # doctest: +SKIP
|
|
101
126
|
>>> nos = Nosible(nosible_api_key="your_api_key_here") # doctest: +SKIP
|
|
102
|
-
>>> search = nos.
|
|
127
|
+
>>> search = nos.fast_search(question="What is Nosible?", n_results=5) # doctest: +SKIP
|
|
103
128
|
"""
|
|
104
129
|
|
|
105
130
|
def __init__(
|
|
@@ -119,13 +144,38 @@ class Nosible:
|
|
|
119
144
|
visited_start: str = None,
|
|
120
145
|
visited_end: str = None,
|
|
121
146
|
certain: bool = None,
|
|
122
|
-
include_languages: list = None,
|
|
123
|
-
exclude_languages: list = None,
|
|
124
147
|
include_companies: list = None,
|
|
125
148
|
exclude_companies: list = None,
|
|
126
149
|
include_docs: list = None,
|
|
127
150
|
exclude_docs: list = None,
|
|
151
|
+
brand_safety: str = None,
|
|
152
|
+
language: str = None,
|
|
153
|
+
continent: str = None,
|
|
154
|
+
region: str = None,
|
|
155
|
+
country: str = None,
|
|
156
|
+
sector: str = None,
|
|
157
|
+
industry_group: str = None,
|
|
158
|
+
industry: str = None,
|
|
159
|
+
sub_industry: str = None,
|
|
160
|
+
iab_tier_1: str = None,
|
|
161
|
+
iab_tier_2: str = None,
|
|
162
|
+
iab_tier_3: str = None,
|
|
163
|
+
iab_tier_4: str = None,
|
|
164
|
+
instruction: str = None,
|
|
165
|
+
*args, **kwargs
|
|
128
166
|
) -> None:
|
|
167
|
+
|
|
168
|
+
if "include_languages" in kwargs:
|
|
169
|
+
warnings.warn(
|
|
170
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
171
|
+
"Please use the parameter 'language' instead.",
|
|
172
|
+
)
|
|
173
|
+
if "exclude_languages" in kwargs:
|
|
174
|
+
warnings.warn(
|
|
175
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
176
|
+
"Please use the parameter 'language' instead.",
|
|
177
|
+
)
|
|
178
|
+
|
|
129
179
|
# API Keys
|
|
130
180
|
if nosible_api_key is not None:
|
|
131
181
|
self.nosible_api_key = nosible_api_key
|
|
@@ -192,14 +242,26 @@ class Nosible:
|
|
|
192
242
|
self.visited_start = visited_start
|
|
193
243
|
self.visited_end = visited_end
|
|
194
244
|
self.certain = certain
|
|
195
|
-
self.include_languages = include_languages
|
|
196
|
-
self.exclude_languages = exclude_languages
|
|
197
245
|
self.include_companies = include_companies
|
|
198
246
|
self.exclude_companies = exclude_companies
|
|
199
247
|
self.exclude_docs = exclude_docs
|
|
200
248
|
self.include_docs = include_docs
|
|
201
|
-
|
|
202
|
-
|
|
249
|
+
self.brand_safety = brand_safety
|
|
250
|
+
self.language = language
|
|
251
|
+
self.continent = continent
|
|
252
|
+
self.region = region
|
|
253
|
+
self.country = country
|
|
254
|
+
self.sector = sector
|
|
255
|
+
self.industry_group = industry_group
|
|
256
|
+
self.industry = industry
|
|
257
|
+
self.sub_industry = sub_industry
|
|
258
|
+
self.iab_tier_1 = iab_tier_1
|
|
259
|
+
self.iab_tier_2 = iab_tier_2
|
|
260
|
+
self.iab_tier_3 = iab_tier_3
|
|
261
|
+
self.iab_tier_4 = iab_tier_4
|
|
262
|
+
self.instruction = instruction
|
|
263
|
+
|
|
264
|
+
def fast_search(
|
|
203
265
|
self,
|
|
204
266
|
search: Search = None,
|
|
205
267
|
question: str = None,
|
|
@@ -220,12 +282,25 @@ class Nosible:
|
|
|
220
282
|
visited_start: str = None,
|
|
221
283
|
visited_end: str = None,
|
|
222
284
|
certain: bool = None,
|
|
223
|
-
include_languages: list = None,
|
|
224
|
-
exclude_languages: list = None,
|
|
225
285
|
include_companies: list = None,
|
|
226
286
|
exclude_companies: list = None,
|
|
227
287
|
include_docs: list = None,
|
|
228
288
|
exclude_docs: list = None,
|
|
289
|
+
brand_safety: str = None,
|
|
290
|
+
language: str = None,
|
|
291
|
+
continent: str = None,
|
|
292
|
+
region: str = None,
|
|
293
|
+
country: str = None,
|
|
294
|
+
sector: str = None,
|
|
295
|
+
industry_group: str = None,
|
|
296
|
+
industry: str = None,
|
|
297
|
+
sub_industry: str = None,
|
|
298
|
+
iab_tier_1: str = None,
|
|
299
|
+
iab_tier_2: str = None,
|
|
300
|
+
iab_tier_3: str = None,
|
|
301
|
+
iab_tier_4: str = None,
|
|
302
|
+
instruction: str = None,
|
|
303
|
+
*args, **kwargs
|
|
229
304
|
) -> ResultSet:
|
|
230
305
|
"""
|
|
231
306
|
Run a single search query.
|
|
@@ -273,10 +348,6 @@ class Nosible:
|
|
|
273
348
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
274
349
|
exclude_netlocs : list of str, optional
|
|
275
350
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
276
|
-
include_languages : list of str, optional
|
|
277
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
278
|
-
exclude_languages : list of str, optional
|
|
279
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
280
351
|
include_companies : list of str, optional
|
|
281
352
|
Google KG IDs of public companies to require (Max: 50).
|
|
282
353
|
exclude_companies : list of str, optional
|
|
@@ -285,6 +356,34 @@ class Nosible:
|
|
|
285
356
|
URL hashes of docs to include (Max: 50).
|
|
286
357
|
exclude_docs : list of str, optional
|
|
287
358
|
URL hashes of docs to exclude (Max: 50).
|
|
359
|
+
brand_safety : str, optional
|
|
360
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
361
|
+
language : str, optional
|
|
362
|
+
Language code to use in search (ISO 639-1 language code).
|
|
363
|
+
continent : str, optional
|
|
364
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
365
|
+
region : str, optional
|
|
366
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
367
|
+
country : str, optional
|
|
368
|
+
Country the results must come from.
|
|
369
|
+
sector : str, optional
|
|
370
|
+
Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
371
|
+
industry_group : str, optional
|
|
372
|
+
Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
373
|
+
industry : str, optional
|
|
374
|
+
Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
375
|
+
sub_industry : str, optional
|
|
376
|
+
Sub-industry classification of the content's subject.
|
|
377
|
+
iab_tier_1 : str, optional
|
|
378
|
+
IAB Tier 1 category for the content.
|
|
379
|
+
iab_tier_2 : str, optional
|
|
380
|
+
IAB Tier 2 category for the content.
|
|
381
|
+
iab_tier_3 : str, optional
|
|
382
|
+
IAB Tier 3 category for the content.
|
|
383
|
+
iab_tier_4 : str, optional
|
|
384
|
+
IAB Tier 4 category for the content.
|
|
385
|
+
instruction : str, optional
|
|
386
|
+
Instruction to use with the search query.
|
|
288
387
|
|
|
289
388
|
Returns
|
|
290
389
|
-------
|
|
@@ -314,27 +413,38 @@ class Nosible:
|
|
|
314
413
|
>>> from nosible import Nosible
|
|
315
414
|
>>> s = Search(question="Hedge funds seek to expand into private credit", n_results=10)
|
|
316
415
|
>>> with Nosible() as nos:
|
|
317
|
-
... results = nos.
|
|
416
|
+
... results = nos.fast_search(search=s)
|
|
318
417
|
... print(isinstance(results, ResultSet))
|
|
319
418
|
... print(len(results))
|
|
320
419
|
True
|
|
321
420
|
10
|
|
322
421
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
323
|
-
>>> nos.
|
|
422
|
+
>>> nos.fast_search() # doctest: +ELLIPSIS
|
|
324
423
|
Traceback (most recent call last):
|
|
325
424
|
...
|
|
326
425
|
TypeError: Specify exactly one of 'question' or 'search'.
|
|
327
426
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
328
|
-
>>> nos.
|
|
427
|
+
>>> nos.fast_search(question="foo", search=s) # doctest: +ELLIPSIS
|
|
329
428
|
Traceback (most recent call last):
|
|
330
429
|
...
|
|
331
430
|
TypeError: Specify exactly one of 'question' or 'search'.
|
|
332
431
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
333
|
-
>>> nos.
|
|
432
|
+
>>> nos.fast_search(question="foo", n_results=101) # doctest: +ELLIPSIS
|
|
334
433
|
Traceback (most recent call last):
|
|
335
434
|
...
|
|
336
435
|
ValueError: Search can not have more than 100 results - Use bulk search instead.
|
|
337
436
|
"""
|
|
437
|
+
if "include_languages" in kwargs:
|
|
438
|
+
warnings.warn(
|
|
439
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
440
|
+
"Please use the parameter 'language' instead.",
|
|
441
|
+
)
|
|
442
|
+
if "exclude_languages" in kwargs:
|
|
443
|
+
warnings.warn(
|
|
444
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
445
|
+
"Please use the parameter 'language' instead.",
|
|
446
|
+
)
|
|
447
|
+
|
|
338
448
|
if (question is None and search is None) or (question is not None and search is not None):
|
|
339
449
|
raise TypeError("Specify exactly one of 'question' or 'search'.")
|
|
340
450
|
|
|
@@ -357,25 +467,37 @@ class Nosible:
|
|
|
357
467
|
visited_start=visited_start,
|
|
358
468
|
visited_end=visited_end,
|
|
359
469
|
certain=certain,
|
|
360
|
-
include_languages=include_languages,
|
|
361
|
-
exclude_languages=exclude_languages,
|
|
362
470
|
include_companies=include_companies,
|
|
363
471
|
exclude_companies=exclude_companies,
|
|
364
472
|
include_docs=include_docs,
|
|
365
473
|
exclude_docs=exclude_docs,
|
|
474
|
+
brand_safety=brand_safety,
|
|
475
|
+
language=language,
|
|
476
|
+
continent=continent,
|
|
477
|
+
region=region,
|
|
478
|
+
country=country,
|
|
479
|
+
sector=sector,
|
|
480
|
+
industry_group=industry_group,
|
|
481
|
+
industry=industry,
|
|
482
|
+
sub_industry=sub_industry,
|
|
483
|
+
iab_tier_1=iab_tier_1,
|
|
484
|
+
iab_tier_2=iab_tier_2,
|
|
485
|
+
iab_tier_3=iab_tier_3,
|
|
486
|
+
iab_tier_4=iab_tier_4,
|
|
487
|
+
instruction=instruction,
|
|
366
488
|
)
|
|
367
489
|
|
|
368
490
|
future = self._executor.submit(self._search_single, search_obj)
|
|
369
491
|
try:
|
|
370
492
|
return future.result()
|
|
371
493
|
except ValueError:
|
|
372
|
-
# Propagate our own
|
|
494
|
+
# Propagate our own "too many results" error directly.
|
|
373
495
|
raise
|
|
374
496
|
except Exception as e:
|
|
375
497
|
self.logger.warning(f"Search for {search_obj.question!r} failed: {e}")
|
|
376
498
|
raise RuntimeError(f"Search for {search_obj.question!r} failed") from e
|
|
377
499
|
|
|
378
|
-
def
|
|
500
|
+
def fast_searches(
|
|
379
501
|
self,
|
|
380
502
|
*,
|
|
381
503
|
searches: Union[SearchSet, list[Search]] = None,
|
|
@@ -397,12 +519,25 @@ class Nosible:
|
|
|
397
519
|
visited_start: str = None,
|
|
398
520
|
visited_end: str = None,
|
|
399
521
|
certain: bool = None,
|
|
400
|
-
include_languages: list = None,
|
|
401
|
-
exclude_languages: list = None,
|
|
402
522
|
include_companies: list = None,
|
|
403
523
|
exclude_companies: list = None,
|
|
404
524
|
include_docs: list = None,
|
|
405
525
|
exclude_docs: list = None,
|
|
526
|
+
brand_safety: str = None,
|
|
527
|
+
language: str = None,
|
|
528
|
+
continent: str = None,
|
|
529
|
+
region: str = None,
|
|
530
|
+
country: str = None,
|
|
531
|
+
sector: str = None,
|
|
532
|
+
industry_group: str = None,
|
|
533
|
+
industry: str = None,
|
|
534
|
+
sub_industry: str = None,
|
|
535
|
+
iab_tier_1: str = None,
|
|
536
|
+
iab_tier_2: str = None,
|
|
537
|
+
iab_tier_3: str = None,
|
|
538
|
+
iab_tier_4: str = None,
|
|
539
|
+
instruction: str = None,
|
|
540
|
+
**kwargs
|
|
406
541
|
) -> Iterator[ResultSet]:
|
|
407
542
|
"""
|
|
408
543
|
Run multiple searches concurrently and yield results.
|
|
@@ -447,9 +582,6 @@ class Nosible:
|
|
|
447
582
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
448
583
|
exclude_netlocs : list of str, optional
|
|
449
584
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
450
|
-
include_languages : list of str, optional
|
|
451
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
452
|
-
exclude_languages : list of str, optional
|
|
453
585
|
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
454
586
|
include_companies : list of str, optional
|
|
455
587
|
Google KG IDs of public companies to require (Max: 50).
|
|
@@ -459,6 +591,34 @@ class Nosible:
|
|
|
459
591
|
URL hashes of docs to include (Max: 50).
|
|
460
592
|
exclude_docs : list of str, optional
|
|
461
593
|
URL hashes of docs to exclude (Max: 50).
|
|
594
|
+
brand_safety : str, optional
|
|
595
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
596
|
+
language : str, optional
|
|
597
|
+
Language code to use in search (ISO 639-1 language code).
|
|
598
|
+
continent : str, optional
|
|
599
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
600
|
+
region : str, optional
|
|
601
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
602
|
+
country : str, optional
|
|
603
|
+
Country the results must come from.
|
|
604
|
+
sector : str, optional
|
|
605
|
+
GICS Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
606
|
+
industry_group : str, optional
|
|
607
|
+
GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
608
|
+
industry : str, optional
|
|
609
|
+
GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
610
|
+
sub_industry : str, optional
|
|
611
|
+
GICS Sub-industry classification of the content's subject.
|
|
612
|
+
iab_tier_1 : str, optional
|
|
613
|
+
IAB Tier 1 category for the content.
|
|
614
|
+
iab_tier_2 : str, optional
|
|
615
|
+
IAB Tier 2 category for the content.
|
|
616
|
+
iab_tier_3 : str, optional
|
|
617
|
+
IAB Tier 3 category for the content.
|
|
618
|
+
iab_tier_4 : str, optional
|
|
619
|
+
IAB Tier 4 category for the content.
|
|
620
|
+
instruction : str, optional
|
|
621
|
+
Instruction to use with the search query.
|
|
462
622
|
|
|
463
623
|
Returns
|
|
464
624
|
------
|
|
@@ -489,7 +649,7 @@ class Nosible:
|
|
|
489
649
|
... ]
|
|
490
650
|
... )
|
|
491
651
|
>>> with Nosible() as nos:
|
|
492
|
-
... results_list = list(nos.
|
|
652
|
+
... results_list = list(nos.fast_searches(searches=queries))
|
|
493
653
|
>>> print(len(results_list))
|
|
494
654
|
2
|
|
495
655
|
>>> for r in results_list:
|
|
@@ -498,7 +658,7 @@ class Nosible:
|
|
|
498
658
|
True True
|
|
499
659
|
>>> with Nosible() as nos:
|
|
500
660
|
... results_list_str = list(
|
|
501
|
-
... nos.
|
|
661
|
+
... nos.fast_searches(
|
|
502
662
|
... questions=[
|
|
503
663
|
... "What are the terms of the partnership between Microsoft and OpenAI?",
|
|
504
664
|
... "What are the terms of the partnership between Volkswagen and Uber?",
|
|
@@ -506,17 +666,28 @@ class Nosible:
|
|
|
506
666
|
... )
|
|
507
667
|
... )
|
|
508
668
|
>>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +ELLIPSIS
|
|
509
|
-
>>> nos.
|
|
669
|
+
>>> nos.fast_searches() # doctest: +ELLIPSIS
|
|
510
670
|
Traceback (most recent call last):
|
|
511
671
|
...
|
|
512
672
|
TypeError: Specify exactly one of 'questions' or 'searches'.
|
|
513
673
|
>>> from nosible import Nosible
|
|
514
674
|
>>> nos = Nosible(nosible_api_key="test|xyz")
|
|
515
|
-
>>> nos.
|
|
675
|
+
>>> nos.fast_searches(questions=["A"], searches=SearchSet(searches=["A"])) # doctest: +ELLIPSIS
|
|
516
676
|
Traceback (most recent call last):
|
|
517
677
|
...
|
|
518
678
|
TypeError: Specify exactly one of 'questions' or 'searches'.
|
|
519
679
|
"""
|
|
680
|
+
if "include_languages" in kwargs:
|
|
681
|
+
warnings.warn(
|
|
682
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
683
|
+
"Please use the parameter 'language' instead.",
|
|
684
|
+
)
|
|
685
|
+
if "exclude_languages" in kwargs:
|
|
686
|
+
warnings.warn(
|
|
687
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
688
|
+
"Please use the parameter 'language' instead.",
|
|
689
|
+
)
|
|
690
|
+
|
|
520
691
|
if (questions is None and searches is None) or (questions is not None and searches is not None):
|
|
521
692
|
raise TypeError("Specify exactly one of 'questions' or 'searches'.")
|
|
522
693
|
|
|
@@ -543,12 +714,24 @@ class Nosible:
|
|
|
543
714
|
visited_start=visited_start,
|
|
544
715
|
visited_end=visited_end,
|
|
545
716
|
certain=certain,
|
|
546
|
-
include_languages=include_languages,
|
|
547
|
-
exclude_languages=exclude_languages,
|
|
548
717
|
include_companies=include_companies,
|
|
549
718
|
exclude_companies=exclude_companies,
|
|
550
719
|
include_docs=include_docs,
|
|
551
720
|
exclude_docs=exclude_docs,
|
|
721
|
+
brand_safety=brand_safety,
|
|
722
|
+
language=language,
|
|
723
|
+
continent=continent,
|
|
724
|
+
region=region,
|
|
725
|
+
country=country,
|
|
726
|
+
sector=sector,
|
|
727
|
+
industry_group=industry_group,
|
|
728
|
+
industry=industry,
|
|
729
|
+
sub_industry=sub_industry,
|
|
730
|
+
iab_tier_1=iab_tier_1,
|
|
731
|
+
iab_tier_2=iab_tier_2,
|
|
732
|
+
iab_tier_3=iab_tier_3,
|
|
733
|
+
iab_tier_4=iab_tier_4,
|
|
734
|
+
instruction=instruction,
|
|
552
735
|
)
|
|
553
736
|
|
|
554
737
|
futures = [self._executor.submit(self._search_single, s) for s in searches_list]
|
|
@@ -562,6 +745,7 @@ class Nosible:
|
|
|
562
745
|
|
|
563
746
|
return _run_generator()
|
|
564
747
|
|
|
748
|
+
|
|
565
749
|
@_rate_limited("fast")
|
|
566
750
|
def _search_single(self, search_obj: Search) -> ResultSet:
|
|
567
751
|
"""
|
|
@@ -590,7 +774,7 @@ class Nosible:
|
|
|
590
774
|
>>> from nosible import Nosible
|
|
591
775
|
>>> s = Search(question="Nvidia insiders dump more than $1 billion in stock", n_results=200)
|
|
592
776
|
>>> with Nosible() as nos:
|
|
593
|
-
... results = nos.
|
|
777
|
+
... results = nos.fast_search(search=s) # doctest: +ELLIPSIS
|
|
594
778
|
Traceback (most recent call last):
|
|
595
779
|
...
|
|
596
780
|
ValueError: Search can not have more than 100 results - Use bulk search instead.
|
|
@@ -618,18 +802,32 @@ class Nosible:
|
|
|
618
802
|
visited_start = search_obj.visited_start if search_obj.visited_start is not None else self.visited_start
|
|
619
803
|
visited_end = search_obj.visited_end if search_obj.visited_end is not None else self.visited_end
|
|
620
804
|
certain = search_obj.certain if search_obj.certain is not None else self.certain
|
|
621
|
-
include_languages = (
|
|
622
|
-
search_obj.include_languages if search_obj.include_languages is not None else self.include_languages
|
|
623
|
-
)
|
|
624
|
-
exclude_languages = (
|
|
625
|
-
search_obj.exclude_languages if search_obj.exclude_languages is not None else self.exclude_languages
|
|
626
|
-
)
|
|
627
805
|
include_companies = (
|
|
628
806
|
search_obj.include_companies if search_obj.include_companies is not None else self.include_companies
|
|
629
807
|
)
|
|
630
808
|
exclude_companies = (
|
|
631
809
|
search_obj.exclude_companies if search_obj.exclude_companies is not None else self.exclude_companies
|
|
632
810
|
)
|
|
811
|
+
include_docs = search_obj.include_docs if search_obj.include_docs is not None else self.include_docs
|
|
812
|
+
exclude_docs = search_obj.exclude_docs if search_obj.exclude_docs is not None else self.exclude_docs
|
|
813
|
+
brand_safety = search_obj.brand_safety if search_obj.brand_safety is not None else self.brand_safety
|
|
814
|
+
language = search_obj.language if search_obj.language is not None else self.language
|
|
815
|
+
continent = search_obj.continent if search_obj.continent is not None else self.continent
|
|
816
|
+
region = search_obj.region if search_obj.region is not None else self.region
|
|
817
|
+
country = search_obj.country if search_obj.country is not None else self.country
|
|
818
|
+
sector = search_obj.sector if search_obj.sector is not None else self.sector
|
|
819
|
+
industry_group = search_obj.industry_group if search_obj.industry_group is not None else self.industry_group
|
|
820
|
+
industry = search_obj.industry if search_obj.industry is not None else self.industry
|
|
821
|
+
sub_industry = search_obj.sub_industry if search_obj.sub_industry is not None else self.sub_industry
|
|
822
|
+
iab_tier_1 = search_obj.iab_tier_1 if search_obj.iab_tier_1 is not None else self.iab_tier_1
|
|
823
|
+
iab_tier_2 = search_obj.iab_tier_2 if search_obj.iab_tier_2 is not None else self.iab_tier_2
|
|
824
|
+
iab_tier_3 = search_obj.iab_tier_3 if search_obj.iab_tier_3 is not None else self.iab_tier_3
|
|
825
|
+
iab_tier_4 = search_obj.iab_tier_4 if search_obj.iab_tier_4 is not None else self.iab_tier_4
|
|
826
|
+
instruction = search_obj.instruction if search_obj.instruction is not None else self.instruction
|
|
827
|
+
|
|
828
|
+
must_include = must_include if must_include is not None else []
|
|
829
|
+
must_exclude = must_exclude if must_exclude is not None else []
|
|
830
|
+
min_similarity = min_similarity if min_similarity is not None else 0
|
|
633
831
|
|
|
634
832
|
if not (0.0 <= min_similarity <= 1.0):
|
|
635
833
|
raise ValueError(f"Invalid min_simalarity: {min_similarity}. Must be [0,1].")
|
|
@@ -650,10 +848,10 @@ class Nosible:
|
|
|
650
848
|
visited_start=visited_start,
|
|
651
849
|
visited_end=visited_end,
|
|
652
850
|
certain=certain,
|
|
653
|
-
include_languages=include_languages,
|
|
654
|
-
exclude_languages=exclude_languages,
|
|
655
851
|
include_companies=include_companies,
|
|
656
852
|
exclude_companies=exclude_companies,
|
|
853
|
+
include_docs=include_docs,
|
|
854
|
+
exclude_docs=exclude_docs,
|
|
657
855
|
)
|
|
658
856
|
|
|
659
857
|
# Enforce limits
|
|
@@ -674,6 +872,25 @@ class Nosible:
|
|
|
674
872
|
"must_include": must_include,
|
|
675
873
|
"must_exclude": must_exclude,
|
|
676
874
|
}
|
|
875
|
+
optional = {
|
|
876
|
+
"instruction": instruction,
|
|
877
|
+
"brand_safety":brand_safety,
|
|
878
|
+
"language": language,
|
|
879
|
+
"continent": continent,
|
|
880
|
+
"region": region,
|
|
881
|
+
"country": country,
|
|
882
|
+
"sector": sector,
|
|
883
|
+
"industry_group": industry_group,
|
|
884
|
+
"industry": industry,
|
|
885
|
+
"sub_industry": sub_industry,
|
|
886
|
+
"iab_tier_1": iab_tier_1,
|
|
887
|
+
"iab_tier_2": iab_tier_2,
|
|
888
|
+
"iab_tier_3": iab_tier_3,
|
|
889
|
+
"iab_tier_4": iab_tier_4,
|
|
890
|
+
}
|
|
891
|
+
for key, val in optional.items():
|
|
892
|
+
if val is not None:
|
|
893
|
+
payload[key] = val
|
|
677
894
|
|
|
678
895
|
resp = self._post(url="https://www.nosible.ai/search/v1/fast-search", payload=payload)
|
|
679
896
|
resp.raise_for_status()
|
|
@@ -739,18 +956,31 @@ class Nosible:
|
|
|
739
956
|
autogenerate_expansions: bool = False,
|
|
740
957
|
publish_start: str = None,
|
|
741
958
|
publish_end: str = None,
|
|
742
|
-
include_netlocs: list = None,
|
|
743
|
-
exclude_netlocs: list = None,
|
|
744
959
|
visited_start: str = None,
|
|
745
960
|
visited_end: str = None,
|
|
746
961
|
certain: bool = None,
|
|
747
|
-
|
|
748
|
-
|
|
962
|
+
include_netlocs: list = None,
|
|
963
|
+
exclude_netlocs: list = None,
|
|
749
964
|
include_companies: list = None,
|
|
750
965
|
exclude_companies: list = None,
|
|
751
966
|
include_docs: list = None,
|
|
752
967
|
exclude_docs: list = None,
|
|
968
|
+
brand_safety: str = None,
|
|
969
|
+
language: str = None,
|
|
970
|
+
continent: str = None,
|
|
971
|
+
region: str = None,
|
|
972
|
+
country: str = None,
|
|
973
|
+
sector: str = None,
|
|
974
|
+
industry_group: str = None,
|
|
975
|
+
industry: str = None,
|
|
976
|
+
sub_industry: str = None,
|
|
977
|
+
iab_tier_1: str = None,
|
|
978
|
+
iab_tier_2: str = None,
|
|
979
|
+
iab_tier_3: str = None,
|
|
980
|
+
iab_tier_4: str = None,
|
|
981
|
+
instruction: str = None,
|
|
753
982
|
verbose: bool = False,
|
|
983
|
+
**kwargs,
|
|
754
984
|
) -> ResultSet:
|
|
755
985
|
"""
|
|
756
986
|
Perform a bulk (slow) search query (1,000–10,000 results) against the Nosible API.
|
|
@@ -795,10 +1025,6 @@ class Nosible:
|
|
|
795
1025
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
796
1026
|
exclude_netlocs : list of str, optional
|
|
797
1027
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
798
|
-
include_languages : list of str, optional
|
|
799
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
800
|
-
exclude_languages : list of str, optional
|
|
801
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
802
1028
|
include_companies : list of str, optional
|
|
803
1029
|
Google KG IDs of public companies to require (Max: 50).
|
|
804
1030
|
exclude_companies : list of str, optional
|
|
@@ -807,6 +1033,34 @@ class Nosible:
|
|
|
807
1033
|
URL hashes of docs to include (Max: 50).
|
|
808
1034
|
exclude_docs : list of str, optional
|
|
809
1035
|
URL hashes of docs to exclude (Max: 50).
|
|
1036
|
+
brand_safety : str, optional
|
|
1037
|
+
Whether it is safe, sensitive, or unsafe to advertise on this content.
|
|
1038
|
+
language : str, optional
|
|
1039
|
+
Language code to use in search (ISO 639-1 language code).
|
|
1040
|
+
continent : str, optional
|
|
1041
|
+
Continent the results must come from (e.g., "Europe", "Asia").
|
|
1042
|
+
region : str, optional
|
|
1043
|
+
Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
|
|
1044
|
+
country : str, optional
|
|
1045
|
+
Country the results must come from.
|
|
1046
|
+
sector : str, optional
|
|
1047
|
+
Sector the results must relate to (e.g., "Energy", "Information Technology").
|
|
1048
|
+
industry_group : str, optional
|
|
1049
|
+
Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
|
|
1050
|
+
industry : str, optional
|
|
1051
|
+
Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
|
|
1052
|
+
sub_industry : str, optional
|
|
1053
|
+
Sub-industry classification of the content's subject.
|
|
1054
|
+
iab_tier_1 : str, optional
|
|
1055
|
+
IAB Tier 1 category for the content.
|
|
1056
|
+
iab_tier_2 : str, optional
|
|
1057
|
+
IAB Tier 2 category for the content.
|
|
1058
|
+
iab_tier_3 : str, optional
|
|
1059
|
+
IAB Tier 3 category for the content.
|
|
1060
|
+
iab_tier_4 : str, optional
|
|
1061
|
+
IAB Tier 4 category for the content.
|
|
1062
|
+
instruction : str, optional
|
|
1063
|
+
Instruction to use with the search query.
|
|
810
1064
|
verbose : bool, optional
|
|
811
1065
|
Show verbose output, Bulk search will print more information.
|
|
812
1066
|
|
|
@@ -872,6 +1126,17 @@ class Nosible:
|
|
|
872
1126
|
...
|
|
873
1127
|
ValueError: Bulk search cannot have more than 10000 results per query.
|
|
874
1128
|
"""
|
|
1129
|
+
if "include_languages" in kwargs:
|
|
1130
|
+
warnings.warn(
|
|
1131
|
+
"The 'include_languages' parameter is deprecated and will be removed in a future release. "
|
|
1132
|
+
"Please use the parameter 'language' instead.",
|
|
1133
|
+
)
|
|
1134
|
+
if "exclude_languages" in kwargs:
|
|
1135
|
+
warnings.warn(
|
|
1136
|
+
"The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
|
|
1137
|
+
"Please use the parameter 'language' instead.",
|
|
1138
|
+
)
|
|
1139
|
+
|
|
875
1140
|
from cryptography.fernet import Fernet
|
|
876
1141
|
|
|
877
1142
|
previous_level = self.logger.level
|
|
@@ -894,11 +1159,8 @@ class Nosible:
|
|
|
894
1159
|
n_contextify = search.n_contextify if search.n_contextify is not None else n_contextify
|
|
895
1160
|
algorithm = search.algorithm if search.algorithm is not None else algorithm
|
|
896
1161
|
min_similarity = search.min_similarity if search.min_similarity is not None else min_similarity
|
|
897
|
-
min_similarity = min_similarity if min_similarity is not None else 0
|
|
898
1162
|
must_include = search.must_include if search.must_include is not None else must_include
|
|
899
|
-
must_include = must_include if must_include is not None else []
|
|
900
1163
|
must_exclude = search.must_exclude if search.must_exclude is not None else must_exclude
|
|
901
|
-
must_exclude = must_exclude if must_exclude is not None else []
|
|
902
1164
|
autogenerate_expansions = (
|
|
903
1165
|
search.autogenerate_expansions
|
|
904
1166
|
if search.autogenerate_expansions is not None
|
|
@@ -911,12 +1173,24 @@ class Nosible:
|
|
|
911
1173
|
visited_start = search.visited_start if search.visited_start is not None else visited_start
|
|
912
1174
|
visited_end = search.visited_end if search.visited_end is not None else visited_end
|
|
913
1175
|
certain = search.certain if search.certain is not None else certain
|
|
914
|
-
include_languages = search.include_languages if search.include_languages is not None else include_languages
|
|
915
|
-
exclude_languages = search.exclude_languages if search.exclude_languages is not None else exclude_languages
|
|
916
1176
|
include_companies = search.include_companies if search.include_companies is not None else include_companies
|
|
917
1177
|
exclude_companies = search.exclude_companies if search.exclude_companies is not None else exclude_companies
|
|
918
|
-
include_docs = search.include_docs if search.include_docs is not None else include_docs
|
|
919
|
-
exclude_docs = search.exclude_docs if search.exclude_docs is not None else exclude_docs
|
|
1178
|
+
include_docs = search.include_docs if search.include_docs is not None else self.include_docs
|
|
1179
|
+
exclude_docs = search.exclude_docs if search.exclude_docs is not None else self.exclude_docs
|
|
1180
|
+
brand_safety = search.brand_safety if search.brand_safety is not None else self.brand_safety
|
|
1181
|
+
language = search.language if search.language is not None else self.language
|
|
1182
|
+
continent = search.continent if search.continent is not None else self.continent
|
|
1183
|
+
region = search.region if search.region is not None else self.region
|
|
1184
|
+
country = search.country if search.country is not None else self.country
|
|
1185
|
+
sector = search.sector if search.sector is not None else self.sector
|
|
1186
|
+
industry_group = search.industry_group if search.industry_group is not None else self.industry_group
|
|
1187
|
+
industry = search.industry if search.industry is not None else self.industry
|
|
1188
|
+
sub_industry = search.sub_industry if search.sub_industry is not None else self.sub_industry
|
|
1189
|
+
iab_tier_1 = search.iab_tier_1 if search.iab_tier_1 is not None else self.iab_tier_1
|
|
1190
|
+
iab_tier_2 = search.iab_tier_2 if search.iab_tier_2 is not None else self.iab_tier_2
|
|
1191
|
+
iab_tier_3 = search.iab_tier_3 if search.iab_tier_3 is not None else self.iab_tier_3
|
|
1192
|
+
iab_tier_4 = search.iab_tier_4 if search.iab_tier_4 is not None else self.iab_tier_4
|
|
1193
|
+
instruction = search.instruction if search.instruction is not None else self.instruction
|
|
920
1194
|
|
|
921
1195
|
# Default expansions and filters
|
|
922
1196
|
if expansions is None:
|
|
@@ -936,13 +1210,11 @@ class Nosible:
|
|
|
936
1210
|
sql_filter = self._format_sql(
|
|
937
1211
|
publish_start=publish_start if publish_start is not None else self.publish_start,
|
|
938
1212
|
publish_end=publish_end if publish_end is not None else self.publish_end,
|
|
939
|
-
include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
|
|
940
|
-
exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
|
|
941
1213
|
visited_start=visited_start if visited_start is not None else self.visited_start,
|
|
942
1214
|
visited_end=visited_end if visited_end is not None else self.visited_end,
|
|
943
1215
|
certain=certain if certain is not None else self.certain,
|
|
944
|
-
|
|
945
|
-
|
|
1216
|
+
include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
|
|
1217
|
+
exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
|
|
946
1218
|
include_companies=include_companies if include_companies is not None else self.include_companies,
|
|
947
1219
|
exclude_companies=exclude_companies if exclude_companies is not None else self.exclude_companies,
|
|
948
1220
|
include_docs=include_docs if include_docs is not None else self.include_docs,
|
|
@@ -979,6 +1251,26 @@ class Nosible:
|
|
|
979
1251
|
"must_include": must_include,
|
|
980
1252
|
"must_exclude": must_exclude,
|
|
981
1253
|
}
|
|
1254
|
+
optional = {
|
|
1255
|
+
"instruction": instruction,
|
|
1256
|
+
"brand_safety": brand_safety,
|
|
1257
|
+
"language": language,
|
|
1258
|
+
"continent": continent,
|
|
1259
|
+
"region": region,
|
|
1260
|
+
"country": country,
|
|
1261
|
+
"sector": sector,
|
|
1262
|
+
"industry_group": industry_group,
|
|
1263
|
+
"industry": industry,
|
|
1264
|
+
"sub_industry": sub_industry,
|
|
1265
|
+
"iab_tier_1": iab_tier_1,
|
|
1266
|
+
"iab_tier_2": iab_tier_2,
|
|
1267
|
+
"iab_tier_3": iab_tier_3,
|
|
1268
|
+
"iab_tier_4": iab_tier_4,
|
|
1269
|
+
}
|
|
1270
|
+
for key, val in optional.items():
|
|
1271
|
+
if val is not None:
|
|
1272
|
+
payload[key] = val
|
|
1273
|
+
|
|
982
1274
|
resp = self._post(url="https://www.nosible.ai/search/v1/slow-search", payload=payload)
|
|
983
1275
|
try:
|
|
984
1276
|
resp.raise_for_status()
|
|
@@ -1019,7 +1311,7 @@ class Nosible:
|
|
|
1019
1311
|
show_context: bool = True,
|
|
1020
1312
|
) -> str:
|
|
1021
1313
|
"""
|
|
1022
|
-
RAG-style question answering: retrieve top `n_results` via `.
|
|
1314
|
+
RAG-style question answering: retrieve top `n_results` via `.fast_search()`
|
|
1023
1315
|
then answer `query` using those documents as context.
|
|
1024
1316
|
|
|
1025
1317
|
Parameters
|
|
@@ -1068,7 +1360,7 @@ class Nosible:
|
|
|
1068
1360
|
raise ValueError("An LLM API key is required for answer().")
|
|
1069
1361
|
|
|
1070
1362
|
# Retrieve top documents
|
|
1071
|
-
results = self.
|
|
1363
|
+
results = self.fast_search(question=query, n_results=n_results, min_similarity=min_similarity)
|
|
1072
1364
|
|
|
1073
1365
|
# Build RAG context
|
|
1074
1366
|
context = ""
|
|
@@ -1778,13 +2070,11 @@ class Nosible:
|
|
|
1778
2070
|
self,
|
|
1779
2071
|
publish_start: str = None,
|
|
1780
2072
|
publish_end: str = None,
|
|
1781
|
-
include_netlocs: list = None,
|
|
1782
|
-
exclude_netlocs: list = None,
|
|
1783
2073
|
visited_start: str = None,
|
|
1784
2074
|
visited_end: str = None,
|
|
1785
2075
|
certain: bool = None,
|
|
1786
|
-
|
|
1787
|
-
|
|
2076
|
+
include_netlocs: list = None,
|
|
2077
|
+
exclude_netlocs: list = None,
|
|
1788
2078
|
include_companies: list = None,
|
|
1789
2079
|
exclude_companies: list = None,
|
|
1790
2080
|
include_docs: list = None,
|
|
@@ -1809,10 +2099,6 @@ class Nosible:
|
|
|
1809
2099
|
List of netlocs (domains) to include in the search. (Max: 50)
|
|
1810
2100
|
exclude_netlocs : list of str, optional
|
|
1811
2101
|
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
1812
|
-
include_languages : list of str, optional
|
|
1813
|
-
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
1814
|
-
exclude_languages : list of str, optional
|
|
1815
|
-
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
1816
2102
|
include_companies : list of str, optional
|
|
1817
2103
|
Google KG IDs of public companies to require (Max: 50).
|
|
1818
2104
|
exclude_companies : list of str, optional
|
|
@@ -1845,8 +2131,6 @@ class Nosible:
|
|
|
1845
2131
|
for name, value in [
|
|
1846
2132
|
("include_netlocs", include_netlocs),
|
|
1847
2133
|
("exclude_netlocs", exclude_netlocs),
|
|
1848
|
-
("include_languages", include_languages),
|
|
1849
|
-
("exclude_languages", exclude_languages),
|
|
1850
2134
|
("include_companies", include_companies),
|
|
1851
2135
|
("exclude_companies", exclude_companies),
|
|
1852
2136
|
("include_docs", include_docs),
|
|
@@ -1918,14 +2202,6 @@ class Nosible:
|
|
|
1918
2202
|
f"(company_1 NOT IN ({company_list}) AND company_2 NOT IN ({company_list}) AND company_3 NOT IN ({company_list}))"
|
|
1919
2203
|
)
|
|
1920
2204
|
|
|
1921
|
-
# Include / exclude languages
|
|
1922
|
-
if include_languages:
|
|
1923
|
-
langs = ", ".join(f"'{lang}-{lang}'" for lang in include_languages)
|
|
1924
|
-
clauses.append(f"language IN ({langs})")
|
|
1925
|
-
if exclude_languages:
|
|
1926
|
-
langs = ", ".join(f"'{lang}-{lang}'" for lang in exclude_languages)
|
|
1927
|
-
clauses.append(f"language NOT IN ({langs})")
|
|
1928
|
-
|
|
1929
2205
|
if include_docs:
|
|
1930
2206
|
# Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
|
|
1931
2207
|
doc_hashes = ", ".join(f"'{doc}'" for doc in include_docs)
|