nosible 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nosible/nosible_client.py CHANGED
@@ -11,6 +11,7 @@ from collections.abc import Iterator
11
11
  from concurrent.futures import ThreadPoolExecutor
12
12
  from datetime import datetime
13
13
  from typing import Optional, Union
14
+ import warnings
14
15
 
15
16
  import httpx
16
17
  from tenacity import (
@@ -73,10 +74,6 @@ class Nosible:
73
74
  List of netlocs (domains) to include in the search. (Max: 50)
74
75
  exclude_netlocs : list of str, optional
75
76
  List of netlocs (domains) to exclude in the search. (Max: 50)
76
- include_languages : list of str, optional
77
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
78
- exclude_languages : list of str, optional
79
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
80
77
  include_companies : list of str, optional
81
78
  Google KG IDs of public companies to require (Max: 50).
82
79
  exclude_companies : list of str, optional
@@ -85,6 +82,32 @@ class Nosible:
85
82
  URL hashes of docs to include (Max: 50).
86
83
  exclude_docs : list of str, optional
87
84
  URL hashes of docs to exclude (Max: 50).
85
+ brand_safety : str, optional
86
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
87
+ language : str, optional
88
+ Language code to use in search (ISO 639-1 language code).
89
+ continent : str, optional
90
+ Continent the results must come from (e.g., "Europe", "Asia").
91
+ region : str, optional
92
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
93
+ country : str, optional
94
+ Country the results must come from.
95
+ sector : str, optional
96
+ Sector the results must relate to (e.g., "Energy", "Information Technology").
97
+ industry_group : str, optional
98
+ Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
99
+ industry : str, optional
100
+ Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
101
+ sub_industry : str, optional
102
+ Sub-industry classification of the content's subject.
103
+ iab_tier_1 : str, optional
104
+ IAB Tier 1 category for the content.
105
+ iab_tier_2 : str, optional
106
+ IAB Tier 2 category for the content.
107
+ iab_tier_3 : str, optional
108
+ IAB Tier 3 category for the content.
109
+ iab_tier_4 : str, optional
110
+ IAB Tier 4 category for the content.
88
111
 
89
112
  Notes
90
113
  -----
@@ -99,7 +122,7 @@ class Nosible:
99
122
  --------
100
123
  >>> from nosible import Nosible # doctest: +SKIP
101
124
  >>> nos = Nosible(nosible_api_key="your_api_key_here") # doctest: +SKIP
102
- >>> search = nos.search(question="What is Nosible?", n_results=5) # doctest: +SKIP
125
+ >>> search = nos.fast_search(question="What is Nosible?", n_results=5) # doctest: +SKIP
103
126
  """
104
127
 
105
128
  def __init__(
@@ -119,13 +142,37 @@ class Nosible:
119
142
  visited_start: str = None,
120
143
  visited_end: str = None,
121
144
  certain: bool = None,
122
- include_languages: list = None,
123
- exclude_languages: list = None,
124
145
  include_companies: list = None,
125
146
  exclude_companies: list = None,
126
147
  include_docs: list = None,
127
148
  exclude_docs: list = None,
149
+ brand_safety: str = None,
150
+ language: str = None,
151
+ continent: str = None,
152
+ region: str = None,
153
+ country: str = None,
154
+ sector: str = None,
155
+ industry_group: str = None,
156
+ industry: str = None,
157
+ sub_industry: str = None,
158
+ iab_tier_1: str = None,
159
+ iab_tier_2: str = None,
160
+ iab_tier_3: str = None,
161
+ iab_tier_4: str = None,
162
+ *args, **kwargs
128
163
  ) -> None:
164
+
165
+ if "include_languages" in kwargs:
166
+ warnings.warn(
167
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
168
+ "Please use the parameter 'language' instead.",
169
+ )
170
+ if "exclude_languages" in kwargs:
171
+ warnings.warn(
172
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
173
+ "Please use the parameter 'language' instead.",
174
+ )
175
+
129
176
  # API Keys
130
177
  if nosible_api_key is not None:
131
178
  self.nosible_api_key = nosible_api_key
@@ -192,14 +239,25 @@ class Nosible:
192
239
  self.visited_start = visited_start
193
240
  self.visited_end = visited_end
194
241
  self.certain = certain
195
- self.include_languages = include_languages
196
- self.exclude_languages = exclude_languages
197
242
  self.include_companies = include_companies
198
243
  self.exclude_companies = exclude_companies
199
244
  self.exclude_docs = exclude_docs
200
245
  self.include_docs = include_docs
201
-
202
- def search(
246
+ self.brand_safety = brand_safety
247
+ self.language = language
248
+ self.continent = continent
249
+ self.region = region
250
+ self.country = country
251
+ self.sector = sector
252
+ self.industry_group = industry_group
253
+ self.industry = industry
254
+ self.sub_industry = sub_industry
255
+ self.iab_tier_1 = iab_tier_1
256
+ self.iab_tier_2 = iab_tier_2
257
+ self.iab_tier_3 = iab_tier_3
258
+ self.iab_tier_4 = iab_tier_4
259
+
260
+ def fast_search(
203
261
  self,
204
262
  search: Search = None,
205
263
  question: str = None,
@@ -208,7 +266,7 @@ class Nosible:
208
266
  n_results: int = 100,
209
267
  n_probes: int = 30,
210
268
  n_contextify: int = 128,
211
- algorithm: str = "hybrid-2",
269
+ algorithm: str = "hybrid-3",
212
270
  min_similarity: float = None,
213
271
  must_include: list[str] = None,
214
272
  must_exclude: list[str] = None,
@@ -220,12 +278,24 @@ class Nosible:
220
278
  visited_start: str = None,
221
279
  visited_end: str = None,
222
280
  certain: bool = None,
223
- include_languages: list = None,
224
- exclude_languages: list = None,
225
281
  include_companies: list = None,
226
282
  exclude_companies: list = None,
227
283
  include_docs: list = None,
228
284
  exclude_docs: list = None,
285
+ brand_safety: str = None,
286
+ language: str = None,
287
+ continent: str = None,
288
+ region: str = None,
289
+ country: str = None,
290
+ sector: str = None,
291
+ industry_group: str = None,
292
+ industry: str = None,
293
+ sub_industry: str = None,
294
+ iab_tier_1: str = None,
295
+ iab_tier_2: str = None,
296
+ iab_tier_3: str = None,
297
+ iab_tier_4: str = None,
298
+ *args, **kwargs
229
299
  ) -> ResultSet:
230
300
  """
231
301
  Run a single search query.
@@ -273,10 +343,6 @@ class Nosible:
273
343
  List of netlocs (domains) to include in the search. (Max: 50)
274
344
  exclude_netlocs : list of str, optional
275
345
  List of netlocs (domains) to exclude in the search. (Max: 50)
276
- include_languages : list of str, optional
277
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
278
- exclude_languages : list of str, optional
279
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
280
346
  include_companies : list of str, optional
281
347
  Google KG IDs of public companies to require (Max: 50).
282
348
  exclude_companies : list of str, optional
@@ -285,6 +351,32 @@ class Nosible:
285
351
  URL hashes of docs to include (Max: 50).
286
352
  exclude_docs : list of str, optional
287
353
  URL hashes of docs to exclude (Max: 50).
354
+ brand_safety : str, optional
355
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
356
+ language : str, optional
357
+ Language code to use in search (ISO 639-1 language code).
358
+ continent : str, optional
359
+ Continent the results must come from (e.g., "Europe", "Asia").
360
+ region : str, optional
361
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
362
+ country : str, optional
363
+ Country the results must come from.
364
+ sector : str, optional
365
+ Sector the results must relate to (e.g., "Energy", "Information Technology").
366
+ industry_group : str, optional
367
+ Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
368
+ industry : str, optional
369
+ Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
370
+ sub_industry : str, optional
371
+ Sub-industry classification of the content's subject.
372
+ iab_tier_1 : str, optional
373
+ IAB Tier 1 category for the content.
374
+ iab_tier_2 : str, optional
375
+ IAB Tier 2 category for the content.
376
+ iab_tier_3 : str, optional
377
+ IAB Tier 3 category for the content.
378
+ iab_tier_4 : str, optional
379
+ IAB Tier 4 category for the content.
288
380
 
289
381
  Returns
290
382
  -------
@@ -314,27 +406,38 @@ class Nosible:
314
406
  >>> from nosible import Nosible
315
407
  >>> s = Search(question="Hedge funds seek to expand into private credit", n_results=10)
316
408
  >>> with Nosible() as nos:
317
- ... results = nos.search(search=s)
409
+ ... results = nos.fast_search(search=s)
318
410
  ... print(isinstance(results, ResultSet))
319
411
  ... print(len(results))
320
412
  True
321
413
  10
322
414
  >>> nos = Nosible(nosible_api_key="test|xyz")
323
- >>> nos.search() # doctest: +ELLIPSIS
415
+ >>> nos.fast_search() # doctest: +ELLIPSIS
324
416
  Traceback (most recent call last):
325
417
  ...
326
418
  TypeError: Specify exactly one of 'question' or 'search'.
327
419
  >>> nos = Nosible(nosible_api_key="test|xyz")
328
- >>> nos.search(question="foo", search=s) # doctest: +ELLIPSIS
420
+ >>> nos.fast_search(question="foo", search=s) # doctest: +ELLIPSIS
329
421
  Traceback (most recent call last):
330
422
  ...
331
423
  TypeError: Specify exactly one of 'question' or 'search'.
332
424
  >>> nos = Nosible(nosible_api_key="test|xyz")
333
- >>> nos.search(question="foo", n_results=101) # doctest: +ELLIPSIS
425
+ >>> nos.fast_search(question="foo", n_results=101) # doctest: +ELLIPSIS
334
426
  Traceback (most recent call last):
335
427
  ...
336
428
  ValueError: Search can not have more than 100 results - Use bulk search instead.
337
429
  """
430
+ if "include_languages" in kwargs:
431
+ warnings.warn(
432
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
433
+ "Please use the parameter 'language' instead.",
434
+ )
435
+ if "exclude_languages" in kwargs:
436
+ warnings.warn(
437
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
438
+ "Please use the parameter 'language' instead.",
439
+ )
440
+
338
441
  if (question is None and search is None) or (question is not None and search is not None):
339
442
  raise TypeError("Specify exactly one of 'question' or 'search'.")
340
443
 
@@ -357,25 +460,36 @@ class Nosible:
357
460
  visited_start=visited_start,
358
461
  visited_end=visited_end,
359
462
  certain=certain,
360
- include_languages=include_languages,
361
- exclude_languages=exclude_languages,
362
463
  include_companies=include_companies,
363
464
  exclude_companies=exclude_companies,
364
465
  include_docs=include_docs,
365
466
  exclude_docs=exclude_docs,
467
+ brand_safety=brand_safety,
468
+ language=language,
469
+ continent=continent,
470
+ region=region,
471
+ country=country,
472
+ sector=sector,
473
+ industry_group=industry_group,
474
+ industry=industry,
475
+ sub_industry=sub_industry,
476
+ iab_tier_1=iab_tier_1,
477
+ iab_tier_2=iab_tier_2,
478
+ iab_tier_3=iab_tier_3,
479
+ iab_tier_4=iab_tier_4,
366
480
  )
367
481
 
368
482
  future = self._executor.submit(self._search_single, search_obj)
369
483
  try:
370
484
  return future.result()
371
485
  except ValueError:
372
- # Propagate our own too many results error directly
486
+ # Propagate our own "too many results" error directly.
373
487
  raise
374
488
  except Exception as e:
375
489
  self.logger.warning(f"Search for {search_obj.question!r} failed: {e}")
376
490
  raise RuntimeError(f"Search for {search_obj.question!r} failed") from e
377
491
 
378
- def searches(
492
+ def fast_searches(
379
493
  self,
380
494
  *,
381
495
  searches: Union[SearchSet, list[Search]] = None,
@@ -385,7 +499,7 @@ class Nosible:
385
499
  n_results: int = 100,
386
500
  n_probes: int = 30,
387
501
  n_contextify: int = 128,
388
- algorithm: str = "hybrid-2",
502
+ algorithm: str = "hybrid-3",
389
503
  min_similarity: float = None,
390
504
  must_include: list[str] = None,
391
505
  must_exclude: list[str] = None,
@@ -397,12 +511,24 @@ class Nosible:
397
511
  visited_start: str = None,
398
512
  visited_end: str = None,
399
513
  certain: bool = None,
400
- include_languages: list = None,
401
- exclude_languages: list = None,
402
514
  include_companies: list = None,
403
515
  exclude_companies: list = None,
404
516
  include_docs: list = None,
405
517
  exclude_docs: list = None,
518
+ brand_safety: str = None,
519
+ language: str = None,
520
+ continent: str = None,
521
+ region: str = None,
522
+ country: str = None,
523
+ sector: str = None,
524
+ industry_group: str = None,
525
+ industry: str = None,
526
+ sub_industry: str = None,
527
+ iab_tier_1: str = None,
528
+ iab_tier_2: str = None,
529
+ iab_tier_3: str = None,
530
+ iab_tier_4: str = None,
531
+ **kwargs
406
532
  ) -> Iterator[ResultSet]:
407
533
  """
408
534
  Run multiple searches concurrently and yield results.
@@ -447,9 +573,6 @@ class Nosible:
447
573
  List of netlocs (domains) to include in the search. (Max: 50)
448
574
  exclude_netlocs : list of str, optional
449
575
  List of netlocs (domains) to exclude in the search. (Max: 50)
450
- include_languages : list of str, optional
451
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
452
- exclude_languages : list of str, optional
453
576
  Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
454
577
  include_companies : list of str, optional
455
578
  Google KG IDs of public companies to require (Max: 50).
@@ -459,6 +582,32 @@ class Nosible:
459
582
  URL hashes of docs to include (Max: 50).
460
583
  exclude_docs : list of str, optional
461
584
  URL hashes of docs to exclude (Max: 50).
585
+ brand_safety : str, optional
586
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
587
+ language : str, optional
588
+ Language code to use in search (ISO 639-1 language code).
589
+ continent : str, optional
590
+ Continent the results must come from (e.g., "Europe", "Asia").
591
+ region : str, optional
592
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
593
+ country : str, optional
594
+ Country the results must come from.
595
+ sector : str, optional
596
+ GICS Sector the results must relate to (e.g., "Energy", "Information Technology").
597
+ industry_group : str, optional
598
+ GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
599
+ industry : str, optional
600
+ GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
601
+ sub_industry : str, optional
602
+ GICS Sub-industry classification of the content's subject.
603
+ iab_tier_1 : str, optional
604
+ IAB Tier 1 category for the content.
605
+ iab_tier_2 : str, optional
606
+ IAB Tier 2 category for the content.
607
+ iab_tier_3 : str, optional
608
+ IAB Tier 3 category for the content.
609
+ iab_tier_4 : str, optional
610
+ IAB Tier 4 category for the content.
462
611
 
463
612
  Returns
464
613
  ------
@@ -489,7 +638,7 @@ class Nosible:
489
638
  ... ]
490
639
  ... )
491
640
  >>> with Nosible() as nos:
492
- ... results_list = list(nos.searches(searches=queries))
641
+ ... results_list = list(nos.fast_searches(searches=queries))
493
642
  >>> print(len(results_list))
494
643
  2
495
644
  >>> for r in results_list:
@@ -498,7 +647,7 @@ class Nosible:
498
647
  True True
499
648
  >>> with Nosible() as nos:
500
649
  ... results_list_str = list(
501
- ... nos.searches(
650
+ ... nos.fast_searches(
502
651
  ... questions=[
503
652
  ... "What are the terms of the partnership between Microsoft and OpenAI?",
504
653
  ... "What are the terms of the partnership between Volkswagen and Uber?",
@@ -506,17 +655,28 @@ class Nosible:
506
655
  ... )
507
656
  ... )
508
657
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +ELLIPSIS
509
- >>> nos.searches() # doctest: +ELLIPSIS
658
+ >>> nos.fast_searches() # doctest: +ELLIPSIS
510
659
  Traceback (most recent call last):
511
660
  ...
512
661
  TypeError: Specify exactly one of 'questions' or 'searches'.
513
662
  >>> from nosible import Nosible
514
663
  >>> nos = Nosible(nosible_api_key="test|xyz")
515
- >>> nos.searches(questions=["A"], searches=SearchSet(searches=["A"])) # doctest: +ELLIPSIS
664
+ >>> nos.fast_searches(questions=["A"], searches=SearchSet(searches=["A"])) # doctest: +ELLIPSIS
516
665
  Traceback (most recent call last):
517
666
  ...
518
667
  TypeError: Specify exactly one of 'questions' or 'searches'.
519
668
  """
669
+ if "include_languages" in kwargs:
670
+ warnings.warn(
671
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
672
+ "Please use the parameter 'language' instead.",
673
+ )
674
+ if "exclude_languages" in kwargs:
675
+ warnings.warn(
676
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
677
+ "Please use the parameter 'language' instead.",
678
+ )
679
+
520
680
  if (questions is None and searches is None) or (questions is not None and searches is not None):
521
681
  raise TypeError("Specify exactly one of 'questions' or 'searches'.")
522
682
 
@@ -543,12 +703,23 @@ class Nosible:
543
703
  visited_start=visited_start,
544
704
  visited_end=visited_end,
545
705
  certain=certain,
546
- include_languages=include_languages,
547
- exclude_languages=exclude_languages,
548
706
  include_companies=include_companies,
549
707
  exclude_companies=exclude_companies,
550
708
  include_docs=include_docs,
551
709
  exclude_docs=exclude_docs,
710
+ brand_safety=brand_safety,
711
+ language=language,
712
+ continent=continent,
713
+ region=region,
714
+ country=country,
715
+ sector=sector,
716
+ industry_group=industry_group,
717
+ industry=industry,
718
+ sub_industry=sub_industry,
719
+ iab_tier_1=iab_tier_1,
720
+ iab_tier_2=iab_tier_2,
721
+ iab_tier_3=iab_tier_3,
722
+ iab_tier_4=iab_tier_4,
552
723
  )
553
724
 
554
725
  futures = [self._executor.submit(self._search_single, s) for s in searches_list]
@@ -562,6 +733,7 @@ class Nosible:
562
733
 
563
734
  return _run_generator()
564
735
 
736
+
565
737
  @_rate_limited("fast")
566
738
  def _search_single(self, search_obj: Search) -> ResultSet:
567
739
  """
@@ -590,7 +762,7 @@ class Nosible:
590
762
  >>> from nosible import Nosible
591
763
  >>> s = Search(question="Nvidia insiders dump more than $1 billion in stock", n_results=200)
592
764
  >>> with Nosible() as nos:
593
- ... results = nos.search(search=s) # doctest: +ELLIPSIS
765
+ ... results = nos.fast_search(search=s) # doctest: +ELLIPSIS
594
766
  Traceback (most recent call last):
595
767
  ...
596
768
  ValueError: Search can not have more than 100 results - Use bulk search instead.
@@ -604,7 +776,7 @@ class Nosible:
604
776
  n_results = search_obj.n_results if search_obj.n_results is not None else 100
605
777
  n_probes = search_obj.n_probes if search_obj.n_probes is not None else 30
606
778
  n_contextify = search_obj.n_contextify if search_obj.n_contextify is not None else 128
607
- algorithm = search_obj.algorithm if search_obj.algorithm is not None else "hybrid-2"
779
+ algorithm = search_obj.algorithm if search_obj.algorithm is not None else "hybrid-3"
608
780
  min_similarity = search_obj.min_similarity if search_obj.min_similarity is not None else 0
609
781
  must_include = search_obj.must_include if search_obj.must_include is not None else []
610
782
  must_exclude = search_obj.must_exclude if search_obj.must_exclude is not None else []
@@ -618,18 +790,31 @@ class Nosible:
618
790
  visited_start = search_obj.visited_start if search_obj.visited_start is not None else self.visited_start
619
791
  visited_end = search_obj.visited_end if search_obj.visited_end is not None else self.visited_end
620
792
  certain = search_obj.certain if search_obj.certain is not None else self.certain
621
- include_languages = (
622
- search_obj.include_languages if search_obj.include_languages is not None else self.include_languages
623
- )
624
- exclude_languages = (
625
- search_obj.exclude_languages if search_obj.exclude_languages is not None else self.exclude_languages
626
- )
627
793
  include_companies = (
628
794
  search_obj.include_companies if search_obj.include_companies is not None else self.include_companies
629
795
  )
630
796
  exclude_companies = (
631
797
  search_obj.exclude_companies if search_obj.exclude_companies is not None else self.exclude_companies
632
798
  )
799
+ include_docs = search_obj.include_docs if search_obj.include_docs is not None else self.include_docs
800
+ exclude_docs = search_obj.exclude_docs if search_obj.exclude_docs is not None else self.exclude_docs
801
+ brand_safety = search_obj.brand_safety if search_obj.brand_safety is not None else self.brand_safety
802
+ language = search_obj.language if search_obj.language is not None else self.language
803
+ continent = search_obj.continent if search_obj.continent is not None else self.continent
804
+ region = search_obj.region if search_obj.region is not None else self.region
805
+ country = search_obj.country if search_obj.country is not None else self.country
806
+ sector = search_obj.sector if search_obj.sector is not None else self.sector
807
+ industry_group = search_obj.industry_group if search_obj.industry_group is not None else self.industry_group
808
+ industry = search_obj.industry if search_obj.industry is not None else self.industry
809
+ sub_industry = search_obj.sub_industry if search_obj.sub_industry is not None else self.sub_industry
810
+ iab_tier_1 = search_obj.iab_tier_1 if search_obj.iab_tier_1 is not None else self.iab_tier_1
811
+ iab_tier_2 = search_obj.iab_tier_2 if search_obj.iab_tier_2 is not None else self.iab_tier_2
812
+ iab_tier_3 = search_obj.iab_tier_3 if search_obj.iab_tier_3 is not None else self.iab_tier_3
813
+ iab_tier_4 = search_obj.iab_tier_4 if search_obj.iab_tier_4 is not None else self.iab_tier_4
814
+
815
+ must_include = must_include if must_include is not None else []
816
+ must_exclude = must_exclude if must_exclude is not None else []
817
+ min_similarity = min_similarity if min_similarity is not None else 0
633
818
 
634
819
  if not (0.0 <= min_similarity <= 1.0):
635
820
  raise ValueError(f"Invalid min_simalarity: {min_similarity}. Must be [0,1].")
@@ -650,10 +835,10 @@ class Nosible:
650
835
  visited_start=visited_start,
651
836
  visited_end=visited_end,
652
837
  certain=certain,
653
- include_languages=include_languages,
654
- exclude_languages=exclude_languages,
655
838
  include_companies=include_companies,
656
839
  exclude_companies=exclude_companies,
840
+ include_docs=include_docs,
841
+ exclude_docs=exclude_docs,
657
842
  )
658
843
 
659
844
  # Enforce limits
@@ -674,6 +859,24 @@ class Nosible:
674
859
  "must_include": must_include,
675
860
  "must_exclude": must_exclude,
676
861
  }
862
+ optional = {
863
+ "brand_safety":brand_safety,
864
+ "language": language,
865
+ "continent": continent,
866
+ "region": region,
867
+ "country": country,
868
+ "sector": sector,
869
+ "industry_group": industry_group,
870
+ "industry": industry,
871
+ "sub_industry": sub_industry,
872
+ "iab_tier_1": iab_tier_1,
873
+ "iab_tier_2": iab_tier_2,
874
+ "iab_tier_3": iab_tier_3,
875
+ "iab_tier_4": iab_tier_4,
876
+ }
877
+ for key, val in optional.items():
878
+ if val is not None:
879
+ payload[key] = val
677
880
 
678
881
  resp = self._post(url="https://www.nosible.ai/search/v1/fast-search", payload=payload)
679
882
  resp.raise_for_status()
@@ -732,25 +935,37 @@ class Nosible:
732
935
  n_results: int = 1000,
733
936
  n_probes: int = 30,
734
937
  n_contextify: int = 128,
735
- algorithm: str = "hybrid-2",
938
+ algorithm: str = "hybrid-3",
736
939
  min_similarity: float = None,
737
940
  must_include: list[str] = None,
738
941
  must_exclude: list[str] = None,
739
942
  autogenerate_expansions: bool = False,
740
943
  publish_start: str = None,
741
944
  publish_end: str = None,
742
- include_netlocs: list = None,
743
- exclude_netlocs: list = None,
744
945
  visited_start: str = None,
745
946
  visited_end: str = None,
746
947
  certain: bool = None,
747
- include_languages: list = None,
748
- exclude_languages: list = None,
948
+ include_netlocs: list = None,
949
+ exclude_netlocs: list = None,
749
950
  include_companies: list = None,
750
951
  exclude_companies: list = None,
751
952
  include_docs: list = None,
752
953
  exclude_docs: list = None,
954
+ brand_safety: str = None,
955
+ language: str = None,
956
+ continent: str = None,
957
+ region: str = None,
958
+ country: str = None,
959
+ sector: str = None,
960
+ industry_group: str = None,
961
+ industry: str = None,
962
+ sub_industry: str = None,
963
+ iab_tier_1: str = None,
964
+ iab_tier_2: str = None,
965
+ iab_tier_3: str = None,
966
+ iab_tier_4: str = None,
753
967
  verbose: bool = False,
968
+ **kwargs,
754
969
  ) -> ResultSet:
755
970
  """
756
971
  Perform a bulk (slow) search query (1,000–10,000 results) against the Nosible API.
@@ -795,10 +1010,6 @@ class Nosible:
795
1010
  List of netlocs (domains) to include in the search. (Max: 50)
796
1011
  exclude_netlocs : list of str, optional
797
1012
  List of netlocs (domains) to exclude in the search. (Max: 50)
798
- include_languages : list of str, optional
799
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
800
- exclude_languages : list of str, optional
801
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
802
1013
  include_companies : list of str, optional
803
1014
  Google KG IDs of public companies to require (Max: 50).
804
1015
  exclude_companies : list of str, optional
@@ -807,6 +1018,32 @@ class Nosible:
807
1018
  URL hashes of docs to include (Max: 50).
808
1019
  exclude_docs : list of str, optional
809
1020
  URL hashes of docs to exclude (Max: 50).
1021
+ brand_safety : str, optional
1022
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
1023
+ language : str, optional
1024
+ Language code to use in search (ISO 639-1 language code).
1025
+ continent : str, optional
1026
+ Continent the results must come from (e.g., "Europe", "Asia").
1027
+ region : str, optional
1028
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
1029
+ country : str, optional
1030
+ Country the results must come from.
1031
+ sector : str, optional
1032
+ Sector the results must relate to (e.g., "Energy", "Information Technology").
1033
+ industry_group : str, optional
1034
+ Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
1035
+ industry : str, optional
1036
+ Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
1037
+ sub_industry : str, optional
1038
+ Sub-industry classification of the content's subject.
1039
+ iab_tier_1 : str, optional
1040
+ IAB Tier 1 category for the content.
1041
+ iab_tier_2 : str, optional
1042
+ IAB Tier 2 category for the content.
1043
+ iab_tier_3 : str, optional
1044
+ IAB Tier 3 category for the content.
1045
+ iab_tier_4 : str, optional
1046
+ IAB Tier 4 category for the content.
810
1047
  verbose : bool, optional
811
1048
  Show verbose output, Bulk search will print more information.
812
1049
 
@@ -872,6 +1109,17 @@ class Nosible:
872
1109
  ...
873
1110
  ValueError: Bulk search cannot have more than 10000 results per query.
874
1111
  """
1112
+ if "include_languages" in kwargs:
1113
+ warnings.warn(
1114
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
1115
+ "Please use the parameter 'language' instead.",
1116
+ )
1117
+ if "exclude_languages" in kwargs:
1118
+ warnings.warn(
1119
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
1120
+ "Please use the parameter 'language' instead.",
1121
+ )
1122
+
875
1123
  from cryptography.fernet import Fernet
876
1124
 
877
1125
  previous_level = self.logger.level
@@ -894,11 +1142,8 @@ class Nosible:
894
1142
  n_contextify = search.n_contextify if search.n_contextify is not None else n_contextify
895
1143
  algorithm = search.algorithm if search.algorithm is not None else algorithm
896
1144
  min_similarity = search.min_similarity if search.min_similarity is not None else min_similarity
897
- min_similarity = min_similarity if min_similarity is not None else 0
898
1145
  must_include = search.must_include if search.must_include is not None else must_include
899
- must_include = must_include if must_include is not None else []
900
1146
  must_exclude = search.must_exclude if search.must_exclude is not None else must_exclude
901
- must_exclude = must_exclude if must_exclude is not None else []
902
1147
  autogenerate_expansions = (
903
1148
  search.autogenerate_expansions
904
1149
  if search.autogenerate_expansions is not None
@@ -911,12 +1156,23 @@ class Nosible:
911
1156
  visited_start = search.visited_start if search.visited_start is not None else visited_start
912
1157
  visited_end = search.visited_end if search.visited_end is not None else visited_end
913
1158
  certain = search.certain if search.certain is not None else certain
914
- include_languages = search.include_languages if search.include_languages is not None else include_languages
915
- exclude_languages = search.exclude_languages if search.exclude_languages is not None else exclude_languages
916
1159
  include_companies = search.include_companies if search.include_companies is not None else include_companies
917
1160
  exclude_companies = search.exclude_companies if search.exclude_companies is not None else exclude_companies
918
- include_docs = search.include_docs if search.include_docs is not None else include_docs
919
- exclude_docs = search.exclude_docs if search.exclude_docs is not None else exclude_docs
1161
+ include_docs = search.include_docs if search.include_docs is not None else self.include_docs
1162
+ exclude_docs = search.exclude_docs if search.exclude_docs is not None else self.exclude_docs
1163
+ brand_safety = search.brand_safety if search.brand_safety is not None else self.brand_safety
1164
+ language = search.language if search.language is not None else self.language
1165
+ continent = search.continent if search.continent is not None else self.continent
1166
+ region = search.region if search.region is not None else self.region
1167
+ country = search.country if search.country is not None else self.country
1168
+ sector = search.sector if search.sector is not None else self.sector
1169
+ industry_group = search.industry_group if search.industry_group is not None else self.industry_group
1170
+ industry = search.industry if search.industry is not None else self.industry
1171
+ sub_industry = search.sub_industry if search.sub_industry is not None else self.sub_industry
1172
+ iab_tier_1 = search.iab_tier_1 if search.iab_tier_1 is not None else self.iab_tier_1
1173
+ iab_tier_2 = search.iab_tier_2 if search.iab_tier_2 is not None else self.iab_tier_2
1174
+ iab_tier_3 = search.iab_tier_3 if search.iab_tier_3 is not None else self.iab_tier_3
1175
+ iab_tier_4 = search.iab_tier_4 if search.iab_tier_4 is not None else self.iab_tier_4
920
1176
 
921
1177
  # Default expansions and filters
922
1178
  if expansions is None:
@@ -936,13 +1192,11 @@ class Nosible:
936
1192
  sql_filter = self._format_sql(
937
1193
  publish_start=publish_start if publish_start is not None else self.publish_start,
938
1194
  publish_end=publish_end if publish_end is not None else self.publish_end,
939
- include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
940
- exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
941
1195
  visited_start=visited_start if visited_start is not None else self.visited_start,
942
1196
  visited_end=visited_end if visited_end is not None else self.visited_end,
943
1197
  certain=certain if certain is not None else self.certain,
944
- include_languages=include_languages if include_languages is not None else self.include_languages,
945
- exclude_languages=exclude_languages if exclude_languages is not None else self.exclude_languages,
1198
+ include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
1199
+ exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
946
1200
  include_companies=include_companies if include_companies is not None else self.include_companies,
947
1201
  exclude_companies=exclude_companies if exclude_companies is not None else self.exclude_companies,
948
1202
  include_docs=include_docs if include_docs is not None else self.include_docs,
@@ -979,6 +1233,25 @@ class Nosible:
979
1233
  "must_include": must_include,
980
1234
  "must_exclude": must_exclude,
981
1235
  }
1236
+ optional = {
1237
+ "brand_safety": brand_safety,
1238
+ "language": language,
1239
+ "continent": continent,
1240
+ "region": region,
1241
+ "country": country,
1242
+ "sector": sector,
1243
+ "industry_group": industry_group,
1244
+ "industry": industry,
1245
+ "sub_industry": sub_industry,
1246
+ "iab_tier_1": iab_tier_1,
1247
+ "iab_tier_2": iab_tier_2,
1248
+ "iab_tier_3": iab_tier_3,
1249
+ "iab_tier_4": iab_tier_4,
1250
+ }
1251
+ for key, val in optional.items():
1252
+ if val is not None:
1253
+ payload[key] = val
1254
+
982
1255
  resp = self._post(url="https://www.nosible.ai/search/v1/slow-search", payload=payload)
983
1256
  try:
984
1257
  resp.raise_for_status()
@@ -1019,7 +1292,7 @@ class Nosible:
1019
1292
  show_context: bool = True,
1020
1293
  ) -> str:
1021
1294
  """
1022
- RAG-style question answering: retrieve top `n_results` via `.search()`
1295
+ RAG-style question answering: retrieve top `n_results` via `.fast_search()`
1023
1296
  then answer `query` using those documents as context.
1024
1297
 
1025
1298
  Parameters
@@ -1068,7 +1341,7 @@ class Nosible:
1068
1341
  raise ValueError("An LLM API key is required for answer().")
1069
1342
 
1070
1343
  # Retrieve top documents
1071
- results = self.search(question=query, n_results=n_results, min_similarity=min_similarity)
1344
+ results = self.fast_search(question=query, n_results=n_results, min_similarity=min_similarity)
1072
1345
 
1073
1346
  # Build RAG context
1074
1347
  context = ""
@@ -1778,13 +2051,11 @@ class Nosible:
1778
2051
  self,
1779
2052
  publish_start: str = None,
1780
2053
  publish_end: str = None,
1781
- include_netlocs: list = None,
1782
- exclude_netlocs: list = None,
1783
2054
  visited_start: str = None,
1784
2055
  visited_end: str = None,
1785
2056
  certain: bool = None,
1786
- include_languages: list = None,
1787
- exclude_languages: list = None,
2057
+ include_netlocs: list = None,
2058
+ exclude_netlocs: list = None,
1788
2059
  include_companies: list = None,
1789
2060
  exclude_companies: list = None,
1790
2061
  include_docs: list = None,
@@ -1809,10 +2080,6 @@ class Nosible:
1809
2080
  List of netlocs (domains) to include in the search. (Max: 50)
1810
2081
  exclude_netlocs : list of str, optional
1811
2082
  List of netlocs (domains) to exclude in the search. (Max: 50)
1812
- include_languages : list of str, optional
1813
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
1814
- exclude_languages : list of str, optional
1815
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
1816
2083
  include_companies : list of str, optional
1817
2084
  Google KG IDs of public companies to require (Max: 50).
1818
2085
  exclude_companies : list of str, optional
@@ -1845,8 +2112,6 @@ class Nosible:
1845
2112
  for name, value in [
1846
2113
  ("include_netlocs", include_netlocs),
1847
2114
  ("exclude_netlocs", exclude_netlocs),
1848
- ("include_languages", include_languages),
1849
- ("exclude_languages", exclude_languages),
1850
2115
  ("include_companies", include_companies),
1851
2116
  ("exclude_companies", exclude_companies),
1852
2117
  ("include_docs", include_docs),
@@ -1918,14 +2183,6 @@ class Nosible:
1918
2183
  f"(company_1 NOT IN ({company_list}) AND company_2 NOT IN ({company_list}) AND company_3 NOT IN ({company_list}))"
1919
2184
  )
1920
2185
 
1921
- # Include / exclude languages
1922
- if include_languages:
1923
- langs = ", ".join(f"'{lang}-{lang}'" for lang in include_languages)
1924
- clauses.append(f"language IN ({langs})")
1925
- if exclude_languages:
1926
- langs = ", ".join(f"'{lang}-{lang}'" for lang in exclude_languages)
1927
- clauses.append(f"language NOT IN ({langs})")
1928
-
1929
2186
  if include_docs:
1930
2187
  # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
1931
2188
  doc_hashes = ", ".join(f"'{doc}'" for doc in include_docs)