nosible 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nosible/nosible_client.py CHANGED
@@ -11,6 +11,7 @@ from collections.abc import Iterator
11
11
  from concurrent.futures import ThreadPoolExecutor
12
12
  from datetime import datetime
13
13
  from typing import Optional, Union
14
+ import warnings
14
15
 
15
16
  import httpx
16
17
  from tenacity import (
@@ -73,10 +74,6 @@ class Nosible:
73
74
  List of netlocs (domains) to include in the search. (Max: 50)
74
75
  exclude_netlocs : list of str, optional
75
76
  List of netlocs (domains) to exclude in the search. (Max: 50)
76
- include_languages : list of str, optional
77
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
78
- exclude_languages : list of str, optional
79
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
80
77
  include_companies : list of str, optional
81
78
  Google KG IDs of public companies to require (Max: 50).
82
79
  exclude_companies : list of str, optional
@@ -85,6 +82,34 @@ class Nosible:
85
82
  URL hashes of docs to include (Max: 50).
86
83
  exclude_docs : list of str, optional
87
84
  URL hashes of docs to exclude (Max: 50).
85
+ brand_safety : str, optional
86
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
87
+ language : str, optional
88
+ Language code to use in search (ISO 639-1 language code).
89
+ continent : str, optional
90
+ Continent the results must come from (e.g., "Europe", "Asia").
91
+ region : str, optional
92
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
93
+ country : str, optional
94
+ Country the results must come from.
95
+ sector : str, optional
96
+ Sector the results must relate to (e.g., "Energy", "Information Technology").
97
+ industry_group : str, optional
98
+ Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
99
+ industry : str, optional
100
+ Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
101
+ sub_industry : str, optional
102
+ Sub-industry classification of the content's subject.
103
+ iab_tier_1 : str, optional
104
+ IAB Tier 1 category for the content.
105
+ iab_tier_2 : str, optional
106
+ IAB Tier 2 category for the content.
107
+ iab_tier_3 : str, optional
108
+ IAB Tier 3 category for the content.
109
+ iab_tier_4 : str, optional
110
+ IAB Tier 4 category for the content.
111
+ instruction : str, optional
112
+ Instruction to use with the search query.
88
113
 
89
114
  Notes
90
115
  -----
@@ -99,7 +124,7 @@ class Nosible:
99
124
  --------
100
125
  >>> from nosible import Nosible # doctest: +SKIP
101
126
  >>> nos = Nosible(nosible_api_key="your_api_key_here") # doctest: +SKIP
102
- >>> search = nos.search(question="What is Nosible?", n_results=5) # doctest: +SKIP
127
+ >>> search = nos.fast_search(question="What is Nosible?", n_results=5) # doctest: +SKIP
103
128
  """
104
129
 
105
130
  def __init__(
@@ -119,13 +144,38 @@ class Nosible:
119
144
  visited_start: str = None,
120
145
  visited_end: str = None,
121
146
  certain: bool = None,
122
- include_languages: list = None,
123
- exclude_languages: list = None,
124
147
  include_companies: list = None,
125
148
  exclude_companies: list = None,
126
149
  include_docs: list = None,
127
150
  exclude_docs: list = None,
151
+ brand_safety: str = None,
152
+ language: str = None,
153
+ continent: str = None,
154
+ region: str = None,
155
+ country: str = None,
156
+ sector: str = None,
157
+ industry_group: str = None,
158
+ industry: str = None,
159
+ sub_industry: str = None,
160
+ iab_tier_1: str = None,
161
+ iab_tier_2: str = None,
162
+ iab_tier_3: str = None,
163
+ iab_tier_4: str = None,
164
+ instruction: str = None,
165
+ *args, **kwargs
128
166
  ) -> None:
167
+
168
+ if "include_languages" in kwargs:
169
+ warnings.warn(
170
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
171
+ "Please use the parameter 'language' instead.",
172
+ )
173
+ if "exclude_languages" in kwargs:
174
+ warnings.warn(
175
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
176
+ "Please use the parameter 'language' instead.",
177
+ )
178
+
129
179
  # API Keys
130
180
  if nosible_api_key is not None:
131
181
  self.nosible_api_key = nosible_api_key
@@ -192,14 +242,26 @@ class Nosible:
192
242
  self.visited_start = visited_start
193
243
  self.visited_end = visited_end
194
244
  self.certain = certain
195
- self.include_languages = include_languages
196
- self.exclude_languages = exclude_languages
197
245
  self.include_companies = include_companies
198
246
  self.exclude_companies = exclude_companies
199
247
  self.exclude_docs = exclude_docs
200
248
  self.include_docs = include_docs
201
-
202
- def search(
249
+ self.brand_safety = brand_safety
250
+ self.language = language
251
+ self.continent = continent
252
+ self.region = region
253
+ self.country = country
254
+ self.sector = sector
255
+ self.industry_group = industry_group
256
+ self.industry = industry
257
+ self.sub_industry = sub_industry
258
+ self.iab_tier_1 = iab_tier_1
259
+ self.iab_tier_2 = iab_tier_2
260
+ self.iab_tier_3 = iab_tier_3
261
+ self.iab_tier_4 = iab_tier_4
262
+ self.instruction = instruction
263
+
264
+ def fast_search(
203
265
  self,
204
266
  search: Search = None,
205
267
  question: str = None,
@@ -220,12 +282,25 @@ class Nosible:
220
282
  visited_start: str = None,
221
283
  visited_end: str = None,
222
284
  certain: bool = None,
223
- include_languages: list = None,
224
- exclude_languages: list = None,
225
285
  include_companies: list = None,
226
286
  exclude_companies: list = None,
227
287
  include_docs: list = None,
228
288
  exclude_docs: list = None,
289
+ brand_safety: str = None,
290
+ language: str = None,
291
+ continent: str = None,
292
+ region: str = None,
293
+ country: str = None,
294
+ sector: str = None,
295
+ industry_group: str = None,
296
+ industry: str = None,
297
+ sub_industry: str = None,
298
+ iab_tier_1: str = None,
299
+ iab_tier_2: str = None,
300
+ iab_tier_3: str = None,
301
+ iab_tier_4: str = None,
302
+ instruction: str = None,
303
+ *args, **kwargs
229
304
  ) -> ResultSet:
230
305
  """
231
306
  Run a single search query.
@@ -273,10 +348,6 @@ class Nosible:
273
348
  List of netlocs (domains) to include in the search. (Max: 50)
274
349
  exclude_netlocs : list of str, optional
275
350
  List of netlocs (domains) to exclude in the search. (Max: 50)
276
- include_languages : list of str, optional
277
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
278
- exclude_languages : list of str, optional
279
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
280
351
  include_companies : list of str, optional
281
352
  Google KG IDs of public companies to require (Max: 50).
282
353
  exclude_companies : list of str, optional
@@ -285,6 +356,34 @@ class Nosible:
285
356
  URL hashes of docs to include (Max: 50).
286
357
  exclude_docs : list of str, optional
287
358
  URL hashes of docs to exclude (Max: 50).
359
+ brand_safety : str, optional
360
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
361
+ language : str, optional
362
+ Language code to use in search (ISO 639-1 language code).
363
+ continent : str, optional
364
+ Continent the results must come from (e.g., "Europe", "Asia").
365
+ region : str, optional
366
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
367
+ country : str, optional
368
+ Country the results must come from.
369
+ sector : str, optional
370
+ Sector the results must relate to (e.g., "Energy", "Information Technology").
371
+ industry_group : str, optional
372
+ Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
373
+ industry : str, optional
374
+ Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
375
+ sub_industry : str, optional
376
+ Sub-industry classification of the content's subject.
377
+ iab_tier_1 : str, optional
378
+ IAB Tier 1 category for the content.
379
+ iab_tier_2 : str, optional
380
+ IAB Tier 2 category for the content.
381
+ iab_tier_3 : str, optional
382
+ IAB Tier 3 category for the content.
383
+ iab_tier_4 : str, optional
384
+ IAB Tier 4 category for the content.
385
+ instruction : str, optional
386
+ Instruction to use with the search query.
288
387
 
289
388
  Returns
290
389
  -------
@@ -314,27 +413,38 @@ class Nosible:
314
413
  >>> from nosible import Nosible
315
414
  >>> s = Search(question="Hedge funds seek to expand into private credit", n_results=10)
316
415
  >>> with Nosible() as nos:
317
- ... results = nos.search(search=s)
416
+ ... results = nos.fast_search(search=s)
318
417
  ... print(isinstance(results, ResultSet))
319
418
  ... print(len(results))
320
419
  True
321
420
  10
322
421
  >>> nos = Nosible(nosible_api_key="test|xyz")
323
- >>> nos.search() # doctest: +ELLIPSIS
422
+ >>> nos.fast_search() # doctest: +ELLIPSIS
324
423
  Traceback (most recent call last):
325
424
  ...
326
425
  TypeError: Specify exactly one of 'question' or 'search'.
327
426
  >>> nos = Nosible(nosible_api_key="test|xyz")
328
- >>> nos.search(question="foo", search=s) # doctest: +ELLIPSIS
427
+ >>> nos.fast_search(question="foo", search=s) # doctest: +ELLIPSIS
329
428
  Traceback (most recent call last):
330
429
  ...
331
430
  TypeError: Specify exactly one of 'question' or 'search'.
332
431
  >>> nos = Nosible(nosible_api_key="test|xyz")
333
- >>> nos.search(question="foo", n_results=101) # doctest: +ELLIPSIS
432
+ >>> nos.fast_search(question="foo", n_results=101) # doctest: +ELLIPSIS
334
433
  Traceback (most recent call last):
335
434
  ...
336
435
  ValueError: Search can not have more than 100 results - Use bulk search instead.
337
436
  """
437
+ if "include_languages" in kwargs:
438
+ warnings.warn(
439
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
440
+ "Please use the parameter 'language' instead.",
441
+ )
442
+ if "exclude_languages" in kwargs:
443
+ warnings.warn(
444
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
445
+ "Please use the parameter 'language' instead.",
446
+ )
447
+
338
448
  if (question is None and search is None) or (question is not None and search is not None):
339
449
  raise TypeError("Specify exactly one of 'question' or 'search'.")
340
450
 
@@ -357,25 +467,37 @@ class Nosible:
357
467
  visited_start=visited_start,
358
468
  visited_end=visited_end,
359
469
  certain=certain,
360
- include_languages=include_languages,
361
- exclude_languages=exclude_languages,
362
470
  include_companies=include_companies,
363
471
  exclude_companies=exclude_companies,
364
472
  include_docs=include_docs,
365
473
  exclude_docs=exclude_docs,
474
+ brand_safety=brand_safety,
475
+ language=language,
476
+ continent=continent,
477
+ region=region,
478
+ country=country,
479
+ sector=sector,
480
+ industry_group=industry_group,
481
+ industry=industry,
482
+ sub_industry=sub_industry,
483
+ iab_tier_1=iab_tier_1,
484
+ iab_tier_2=iab_tier_2,
485
+ iab_tier_3=iab_tier_3,
486
+ iab_tier_4=iab_tier_4,
487
+ instruction=instruction,
366
488
  )
367
489
 
368
490
  future = self._executor.submit(self._search_single, search_obj)
369
491
  try:
370
492
  return future.result()
371
493
  except ValueError:
372
- # Propagate our own too many results error directly
494
+ # Propagate our own "too many results" error directly.
373
495
  raise
374
496
  except Exception as e:
375
497
  self.logger.warning(f"Search for {search_obj.question!r} failed: {e}")
376
498
  raise RuntimeError(f"Search for {search_obj.question!r} failed") from e
377
499
 
378
- def searches(
500
+ def fast_searches(
379
501
  self,
380
502
  *,
381
503
  searches: Union[SearchSet, list[Search]] = None,
@@ -397,12 +519,25 @@ class Nosible:
397
519
  visited_start: str = None,
398
520
  visited_end: str = None,
399
521
  certain: bool = None,
400
- include_languages: list = None,
401
- exclude_languages: list = None,
402
522
  include_companies: list = None,
403
523
  exclude_companies: list = None,
404
524
  include_docs: list = None,
405
525
  exclude_docs: list = None,
526
+ brand_safety: str = None,
527
+ language: str = None,
528
+ continent: str = None,
529
+ region: str = None,
530
+ country: str = None,
531
+ sector: str = None,
532
+ industry_group: str = None,
533
+ industry: str = None,
534
+ sub_industry: str = None,
535
+ iab_tier_1: str = None,
536
+ iab_tier_2: str = None,
537
+ iab_tier_3: str = None,
538
+ iab_tier_4: str = None,
539
+ instruction: str = None,
540
+ **kwargs
406
541
  ) -> Iterator[ResultSet]:
407
542
  """
408
543
  Run multiple searches concurrently and yield results.
@@ -447,9 +582,6 @@ class Nosible:
447
582
  List of netlocs (domains) to include in the search. (Max: 50)
448
583
  exclude_netlocs : list of str, optional
449
584
  List of netlocs (domains) to exclude in the search. (Max: 50)
450
- include_languages : list of str, optional
451
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
452
- exclude_languages : list of str, optional
453
585
  Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
454
586
  include_companies : list of str, optional
455
587
  Google KG IDs of public companies to require (Max: 50).
@@ -459,6 +591,34 @@ class Nosible:
459
591
  URL hashes of docs to include (Max: 50).
460
592
  exclude_docs : list of str, optional
461
593
  URL hashes of docs to exclude (Max: 50).
594
+ brand_safety : str, optional
595
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
596
+ language : str, optional
597
+ Language code to use in search (ISO 639-1 language code).
598
+ continent : str, optional
599
+ Continent the results must come from (e.g., "Europe", "Asia").
600
+ region : str, optional
601
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
602
+ country : str, optional
603
+ Country the results must come from.
604
+ sector : str, optional
605
+ GICS Sector the results must relate to (e.g., "Energy", "Information Technology").
606
+ industry_group : str, optional
607
+ GICS Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
608
+ industry : str, optional
609
+ GICS Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
610
+ sub_industry : str, optional
611
+ GICS Sub-industry classification of the content's subject.
612
+ iab_tier_1 : str, optional
613
+ IAB Tier 1 category for the content.
614
+ iab_tier_2 : str, optional
615
+ IAB Tier 2 category for the content.
616
+ iab_tier_3 : str, optional
617
+ IAB Tier 3 category for the content.
618
+ iab_tier_4 : str, optional
619
+ IAB Tier 4 category for the content.
620
+ instruction : str, optional
621
+ Instruction to use with the search query.
462
622
 
463
623
  Returns
464
624
  ------
@@ -489,7 +649,7 @@ class Nosible:
489
649
  ... ]
490
650
  ... )
491
651
  >>> with Nosible() as nos:
492
- ... results_list = list(nos.searches(searches=queries))
652
+ ... results_list = list(nos.fast_searches(searches=queries))
493
653
  >>> print(len(results_list))
494
654
  2
495
655
  >>> for r in results_list:
@@ -498,7 +658,7 @@ class Nosible:
498
658
  True True
499
659
  >>> with Nosible() as nos:
500
660
  ... results_list_str = list(
501
- ... nos.searches(
661
+ ... nos.fast_searches(
502
662
  ... questions=[
503
663
  ... "What are the terms of the partnership between Microsoft and OpenAI?",
504
664
  ... "What are the terms of the partnership between Volkswagen and Uber?",
@@ -506,17 +666,28 @@ class Nosible:
506
666
  ... )
507
667
  ... )
508
668
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +ELLIPSIS
509
- >>> nos.searches() # doctest: +ELLIPSIS
669
+ >>> nos.fast_searches() # doctest: +ELLIPSIS
510
670
  Traceback (most recent call last):
511
671
  ...
512
672
  TypeError: Specify exactly one of 'questions' or 'searches'.
513
673
  >>> from nosible import Nosible
514
674
  >>> nos = Nosible(nosible_api_key="test|xyz")
515
- >>> nos.searches(questions=["A"], searches=SearchSet(searches=["A"])) # doctest: +ELLIPSIS
675
+ >>> nos.fast_searches(questions=["A"], searches=SearchSet(searches=["A"])) # doctest: +ELLIPSIS
516
676
  Traceback (most recent call last):
517
677
  ...
518
678
  TypeError: Specify exactly one of 'questions' or 'searches'.
519
679
  """
680
+ if "include_languages" in kwargs:
681
+ warnings.warn(
682
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
683
+ "Please use the parameter 'language' instead.",
684
+ )
685
+ if "exclude_languages" in kwargs:
686
+ warnings.warn(
687
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
688
+ "Please use the parameter 'language' instead.",
689
+ )
690
+
520
691
  if (questions is None and searches is None) or (questions is not None and searches is not None):
521
692
  raise TypeError("Specify exactly one of 'questions' or 'searches'.")
522
693
 
@@ -543,12 +714,24 @@ class Nosible:
543
714
  visited_start=visited_start,
544
715
  visited_end=visited_end,
545
716
  certain=certain,
546
- include_languages=include_languages,
547
- exclude_languages=exclude_languages,
548
717
  include_companies=include_companies,
549
718
  exclude_companies=exclude_companies,
550
719
  include_docs=include_docs,
551
720
  exclude_docs=exclude_docs,
721
+ brand_safety=brand_safety,
722
+ language=language,
723
+ continent=continent,
724
+ region=region,
725
+ country=country,
726
+ sector=sector,
727
+ industry_group=industry_group,
728
+ industry=industry,
729
+ sub_industry=sub_industry,
730
+ iab_tier_1=iab_tier_1,
731
+ iab_tier_2=iab_tier_2,
732
+ iab_tier_3=iab_tier_3,
733
+ iab_tier_4=iab_tier_4,
734
+ instruction=instruction,
552
735
  )
553
736
 
554
737
  futures = [self._executor.submit(self._search_single, s) for s in searches_list]
@@ -562,6 +745,7 @@ class Nosible:
562
745
 
563
746
  return _run_generator()
564
747
 
748
+
565
749
  @_rate_limited("fast")
566
750
  def _search_single(self, search_obj: Search) -> ResultSet:
567
751
  """
@@ -590,7 +774,7 @@ class Nosible:
590
774
  >>> from nosible import Nosible
591
775
  >>> s = Search(question="Nvidia insiders dump more than $1 billion in stock", n_results=200)
592
776
  >>> with Nosible() as nos:
593
- ... results = nos.search(search=s) # doctest: +ELLIPSIS
777
+ ... results = nos.fast_search(search=s) # doctest: +ELLIPSIS
594
778
  Traceback (most recent call last):
595
779
  ...
596
780
  ValueError: Search can not have more than 100 results - Use bulk search instead.
@@ -618,18 +802,32 @@ class Nosible:
618
802
  visited_start = search_obj.visited_start if search_obj.visited_start is not None else self.visited_start
619
803
  visited_end = search_obj.visited_end if search_obj.visited_end is not None else self.visited_end
620
804
  certain = search_obj.certain if search_obj.certain is not None else self.certain
621
- include_languages = (
622
- search_obj.include_languages if search_obj.include_languages is not None else self.include_languages
623
- )
624
- exclude_languages = (
625
- search_obj.exclude_languages if search_obj.exclude_languages is not None else self.exclude_languages
626
- )
627
805
  include_companies = (
628
806
  search_obj.include_companies if search_obj.include_companies is not None else self.include_companies
629
807
  )
630
808
  exclude_companies = (
631
809
  search_obj.exclude_companies if search_obj.exclude_companies is not None else self.exclude_companies
632
810
  )
811
+ include_docs = search_obj.include_docs if search_obj.include_docs is not None else self.include_docs
812
+ exclude_docs = search_obj.exclude_docs if search_obj.exclude_docs is not None else self.exclude_docs
813
+ brand_safety = search_obj.brand_safety if search_obj.brand_safety is not None else self.brand_safety
814
+ language = search_obj.language if search_obj.language is not None else self.language
815
+ continent = search_obj.continent if search_obj.continent is not None else self.continent
816
+ region = search_obj.region if search_obj.region is not None else self.region
817
+ country = search_obj.country if search_obj.country is not None else self.country
818
+ sector = search_obj.sector if search_obj.sector is not None else self.sector
819
+ industry_group = search_obj.industry_group if search_obj.industry_group is not None else self.industry_group
820
+ industry = search_obj.industry if search_obj.industry is not None else self.industry
821
+ sub_industry = search_obj.sub_industry if search_obj.sub_industry is not None else self.sub_industry
822
+ iab_tier_1 = search_obj.iab_tier_1 if search_obj.iab_tier_1 is not None else self.iab_tier_1
823
+ iab_tier_2 = search_obj.iab_tier_2 if search_obj.iab_tier_2 is not None else self.iab_tier_2
824
+ iab_tier_3 = search_obj.iab_tier_3 if search_obj.iab_tier_3 is not None else self.iab_tier_3
825
+ iab_tier_4 = search_obj.iab_tier_4 if search_obj.iab_tier_4 is not None else self.iab_tier_4
826
+ instruction = search_obj.instruction if search_obj.instruction is not None else self.instruction
827
+
828
+ must_include = must_include if must_include is not None else []
829
+ must_exclude = must_exclude if must_exclude is not None else []
830
+ min_similarity = min_similarity if min_similarity is not None else 0
633
831
 
634
832
  if not (0.0 <= min_similarity <= 1.0):
635
833
  raise ValueError(f"Invalid min_simalarity: {min_similarity}. Must be [0,1].")
@@ -650,10 +848,10 @@ class Nosible:
650
848
  visited_start=visited_start,
651
849
  visited_end=visited_end,
652
850
  certain=certain,
653
- include_languages=include_languages,
654
- exclude_languages=exclude_languages,
655
851
  include_companies=include_companies,
656
852
  exclude_companies=exclude_companies,
853
+ include_docs=include_docs,
854
+ exclude_docs=exclude_docs,
657
855
  )
658
856
 
659
857
  # Enforce limits
@@ -674,6 +872,25 @@ class Nosible:
674
872
  "must_include": must_include,
675
873
  "must_exclude": must_exclude,
676
874
  }
875
+ optional = {
876
+ "instruction": instruction,
877
+ "brand_safety":brand_safety,
878
+ "language": language,
879
+ "continent": continent,
880
+ "region": region,
881
+ "country": country,
882
+ "sector": sector,
883
+ "industry_group": industry_group,
884
+ "industry": industry,
885
+ "sub_industry": sub_industry,
886
+ "iab_tier_1": iab_tier_1,
887
+ "iab_tier_2": iab_tier_2,
888
+ "iab_tier_3": iab_tier_3,
889
+ "iab_tier_4": iab_tier_4,
890
+ }
891
+ for key, val in optional.items():
892
+ if val is not None:
893
+ payload[key] = val
677
894
 
678
895
  resp = self._post(url="https://www.nosible.ai/search/v1/fast-search", payload=payload)
679
896
  resp.raise_for_status()
@@ -739,18 +956,31 @@ class Nosible:
739
956
  autogenerate_expansions: bool = False,
740
957
  publish_start: str = None,
741
958
  publish_end: str = None,
742
- include_netlocs: list = None,
743
- exclude_netlocs: list = None,
744
959
  visited_start: str = None,
745
960
  visited_end: str = None,
746
961
  certain: bool = None,
747
- include_languages: list = None,
748
- exclude_languages: list = None,
962
+ include_netlocs: list = None,
963
+ exclude_netlocs: list = None,
749
964
  include_companies: list = None,
750
965
  exclude_companies: list = None,
751
966
  include_docs: list = None,
752
967
  exclude_docs: list = None,
968
+ brand_safety: str = None,
969
+ language: str = None,
970
+ continent: str = None,
971
+ region: str = None,
972
+ country: str = None,
973
+ sector: str = None,
974
+ industry_group: str = None,
975
+ industry: str = None,
976
+ sub_industry: str = None,
977
+ iab_tier_1: str = None,
978
+ iab_tier_2: str = None,
979
+ iab_tier_3: str = None,
980
+ iab_tier_4: str = None,
981
+ instruction: str = None,
753
982
  verbose: bool = False,
983
+ **kwargs,
754
984
  ) -> ResultSet:
755
985
  """
756
986
  Perform a bulk (slow) search query (1,000–10,000 results) against the Nosible API.
@@ -795,10 +1025,6 @@ class Nosible:
795
1025
  List of netlocs (domains) to include in the search. (Max: 50)
796
1026
  exclude_netlocs : list of str, optional
797
1027
  List of netlocs (domains) to exclude in the search. (Max: 50)
798
- include_languages : list of str, optional
799
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
800
- exclude_languages : list of str, optional
801
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
802
1028
  include_companies : list of str, optional
803
1029
  Google KG IDs of public companies to require (Max: 50).
804
1030
  exclude_companies : list of str, optional
@@ -807,6 +1033,34 @@ class Nosible:
807
1033
  URL hashes of docs to include (Max: 50).
808
1034
  exclude_docs : list of str, optional
809
1035
  URL hashes of docs to exclude (Max: 50).
1036
+ brand_safety : str, optional
1037
+ Whether it is safe, sensitive, or unsafe to advertise on this content.
1038
+ language : str, optional
1039
+ Language code to use in search (ISO 639-1 language code).
1040
+ continent : str, optional
1041
+ Continent the results must come from (e.g., "Europe", "Asia").
1042
+ region : str, optional
1043
+ Region or subcontinent the results must come from (e.g., "Southern Africa", "Caribbean").
1044
+ country : str, optional
1045
+ Country the results must come from.
1046
+ sector : str, optional
1047
+ Sector the results must relate to (e.g., "Energy", "Information Technology").
1048
+ industry_group : str, optional
1049
+ Industry group the results must relate to (e.g., "Automobiles & Components", "Insurance").
1050
+ industry : str, optional
1051
+ Industry the results must relate to (e.g., "Consumer Finance", "Passenger Airlines").
1052
+ sub_industry : str, optional
1053
+ Sub-industry classification of the content's subject.
1054
+ iab_tier_1 : str, optional
1055
+ IAB Tier 1 category for the content.
1056
+ iab_tier_2 : str, optional
1057
+ IAB Tier 2 category for the content.
1058
+ iab_tier_3 : str, optional
1059
+ IAB Tier 3 category for the content.
1060
+ iab_tier_4 : str, optional
1061
+ IAB Tier 4 category for the content.
1062
+ instruction : str, optional
1063
+ Instruction to use with the search query.
810
1064
  verbose : bool, optional
811
1065
  Show verbose output, Bulk search will print more information.
812
1066
 
@@ -872,6 +1126,17 @@ class Nosible:
872
1126
  ...
873
1127
  ValueError: Bulk search cannot have more than 10000 results per query.
874
1128
  """
1129
+ if "include_languages" in kwargs:
1130
+ warnings.warn(
1131
+ "The 'include_languages' parameter is deprecated and will be removed in a future release. "
1132
+ "Please use the parameter 'language' instead.",
1133
+ )
1134
+ if "exclude_languages" in kwargs:
1135
+ warnings.warn(
1136
+ "The 'exclude_languages' parameter is deprecated and will be removed in a future release. "
1137
+ "Please use the parameter 'language' instead.",
1138
+ )
1139
+
875
1140
  from cryptography.fernet import Fernet
876
1141
 
877
1142
  previous_level = self.logger.level
@@ -894,11 +1159,8 @@ class Nosible:
894
1159
  n_contextify = search.n_contextify if search.n_contextify is not None else n_contextify
895
1160
  algorithm = search.algorithm if search.algorithm is not None else algorithm
896
1161
  min_similarity = search.min_similarity if search.min_similarity is not None else min_similarity
897
- min_similarity = min_similarity if min_similarity is not None else 0
898
1162
  must_include = search.must_include if search.must_include is not None else must_include
899
- must_include = must_include if must_include is not None else []
900
1163
  must_exclude = search.must_exclude if search.must_exclude is not None else must_exclude
901
- must_exclude = must_exclude if must_exclude is not None else []
902
1164
  autogenerate_expansions = (
903
1165
  search.autogenerate_expansions
904
1166
  if search.autogenerate_expansions is not None
@@ -911,12 +1173,24 @@ class Nosible:
911
1173
  visited_start = search.visited_start if search.visited_start is not None else visited_start
912
1174
  visited_end = search.visited_end if search.visited_end is not None else visited_end
913
1175
  certain = search.certain if search.certain is not None else certain
914
- include_languages = search.include_languages if search.include_languages is not None else include_languages
915
- exclude_languages = search.exclude_languages if search.exclude_languages is not None else exclude_languages
916
1176
  include_companies = search.include_companies if search.include_companies is not None else include_companies
917
1177
  exclude_companies = search.exclude_companies if search.exclude_companies is not None else exclude_companies
918
- include_docs = search.include_docs if search.include_docs is not None else include_docs
919
- exclude_docs = search.exclude_docs if search.exclude_docs is not None else exclude_docs
1178
+ include_docs = search.include_docs if search.include_docs is not None else self.include_docs
1179
+ exclude_docs = search.exclude_docs if search.exclude_docs is not None else self.exclude_docs
1180
+ brand_safety = search.brand_safety if search.brand_safety is not None else self.brand_safety
1181
+ language = search.language if search.language is not None else self.language
1182
+ continent = search.continent if search.continent is not None else self.continent
1183
+ region = search.region if search.region is not None else self.region
1184
+ country = search.country if search.country is not None else self.country
1185
+ sector = search.sector if search.sector is not None else self.sector
1186
+ industry_group = search.industry_group if search.industry_group is not None else self.industry_group
1187
+ industry = search.industry if search.industry is not None else self.industry
1188
+ sub_industry = search.sub_industry if search.sub_industry is not None else self.sub_industry
1189
+ iab_tier_1 = search.iab_tier_1 if search.iab_tier_1 is not None else self.iab_tier_1
1190
+ iab_tier_2 = search.iab_tier_2 if search.iab_tier_2 is not None else self.iab_tier_2
1191
+ iab_tier_3 = search.iab_tier_3 if search.iab_tier_3 is not None else self.iab_tier_3
1192
+ iab_tier_4 = search.iab_tier_4 if search.iab_tier_4 is not None else self.iab_tier_4
1193
+ instruction = search.instruction if search.instruction is not None else self.instruction
920
1194
 
921
1195
  # Default expansions and filters
922
1196
  if expansions is None:
@@ -936,13 +1210,11 @@ class Nosible:
936
1210
  sql_filter = self._format_sql(
937
1211
  publish_start=publish_start if publish_start is not None else self.publish_start,
938
1212
  publish_end=publish_end if publish_end is not None else self.publish_end,
939
- include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
940
- exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
941
1213
  visited_start=visited_start if visited_start is not None else self.visited_start,
942
1214
  visited_end=visited_end if visited_end is not None else self.visited_end,
943
1215
  certain=certain if certain is not None else self.certain,
944
- include_languages=include_languages if include_languages is not None else self.include_languages,
945
- exclude_languages=exclude_languages if exclude_languages is not None else self.exclude_languages,
1216
+ include_netlocs=include_netlocs if include_netlocs is not None else self.include_netlocs,
1217
+ exclude_netlocs=exclude_netlocs if exclude_netlocs is not None else self.exclude_netlocs,
946
1218
  include_companies=include_companies if include_companies is not None else self.include_companies,
947
1219
  exclude_companies=exclude_companies if exclude_companies is not None else self.exclude_companies,
948
1220
  include_docs=include_docs if include_docs is not None else self.include_docs,
@@ -979,6 +1251,26 @@ class Nosible:
979
1251
  "must_include": must_include,
980
1252
  "must_exclude": must_exclude,
981
1253
  }
1254
+ optional = {
1255
+ "instruction": instruction,
1256
+ "brand_safety": brand_safety,
1257
+ "language": language,
1258
+ "continent": continent,
1259
+ "region": region,
1260
+ "country": country,
1261
+ "sector": sector,
1262
+ "industry_group": industry_group,
1263
+ "industry": industry,
1264
+ "sub_industry": sub_industry,
1265
+ "iab_tier_1": iab_tier_1,
1266
+ "iab_tier_2": iab_tier_2,
1267
+ "iab_tier_3": iab_tier_3,
1268
+ "iab_tier_4": iab_tier_4,
1269
+ }
1270
+ for key, val in optional.items():
1271
+ if val is not None:
1272
+ payload[key] = val
1273
+
982
1274
  resp = self._post(url="https://www.nosible.ai/search/v1/slow-search", payload=payload)
983
1275
  try:
984
1276
  resp.raise_for_status()
@@ -1019,7 +1311,7 @@ class Nosible:
1019
1311
  show_context: bool = True,
1020
1312
  ) -> str:
1021
1313
  """
1022
- RAG-style question answering: retrieve top `n_results` via `.search()`
1314
+ RAG-style question answering: retrieve top `n_results` via `.fast_search()`
1023
1315
  then answer `query` using those documents as context.
1024
1316
 
1025
1317
  Parameters
@@ -1068,7 +1360,7 @@ class Nosible:
1068
1360
  raise ValueError("An LLM API key is required for answer().")
1069
1361
 
1070
1362
  # Retrieve top documents
1071
- results = self.search(question=query, n_results=n_results, min_similarity=min_similarity)
1363
+ results = self.fast_search(question=query, n_results=n_results, min_similarity=min_similarity)
1072
1364
 
1073
1365
  # Build RAG context
1074
1366
  context = ""
@@ -1778,13 +2070,11 @@ class Nosible:
1778
2070
  self,
1779
2071
  publish_start: str = None,
1780
2072
  publish_end: str = None,
1781
- include_netlocs: list = None,
1782
- exclude_netlocs: list = None,
1783
2073
  visited_start: str = None,
1784
2074
  visited_end: str = None,
1785
2075
  certain: bool = None,
1786
- include_languages: list = None,
1787
- exclude_languages: list = None,
2076
+ include_netlocs: list = None,
2077
+ exclude_netlocs: list = None,
1788
2078
  include_companies: list = None,
1789
2079
  exclude_companies: list = None,
1790
2080
  include_docs: list = None,
@@ -1809,10 +2099,6 @@ class Nosible:
1809
2099
  List of netlocs (domains) to include in the search. (Max: 50)
1810
2100
  exclude_netlocs : list of str, optional
1811
2101
  List of netlocs (domains) to exclude in the search. (Max: 50)
1812
- include_languages : list of str, optional
1813
- Languages to include in the search. (Max: 50, ISO 639-1 language codes).
1814
- exclude_languages : list of str, optional
1815
- Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
1816
2102
  include_companies : list of str, optional
1817
2103
  Google KG IDs of public companies to require (Max: 50).
1818
2104
  exclude_companies : list of str, optional
@@ -1845,8 +2131,6 @@ class Nosible:
1845
2131
  for name, value in [
1846
2132
  ("include_netlocs", include_netlocs),
1847
2133
  ("exclude_netlocs", exclude_netlocs),
1848
- ("include_languages", include_languages),
1849
- ("exclude_languages", exclude_languages),
1850
2134
  ("include_companies", include_companies),
1851
2135
  ("exclude_companies", exclude_companies),
1852
2136
  ("include_docs", include_docs),
@@ -1918,14 +2202,6 @@ class Nosible:
1918
2202
  f"(company_1 NOT IN ({company_list}) AND company_2 NOT IN ({company_list}) AND company_3 NOT IN ({company_list}))"
1919
2203
  )
1920
2204
 
1921
- # Include / exclude languages
1922
- if include_languages:
1923
- langs = ", ".join(f"'{lang}-{lang}'" for lang in include_languages)
1924
- clauses.append(f"language IN ({langs})")
1925
- if exclude_languages:
1926
- langs = ", ".join(f"'{lang}-{lang}'" for lang in exclude_languages)
1927
- clauses.append(f"language NOT IN ({langs})")
1928
-
1929
2205
  if include_docs:
1930
2206
  # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
1931
2207
  doc_hashes = ", ".join(f"'{doc}'" for doc in include_docs)