nosible 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nosible/nosible_client.py CHANGED
@@ -3,9 +3,10 @@ import json
3
3
  import logging
4
4
  import os
5
5
  import time
6
- import traceback
6
+ import types
7
+ import typing
7
8
  from collections.abc import Iterator
8
- from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from concurrent.futures import ThreadPoolExecutor
9
10
  from typing import Union
10
11
 
11
12
  import polars as pl
@@ -25,8 +26,10 @@ from tenacity import (
25
26
  from nosible.classes.result_set import ResultSet
26
27
  from nosible.classes.search import Search
27
28
  from nosible.classes.search_set import SearchSet
29
+ from nosible.classes.snippet_set import SnippetSet
28
30
  from nosible.classes.web_page import WebPageData
29
31
  from nosible.utils.json_tools import json_loads
32
+ from nosible.utils.question_builder import _get_question
30
33
  from nosible.utils.rate_limiter import PLAN_RATE_LIMITS, RateLimiter, _rate_limited
31
34
 
32
35
  # Set up a module‐level logger.
@@ -47,37 +50,33 @@ class Nosible:
47
50
  llm_api_key : str, optional
48
51
  API key for LLM-based query expansions.
49
52
  openai_base_url : str
50
- Base URL for the OpenAI-compatible LLM API.
51
- sentiment_model : str
52
- Model to use for sentiment analysis and expansions.
53
+ Base URL for the OpenAI-compatible LLM API. (default is OpenRouter's API endpoint)
54
+ sentiment_model : str, optional
55
+ Model to use for sentiment analysis (default is "openai/gpt-4o").
53
56
  timeout : int
54
57
  Request timeout for HTTP calls.
55
- retries : int, default=5
58
+ retries : int
56
59
  Number of retry attempts for transient HTTP errors.
57
- concurrency : int, default=10
60
+ concurrency : int
58
61
  Maximum concurrent search requests.
59
62
  publish_start : str, optional
60
- Earliest publish date filter (ISO formatted date).
63
+ Start date for when the document was published (ISO format).
61
64
  publish_end : str, optional
62
- Latest publish date filter (ISO formatted date).
63
- include_netlocs : list of str, optional
64
- Domains to include.
65
- exclude_netlocs : list of str, optional
66
- Domains to exclude.
65
+ End date for when the document was published (ISO format).
67
66
  visited_start : str, optional
68
- Earliest visit date filter (ISO formatted date).
67
+ Start date for when the document was visited by NOSIBLE (ISO format).
69
68
  visited_end : str, optional
70
- Latest visit date filter (ISO formatted date).
69
+ End date for when the document was visited by NOSIBLE (ISO format).
71
70
  certain : bool, optional
72
- True if we are 100% sure of the date.
73
- include_languages : list of str, optional
74
- Language codes to include (Max: 50).
75
- exclude_languages : list of str, optional
76
- Language codes to exclude (Max: 50).
71
+ Only include documents where we are 100% sure of the date.
77
72
  include_netlocs : list of str, optional
78
- Only include results from these domains (Max: 50).
73
+ List of netlocs (domains) to include in the search. (Max: 50)
79
74
  exclude_netlocs : list of str, optional
80
- Exclude results from these domains (Max: 50).
75
+ List of netlocs (domains) to exclude in the search. (Max: 50)
76
+ include_languages : list of str, optional
77
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
78
+ exclude_languages : list of str, optional
79
+ Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
81
80
  include_companies : list of str, optional
82
81
  Google KG IDs of public companies to require (Max: 50).
83
82
  exclude_companies : list of str, optional
@@ -86,10 +85,6 @@ class Nosible:
86
85
  URL hashes of docs to include (Max: 50).
87
86
  exclude_docs : list of str, optional
88
87
  URL hashes of docs to exclude (Max: 50).
89
- openai_base_url : str, optional
90
- Base URL for the OpenAI API (default is OpenRouter).
91
- sentiment_model : str, optional
92
- Model to use for sentiment analysis (default is "openai/gpt-4o").
93
88
 
94
89
  Notes
95
90
  -----
@@ -243,38 +238,34 @@ class Nosible:
243
238
  List of LLM‐generated expansions.
244
239
  sql_filter : list of str, optional
245
240
  SQL‐style filter clauses.
246
- n_results : int, default=100
241
+ n_results : int
247
242
  Max number of results (max 100).
248
- n_probes : int, default=30
243
+ n_probes : int
249
244
  Number of index shards to probe.
250
- n_contextify : int, default=128
245
+ n_contextify : int
251
246
  Context window size per result.
252
- algorithm : str, default="hybrid-2"
247
+ algorithm : str
253
248
  Search algorithm type.
254
- autogenerate_expansions : bool, default=False
249
+ autogenerate_expansions : bool
255
250
  Do you want to generate expansions automatically using a LLM?
256
251
  publish_start : str, optional
257
- Earliest publish date filter (ISO formatted date).
252
+ Start date for when the document was published (ISO format).
258
253
  publish_end : str, optional
259
- Latest publish date filter (ISO formatted date).
260
- include_netlocs : list of str, optional
261
- Domains to include.
262
- exclude_netlocs : list of str, optional
263
- Domains to exclude.
254
+ End date for when the document was published (ISO format).
264
255
  visited_start : str, optional
265
- Earliest visit date filter (ISO formatted date).
256
+ Start date for when the document was visited by NOSIBLE (ISO format).
266
257
  visited_end : str, optional
267
- Latest visit date filter (ISO formatted date).
258
+ End date for when the document was visited by NOSIBLE (ISO format).
268
259
  certain : bool, optional
269
- True if we are 100% sure of the date.
270
- include_languages : list of str, optional
271
- Language codes to include (Max: 50).
272
- exclude_languages : list of str, optional
273
- Language codes to exclude (Max: 50).
260
+ Only include documents where we are 100% sure of the date.
274
261
  include_netlocs : list of str, optional
275
- Only include results from these domains (Max: 50).
262
+ List of netlocs (domains) to include in the search. (Max: 50)
276
263
  exclude_netlocs : list of str, optional
277
- Exclude results from these domains (Max: 50).
264
+ List of netlocs (domains) to exclude in the search. (Max: 50)
265
+ include_languages : list of str, optional
266
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
267
+ exclude_languages : list of str, optional
268
+ Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
278
269
  include_companies : list of str, optional
279
270
  Google KG IDs of public companies to require (Max: 50).
280
271
  exclude_companies : list of str, optional
@@ -297,6 +288,8 @@ class Nosible:
297
288
  If neither question nor search are specified
298
289
  RuntimeError
299
290
  If the response fails in any way.
291
+ ValueError
292
+ If `n_results` is greater than 100.
300
293
 
301
294
  Notes
302
295
  -----
@@ -401,50 +394,50 @@ class Nosible:
401
394
  ----------
402
395
  searches: SearchSet or list of Search
403
396
  The searches to execute.
404
- queries : list of str
397
+ questions : list of str
405
398
  The search queries to execute.
406
399
  expansions : list of str, optional
407
400
  List of expansion terms to use for each search.
408
401
  sql_filter : list of str, optional
409
402
  SQL-like filters to apply to the search.
410
- n_results : int, default=100
403
+ n_results : int
411
404
  Number of results to return per search.
412
- n_probes : int, default=30
405
+ n_probes : int
413
406
  Number of probes to use for the search algorithm.
414
- n_contextify : int, default=128
407
+ n_contextify : int
415
408
  Context window size for the search.
416
- algorithm : str, default="hybrid-2"
409
+ algorithm : str
417
410
  Search algorithm to use.
418
- autogenerate_expansions : bool, default=False
411
+ autogenerate_expansions : bool
419
412
  Do you want to generate expansions automatically using a LLM?
420
413
  publish_start : str, optional
421
- Filter results published after this date (ISO formatted date).
414
+ Start date for when the document was published (ISO format).
422
415
  publish_end : str, optional
423
- Filter results published before this date (ISO formatted date).
424
- include_netlocs : list of str, optional
425
- Only include results from these domains.
426
- exclude_netlocs : list of str, optional
427
- Exclude results from these domains.
416
+ End date for when the document was published (ISO format).
428
417
  visited_start : str, optional
429
- Only include results visited after this date (ISO formatted date).
418
+ Start date for when the document was visited by NOSIBLE (ISO format).
430
419
  visited_end : str, optional
431
- Only include results visited before this date (ISO formatted date).
420
+ End date for when the document was visited by NOSIBLE (ISO format).
432
421
  certain : bool, optional
433
- Only include results with high certainty.
422
+ Only include documents where we are 100% sure of the date.
423
+ include_netlocs : list of str, optional
424
+ List of netlocs (domains) to include in the search. (Max: 50)
425
+ exclude_netlocs : list of str, optional
426
+ List of netlocs (domains) to exclude in the search. (Max: 50)
434
427
  include_languages : list of str, optional
435
- Only include results in these languages (Max: 50).
428
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
436
429
  exclude_languages : list of str, optional
437
- Exclude results in these languages (Max: 50).
430
+ Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
438
431
  include_companies : list of str, optional
439
- Only include results from these companies (Max: 50).
432
+ Google KG IDs of public companies to require (Max: 50).
440
433
  exclude_companies : list of str, optional
441
- Exclude results from these companies (Max: 50).
442
- include_netlocs : list of str, optional
443
- Only include results from these domains (Max: 50).
444
- exclude_netlocs : list of str, optional
445
- Exclude results from these domains (Max: 50).
434
+ Google KG IDs of public companies to forbid (Max: 50).
435
+ include_docs : list of str, optional
436
+ URL hashes of docs to include (Max: 50).
437
+ exclude_docs : list of str, optional
438
+ URL hashes of docs to exclude (Max: 50).
446
439
 
447
- Yields
440
+ Returns
448
441
  ------
449
442
  ResultSet or None
450
443
  Each completed search’s results, or None on failure.
@@ -457,8 +450,6 @@ class Nosible:
457
450
  If both questions and searches are specified.
458
451
  TypeError
459
452
  If neither questions nor searches are specified.
460
- RuntimeError
461
- If the response fails in any way.
462
453
 
463
454
  Notes
464
455
  -----
@@ -469,7 +460,10 @@ class Nosible:
469
460
  --------
470
461
  >>> from nosible import Nosible
471
462
  >>> queries = SearchSet(
472
- ... [Search(question="Hedge funds seek to expand into private credit", n_results=5), Search(question="How have the Trump tariffs impacted the US economy?", n_results=5)]
463
+ ... [
464
+ ... Search(question="Hedge funds seek to expand into private credit", n_results=5),
465
+ ... Search(question="How have the Trump tariffs impacted the US economy?", n_results=5),
466
+ ... ]
473
467
  ... )
474
468
  >>> with Nosible() as nos:
475
469
  ... results_list = list(nos.searches(searches=queries))
@@ -480,10 +474,14 @@ class Nosible:
480
474
  True True
481
475
  True True
482
476
  >>> with Nosible() as nos:
483
- ... results_list_str = list(nos.searches(questions=[
484
- ... "What are the terms of the partnership between Microsoft and OpenAI?",
485
- ... "What are the terms of the partnership between Volkswagen and Uber?"
486
- ... ]))
477
+ ... results_list_str = list(
478
+ ... nos.searches(
479
+ ... questions=[
480
+ ... "What are the terms of the partnership between Microsoft and OpenAI?",
481
+ ... "What are the terms of the partnership between Volkswagen and Uber?",
482
+ ... ]
483
+ ... )
484
+ ... )
487
485
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +ELLIPSIS
488
486
  >>> nos.searches() # doctest: +ELLIPSIS
489
487
  Traceback (most recent call last):
@@ -535,6 +533,7 @@ class Nosible:
535
533
  except Exception as e:
536
534
  self.logger.warning(f"Search failed: {e!r}")
537
535
  yield None
536
+
538
537
  return _run_generator()
539
538
 
540
539
  @_rate_limited("fast")
@@ -569,7 +568,7 @@ class Nosible:
569
568
  ValueError: Search can not have more than 100 results - Use bulk search instead.
570
569
  """
571
570
  # --------------------------------------------------------------------------------------------------------------
572
- # Setting search params. Individual search will overide Nosible defaults.
571
+ # Setting search params. Individual search will override Nosible defaults.
573
572
  # --------------------------------------------------------------------------------------------------------------
574
573
  question = search_obj.question # No default
575
574
  expansions = search_obj.expansions if search_obj.expansions is not None else [] # Default to empty list
@@ -578,7 +577,9 @@ class Nosible:
578
577
  n_probes = search_obj.n_probes if search_obj.n_probes is not None else 30
579
578
  n_contextify = search_obj.n_contextify if search_obj.n_contextify is not None else 128
580
579
  algorithm = search_obj.algorithm if search_obj.algorithm is not None else "hybrid-2"
581
- autogenerate_expansions = search_obj.autogenerate_expansions if search_obj.autogenerate_expansions is not None else False
580
+ autogenerate_expansions = (
581
+ search_obj.autogenerate_expansions if search_obj.autogenerate_expansions is not None else False
582
+ )
582
583
  publish_start = search_obj.publish_start if search_obj.publish_start is not None else self.publish_start
583
584
  publish_end = search_obj.publish_end if search_obj.publish_end is not None else self.publish_end
584
585
  include_netlocs = search_obj.include_netlocs if search_obj.include_netlocs is not None else self.include_netlocs
@@ -724,46 +725,42 @@ class Nosible:
724
725
  Optional list of expanded query strings.
725
726
  sql_filter : list of str, optional
726
727
  Optional SQL WHERE clause filters.
727
- n_results : int, default=100
728
+ n_results : int
728
729
  Number of results per query (1,000–10,000).
729
- n_probes : int, default=30
730
+ n_probes : int
730
731
  Number of shards to probe.
731
- n_contextify : int, default=128
732
+ n_contextify : int
732
733
  Context window size per result.
733
- algorithm : str, default="hybrid-2"
734
+ algorithm : str
734
735
  Search algorithm identifier.
735
- autogenerate_expansions : bool, default=False
736
+ autogenerate_expansions : bool
736
737
  Do you want to generate expansions automatically using a LLM?
737
738
  publish_start : str, optional
738
- Filter for earliest publish date.
739
+ Start date for when the document was published (ISO format).
739
740
  publish_end : str, optional
740
- Filter for latest publish date.
741
- include_netlocs : list of str, optional
742
- Domains to include.
743
- exclude_netlocs : list of str, optional
744
- Domains to exclude.
741
+ End date for when the document was published (ISO format).
745
742
  visited_start : str, optional
746
- Filter for earliest visit date.
743
+ Start date for when the document was visited by NOSIBLE (ISO format).
747
744
  visited_end : str, optional
748
- Filter for latest visit date.
745
+ End date for when the document was visited by NOSIBLE (ISO format).
749
746
  certain : bool, optional
750
- True if we are 100% sure of the date.
751
- include_languages : list of str, optional
752
- Languages to include (Max: 50).
753
- exclude_languages : list of str, optional
754
- Languages to exclude (Max: 50).
747
+ Only include documents where we are 100% sure of the date.
755
748
  include_netlocs : list of str, optional
756
- Only include results from these domains (Max: 50).
749
+ List of netlocs (domains) to include in the search. (Max: 50)
757
750
  exclude_netlocs : list of str, optional
758
- Exclude results from these domains (Max: 50).
751
+ List of netlocs (domains) to exclude in the search. (Max: 50)
752
+ include_languages : list of str, optional
753
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
754
+ exclude_languages : list of str, optional
755
+ Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
759
756
  include_companies : list of str, optional
760
- Company IDs to require (Max: 50).
757
+ Google KG IDs of public companies to require (Max: 50).
761
758
  exclude_companies : list of str, optional
762
- Company IDs to forbid (Max: 50).
759
+ Google KG IDs of public companies to forbid (Max: 50).
763
760
  include_docs : list of str, optional
764
- URL hashes of documents to include (Max: 50).
761
+ URL hashes of docs to include (Max: 50).
765
762
  exclude_docs : list of str, optional
766
- URL hashes of documents to exclude (Max: 50).
763
+ URL hashes of docs to exclude (Max: 50).
767
764
  verbose : bool, optional
768
765
  Show verbose output, Bulk search will print more information.
769
766
 
@@ -790,23 +787,21 @@ class Nosible:
790
787
 
791
788
  Examples
792
789
  --------
793
- >>> from nosible.classes.search import Search
794
- >>> from nosible import Nosible
795
- >>> with Nosible(include_netlocs=["bbc.com"]) as nos: # doctest: +SKIP
796
- ... results = nos.bulk_search(question="Nvidia insiders dump more than $1 billion in stock", n_results=2000) # doctest: +SKIP
790
+ >>> from nosible.classes.search import Search # doctest: +SKIP
791
+ >>> from nosible import Nosible # doctest: +SKIP
792
+ >>> with Nosible(exclude_netlocs=["bbc.com"]) as nos: # doctest: +SKIP
793
+ ... results = nos.bulk_search(question=_get_question(), n_results=2000) # doctest: +SKIP
797
794
  ... print(isinstance(results, ResultSet)) # doctest: +SKIP
798
795
  ... print(len(results)) # doctest: +SKIP
799
796
  True
800
797
  2000
801
-
802
- >>> s = Search(question="OpenAI", n_results=1000) # doctest: +SKIP
798
+ >>> s = Search(question=_get_question(), n_results=1000) # doctest: +SKIP
803
799
  >>> with Nosible() as nos: # doctest: +SKIP
804
800
  ... results = nos.bulk_search(search=s) # doctest: +SKIP
805
801
  ... print(isinstance(results, ResultSet)) # doctest: +SKIP
806
802
  ... print(len(results)) # doctest: +SKIP
807
803
  True
808
804
  1000
809
-
810
805
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +SKIP
811
806
  >>> nos.bulk_search() # doctest: +SKIP
812
807
  Traceback (most recent call last):
@@ -814,20 +809,18 @@ class Nosible:
814
809
  TypeError: Either question or search must be specified
815
810
 
816
811
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +SKIP
817
- >>> nos.bulk_search(question="foo", search=Search(question="foo")) # doctest: +SKIP
812
+ >>> nos.bulk_search(question=_get_question(), search=Search(question=_get_question())) # doctest: +SKIP
818
813
  Traceback (most recent call last):
819
814
  ...
820
815
  TypeError: Question and search cannot be both specified
821
-
822
816
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +SKIP
823
- >>> nos.bulk_search(question="foo", n_results=100) # doctest: +SKIP
817
+ >>> nos.bulk_search(question=_get_question(), n_results=100) # doctest: +SKIP
824
818
  Traceback (most recent call last):
825
819
  ...
826
- ValueError: Bulk search must have at least 100 results per query; use search() for smaller result sets.
827
-
820
+ ValueError: Bulk search must have at least 1000 results per query; use search() for smaller result sets.
828
821
  >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +SKIP
829
- >>> nos.bulk_search(question="foo", n_results=10001) # doctest: +SKIP
830
- Traceback (most recent call last):
822
+ >>> nos.bulk_search(question=_get_question(), n_results=10001) # doctest: +SKIP
823
+ Traceback (most recent call last):
831
824
  ...
832
825
  ValueError: Bulk search cannot have more than 10000 results per query.
833
826
  """
@@ -850,8 +843,11 @@ class Nosible:
850
843
  n_probes = search.n_probes if search.n_probes is not None else n_probes
851
844
  n_contextify = search.n_contextify if search.n_contextify is not None else n_contextify
852
845
  algorithm = search.algorithm if search.algorithm is not None else algorithm
853
- autogenerate_expansions = search.autogenerate_expansions if search.autogenerate_expansions is not None \
846
+ autogenerate_expansions = (
847
+ search.autogenerate_expansions
848
+ if search.autogenerate_expansions is not None
854
849
  else autogenerate_expansions
850
+ )
855
851
  publish_start = search.publish_start if search.publish_start is not None else publish_start
856
852
  publish_end = search.publish_end if search.publish_end is not None else publish_end
857
853
  include_netlocs = search.include_netlocs if search.include_netlocs is not None else include_netlocs
@@ -893,7 +889,7 @@ class Nosible:
893
889
  self.logger.debug(f"SQL Filter: {sql_filter}")
894
890
 
895
891
  # Validate n_result bounds
896
- if n_results <= 1000:
892
+ if n_results < 1000:
897
893
  raise ValueError(
898
894
  "Bulk search must have at least 1000 results per query; use search() for smaller result sets."
899
895
  )
@@ -955,13 +951,13 @@ class Nosible:
955
951
 
956
952
  Parameters
957
953
  ----------
958
- html : str, default=""
954
+ html : str
959
955
  Raw HTML to process instead of fetching.
960
- recrawl : bool, default=False
956
+ recrawl : bool
961
957
  If True, force a fresh crawl.
962
- render : bool, default=False
958
+ render : bool
963
959
  If True, allow JavaScript rendering before extraction.
964
- url : str, default=None
960
+ url : str
965
961
  The URL to fetch and parse.
966
962
 
967
963
  Returns
@@ -982,26 +978,24 @@ class Nosible:
982
978
 
983
979
  Examples
984
980
  --------
985
- >>> from nosible import Nosible # doctest: +SKIP
986
- >>> with Nosible() as nos: # doctest: +SKIP
987
- ... out = nos.visit(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/") # doctest: +SKIP
988
- ... print(isinstance(out, type(WebPageData))) # doctest: +SKIP
989
- ... print(hasattr(out, "languages")) # doctest: +SKIP
990
- ... print(hasattr(out, "page")) # doctest: +SKIP
981
+ >>> from nosible import Nosible
982
+ >>> with Nosible() as nos:
983
+ ... out = nos.visit(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
984
+ ... print(isinstance(out, WebPageData))
985
+ ... print(hasattr(out, "languages"))
986
+ ... print(hasattr(out, "page"))
991
987
  True
992
988
  True
993
989
  True
994
- >>> with Nosible() as nos: # doctest: +SKIP
995
- ... out = nos.visit() # doctest: +SKIP
996
- ... print(isinstance(out, type(WebPageData))) # doctest: +SKIP
997
- ... print(hasattr(out, "languages")) # doctest: +SKIP
998
- ... print(hasattr(out, "page")) # doctest: +SKIP
990
+ >>> with Nosible() as nos:
991
+ ... out = nos.visit()
992
+ ... print(isinstance(out, type(WebPageData)))
993
+ ... print(hasattr(out, "languages"))
994
+ ... print(hasattr(out, "page")) # doctest: +ELLIPSIS
999
995
  Traceback (most recent call last):
1000
996
  ...
1001
997
  TypeError: URL must be provided
1002
998
  """
1003
-
1004
- # self._enforce("visit")
1005
999
  if url is None:
1006
1000
  raise TypeError("URL must be provided")
1007
1001
  response = self._post(
@@ -1014,7 +1008,7 @@ class Nosible:
1014
1008
  self.logger.error(f"Failed to parse JSON from response: {e}")
1015
1009
  raise ValueError("Invalid JSON response from server") from e
1016
1010
 
1017
- if data == {'message': 'Sorry, the URL could not be fetched.'}:
1011
+ if data == {"message": "Sorry, the URL could not be fetched."}:
1018
1012
  raise ValueError("The URL could not be found.")
1019
1013
 
1020
1014
  if "response" not in data:
@@ -1029,7 +1023,7 @@ class Nosible:
1029
1023
  metadata=response_data.get("metadata"),
1030
1024
  page=response_data.get("page"),
1031
1025
  request=response_data.get("request"),
1032
- snippets=response_data.get("snippets"),
1026
+ snippets=SnippetSet.from_dict(response_data.get("snippets", {})),
1033
1027
  statistics=response_data.get("statistics"),
1034
1028
  structured=response_data.get("structured"),
1035
1029
  url_tree=response_data.get("url_tree"),
@@ -1093,17 +1087,12 @@ class Nosible:
1093
1087
 
1094
1088
  Raises
1095
1089
  ------
1096
- ValueError
1097
- If the API returns an unexpected message.
1098
- requests.HTTPError
1099
- If the HTTP request fails.
1100
1090
 
1101
1091
  Examples
1102
1092
  --------
1103
1093
  >>> from nosible import Nosible
1104
1094
  >>> with Nosible() as nos:
1105
- ... idx = nos.indexed(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
1106
- ... print(idx)
1095
+ ... print(nos.indexed(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/"))
1107
1096
  True
1108
1097
  """
1109
1098
  response = self._post(url="https://www.nosible.ai/search/v1/indexed", payload={"url": url})
@@ -1116,9 +1105,12 @@ class Nosible:
1116
1105
  return True
1117
1106
  if msg == "The URL is nowhere to be found.":
1118
1107
  return False
1119
- raise ValueError(f"Unexpected response from indexed endpoint: {data!r}")
1108
+ if msg == "The URL could not be retrieved.":
1109
+ return False
1120
1110
  except requests.HTTPError:
1121
1111
  return False
1112
+ except:
1113
+ return False
1122
1114
 
1123
1115
  def preflight(self, url: str = None) -> str:
1124
1116
  """
@@ -1175,40 +1167,47 @@ class Nosible:
1175
1167
 
1176
1168
  Examples
1177
1169
  --------
1178
- >>> nos = Nosible(nosible_api_key="test|xyz") # doctest: +SKIP
1179
- >>> print(nos.get_rate_limits()) # doctest: +SKIP
1180
- Free (Your current plan)
1181
- | Endpoint | Per Month | Per Day | Per Minute |
1182
- | ----------- | --------- | ------- | ---------- |
1183
- | Fast Search | 3 000 | 100 | 10 |
1184
- | URL Visits | 300 | 10 | 1 |
1185
- | Slow Search | 300 | 10 | 1 |
1186
-
1187
- Basic
1188
- | Endpoint | Per Month | Per Day | Per Minute |
1170
+ >>> nos = Nosible(nosible_api_key="test|xyz")
1171
+ >>> print(nos.get_rate_limits()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
1172
+ Below are the rate limits for all NOSIBLE plans.
1173
+ To upgrade your package, visit https://www.nosible.ai/products.
1174
+ <BLANKLINE>
1175
+ Unless otherwise indicated, bulk searches are limited to one-at-a-time per API key.
1176
+ <BLANKLINE>
1177
+ Free: (Your current plan)
1178
+ | Endpoint | Per Month | Per Minute | Effective CPM |
1179
+ | ----------- | --------- | ---------- | ------------- |
1180
+ | Search | 3000 | 60 | $4.00 |
1181
+ | URL Visits | 300 | 60 | $4.00 |
1182
+ | Bulk Search | 300 | 60 | $4.00 |
1183
+ <BLANKLINE>
1184
+ Basic ($49p/m):
1185
+ | Endpoint | Per Month | Per Minute | Effective CPM |
1189
1186
  ...
1190
1187
  """
1191
1188
  # Human-friendly plan names
1192
1189
  display = {
1193
1190
  "test": "Free",
1194
- "basic": "Basic",
1195
- "pro": "Pro",
1196
- "pro+": "Pro+",
1197
- "bus": "Business",
1198
- "bus+": "Business+",
1199
- "ent": "Enterprise",
1191
+ "basic": "Basic ($49p/m)",
1192
+ "pro": "Pro ($199p/m)",
1193
+ "pro+": "Pro+ ($799p/m)",
1194
+ "bus": "Business ($3999p/m)",
1195
+ "bus+": "Business+ ($7499p/m)",
1196
+ "ent": "Enterprise ($14999p/m)",
1200
1197
  }
1201
1198
 
1202
1199
  # Human-friendly endpoint names
1203
- endpoint_name = {"fast": "Fast Search", "visit": "URL Visits", "slow": "Bulk Search"}
1200
+ endpoint_name = {"fast": "Search", "visit": "URL Visits", "slow": "Bulk Search"}
1204
1201
 
1205
1202
  out = [
1206
1203
  "Below are the rate limits for all NOSIBLE plans.",
1207
1204
  "To upgrade your package, visit https://www.nosible.ai/products.\n",
1205
+ "Unless otherwise indicated, bulk searches are limited to one-at-a-time per API key.\n"
1208
1206
  ]
1209
1207
 
1210
1208
  user_plan = self._get_user_plan()
1211
1209
  current_plan = ""
1210
+ cpm_counter = 4.0
1212
1211
 
1213
1212
  # Preserve the order you care about:
1214
1213
  for plan in ["test", "basic", "pro", "pro+", "bus", "bus+", "ent"]:
@@ -1217,17 +1216,19 @@ class Nosible:
1217
1216
  current_plan = " (Your current plan)"
1218
1217
 
1219
1218
  out.append(f"{name}:{current_plan}")
1220
- out.append("| Endpoint | Per Month | Per Day | Per Minute |")
1221
- out.append("| ----------- | --------- | ------- | ---------- |")
1219
+ out.append("| Endpoint | Per Month | Per Minute | Effective CPM |")
1220
+ out.append("| ----------- | --------- | ---------- | ------------- |")
1222
1221
 
1223
1222
  for ep in ["fast", "visit", "slow"]:
1224
1223
  buckets = PLAN_RATE_LIMITS[plan][ep]
1225
1224
  # Find minute & day
1226
1225
  minute = next(limit for limit, i in buckets if i == 60)
1227
- day = next(limit for limit, i in buckets if i == 24 * 3600)
1228
- month = day * 30
1229
- out.append(f"| {endpoint_name[ep]:<11} | {month:>9} | {day:>7} | {minute:>10} |")
1226
+ month = next(limit for limit, i in buckets if i == 24 * 3600 * 30)
1227
+ cpm = f"${cpm_counter:.2f}"
1230
1228
 
1229
+ out.append(f"| {endpoint_name[ep]:<11} | {month:>9} | {minute:>10} | {cpm:>13} |")
1230
+
1231
+ cpm_counter = cpm_counter - 0.5
1231
1232
  out.append("") # Blank line
1232
1233
  current_plan = ""
1233
1234
 
@@ -1238,10 +1239,6 @@ class Nosible:
1238
1239
  Close the Nosible client, shutting down the HTTP session
1239
1240
  and thread pool to release network and threading resources.
1240
1241
 
1241
- Returns
1242
- -------
1243
- None
1244
-
1245
1242
  Examples
1246
1243
  --------
1247
1244
  >>> from nosible import Nosible
@@ -1287,6 +1284,8 @@ class Nosible:
1287
1284
  If the user API key is invalid.
1288
1285
  ValueError
1289
1286
  If the user hits their rate limit.
1287
+ ValueError
1288
+ If the user is making too many concurrent searches.
1290
1289
  ValueError
1291
1290
  If an unexpected error occurs.
1292
1291
  ValueError
@@ -1320,6 +1319,8 @@ class Nosible:
1320
1319
  raise ValueError("You made a bad request.")
1321
1320
  if response.status_code == 429:
1322
1321
  raise ValueError("You have hit your rate limit.")
1322
+ if response.status_code == 409:
1323
+ raise ValueError("Too many concurrent searches.")
1323
1324
  if response.status_code == 500:
1324
1325
  raise ValueError("An unexpected error occurred.")
1325
1326
  if response.status_code == 502:
@@ -1349,16 +1350,16 @@ class Nosible:
1349
1350
 
1350
1351
  Examples
1351
1352
  --------
1352
- >>> nos = Nosible(nosible_api_key="test+|xyz") # doctest: +SKIP
1353
+ >>> nos = Nosible(nosible_api_key="test+|xyz") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
1353
1354
  Traceback (most recent call last):
1354
1355
  ...
1355
- ValueError: test+ is not a valid plan prefix, your API key is invalid.
1356
+ ValueError: Your API key is not valid: test+ is not a valid plan prefix.
1356
1357
  """
1357
1358
  # Split off anything after the first '|'
1358
1359
  prefix = (self.nosible_api_key or "").split("|", 1)[0]
1359
1360
 
1360
- # Map prefixes -> human-friendly plan names
1361
- plans = {"test", "basic", "pro", "pro+", "bus", "bus+", "ent"}
1361
+ # Map prefixes -> plan names
1362
+ plans = {"test", "basic", "pro", "pro+", "bus", "bus+", "ent", "chat"}
1362
1363
 
1363
1364
  if prefix not in plans:
1364
1365
  raise ValueError(f"Your API key is not valid: {prefix} is not a valid plan prefix.")
@@ -1388,11 +1389,10 @@ class Nosible:
1388
1389
 
1389
1390
  Examples
1390
1391
  --------
1391
-
1392
- >>> from nosible import Nosible # doctest: +SKIP
1393
- >>> nos = Nosible(llm_api_key=None) # doctest: +SKIP
1394
- >>> nos.llm_api_key = None # doctest: +SKIP
1395
- >>> nos._generate_expansions("anything") # doctest: +SKIP
1392
+ >>> from nosible import Nosible
1393
+ >>> nos = Nosible(llm_api_key=None)
1394
+ >>> nos.llm_api_key = None
1395
+ >>> nos._generate_expansions("anything") # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
1396
1396
  Traceback (most recent call last):
1397
1397
  ...
1398
1398
  ValueError: LLM API key is required for generating expansions.
@@ -1503,35 +1503,31 @@ class Nosible:
1503
1503
  Parameters
1504
1504
  ----------
1505
1505
  publish_start : str, optional
1506
- Earliest published date filter.
1506
+ Start date for when the document was published (ISO format).
1507
1507
  publish_end : str, optional
1508
- Latest published date filter.
1509
- include_netlocs : list of str, optional
1510
- Domains to whitelist.
1511
- exclude_netlocs : list of str, optional
1512
- Domains to blacklist.
1508
+ End date for when the document was published (ISO format).
1513
1509
  visited_start : str, optional
1514
- Earliest visit date filter.
1510
+ Start date for when the document was visited by NOSIBLE (ISO format).
1515
1511
  visited_end : str, optional
1516
- Latest visit date filter.
1512
+ End date for when the document was visited by NOSIBLE (ISO format).
1517
1513
  certain : bool, optional
1518
- True if we are 100% sure of the date.
1519
- include_languages : list of str, optional
1520
- Languages to include (Max: 50).
1521
- exclude_languages : list of str, optional
1522
- Languages to exclude (Max: 50).
1514
+ Only include documents where we are 100% sure of the date.
1523
1515
  include_netlocs : list of str, optional
1524
- Only include results from these domains (Max: 50).
1516
+ List of netlocs (domains) to include in the search. (Max: 50)
1525
1517
  exclude_netlocs : list of str, optional
1526
- Exclude results from these domains (Max: 50).
1518
+ List of netlocs (domains) to exclude in the search. (Max: 50)
1519
+ include_languages : list of str, optional
1520
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
1521
+ exclude_languages : list of str, optional
1522
+ Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
1527
1523
  include_companies : list of str, optional
1528
- Public Company Google KG IDs to require (Max: 50).
1524
+ Google KG IDs of public companies to require (Max: 50).
1529
1525
  exclude_companies : list of str, optional
1530
- Public Company Google KG IDs to forbid (Max: 50).
1526
+ Google KG IDs of public companies to forbid (Max: 50).
1531
1527
  include_docs : list of str, optional
1532
- URL hashes of documents to include (Max: 50).
1528
+ URL hashes of docs to include (Max: 50).
1533
1529
  exclude_docs : list of str, optional
1534
- URL hashes of documents to exclude (Max: 50).
1530
+ URL hashes of docs to exclude (Max: 50).
1535
1531
 
1536
1532
  Returns
1537
1533
  -------
@@ -1540,20 +1536,19 @@ class Nosible:
1540
1536
 
1541
1537
  Raises
1542
1538
  ------
1543
-
1544
1539
  ValueError
1545
1540
  If more than 50 items in a filter are given.
1546
1541
  """
1547
1542
  # Validate list lengths
1548
1543
  for name, lst in [
1549
- ('include_netlocs', include_netlocs),
1550
- ('exclude_netlocs', exclude_netlocs),
1551
- ('include_languages', include_languages),
1552
- ('exclude_languages', exclude_languages),
1553
- ('include_companies', include_companies),
1554
- ('exclude_companies', exclude_companies),
1555
- ('include_docs', include_docs),
1556
- ('exclude_docs', exclude_docs),
1544
+ ("include_netlocs", include_netlocs),
1545
+ ("exclude_netlocs", exclude_netlocs),
1546
+ ("include_languages", include_languages),
1547
+ ("exclude_languages", exclude_languages),
1548
+ ("include_companies", include_companies),
1549
+ ("exclude_companies", exclude_companies),
1550
+ ("include_docs", include_docs),
1551
+ ("exclude_docs", exclude_docs),
1557
1552
  ]:
1558
1553
  if lst is not None and len(lst) > 50:
1559
1554
  raise ValueError(f"Too many items for '{name}' filter ({len(lst)}); maximum allowed is 50.")
@@ -1590,10 +1585,10 @@ class Nosible:
1590
1585
  variants = set()
1591
1586
  for n in include_netlocs:
1592
1587
  variants.add(n)
1593
- if n.startswith('www.'):
1588
+ if n.startswith("www."):
1594
1589
  variants.add(n[4:])
1595
1590
  else:
1596
- variants.add('www.' + n)
1591
+ variants.add("www." + n)
1597
1592
  in_list = ", ".join(f"'{v}'" for v in sorted(variants))
1598
1593
  clauses.append(f"netloc IN ({in_list})")
1599
1594
 
@@ -1602,10 +1597,10 @@ class Nosible:
1602
1597
  variants = set()
1603
1598
  for n in exclude_netlocs:
1604
1599
  variants.add(n)
1605
- if n.startswith('www.'):
1600
+ if n.startswith("www."):
1606
1601
  variants.add(n[4:])
1607
1602
  else:
1608
- variants.add('www.' + n)
1603
+ variants.add("www." + n)
1609
1604
  ex_list = ", ".join(f"'{v}'" for v in sorted(variants))
1610
1605
  clauses.append(f"netloc NOT IN ({ex_list})")
1611
1606
 
@@ -1698,7 +1693,7 @@ class Nosible:
1698
1693
  except Exception:
1699
1694
  return False
1700
1695
 
1701
- def __enter__(self):
1696
+ def __enter__(self) -> "Nosible":
1702
1697
  """
1703
1698
  Enter the context manager, returning this client instance.
1704
1699
 
@@ -1709,32 +1704,42 @@ class Nosible:
1709
1704
  """
1710
1705
  return self
1711
1706
 
1712
- def __exit__(self, exc_type: type, exc: Exception, tb: traceback):
1707
+ def __exit__(
1708
+ self,
1709
+ _exc_type: typing.Optional[type[BaseException]],
1710
+ _exc_val: typing.Optional[BaseException],
1711
+ _exc_tb: typing.Optional[types.TracebackType],
1712
+ ) -> typing.Optional[bool]:
1713
1713
  """
1714
- Exit the context manager, ensuring cleanup of resources.
1714
+ Always clean up (self.close()), but let exceptions propagate.
1715
+ Return True only if you really want to suppress an exception.
1715
1716
 
1716
1717
  Parameters
1717
1718
  ----------
1718
- exc_type : type or None
1719
- Exception type if raised.
1720
- exc : Exception or None
1721
- Exception instance if raised.
1722
- tb : traceback or None
1723
- Traceback if exception was raised.
1719
+ exc_type : Optional[type[BaseException]]
1720
+ The type of the exception raised, if any.
1721
+ exc_val : Optional[BaseException]
1722
+ The exception instance, if any.
1723
+ exc_tb : Optional[types.TracebackType]
1724
+ The traceback object, if any.
1724
1725
 
1725
1726
  Returns
1726
1727
  -------
1727
- None
1728
+ Optional[bool]
1729
+ False to propagate exceptions, True to suppress them.
1728
1730
  """
1729
- self.close()
1731
+ try:
1732
+ self.close()
1733
+ except Exception as cleanup_err:
1734
+ # optional: log or re-raise, but don’t hide the original exc
1735
+ print(f"Cleanup failed: {cleanup_err!r}")
1736
+ # Return False (or None) => exceptions inside the with‐block are re-raised.
1737
+ return False
1730
1738
 
1731
1739
  def __del__(self):
1732
1740
  """
1733
1741
  Destructor to ensure resources are cleaned up if not explicitly closed.
1734
1742
 
1735
- Returns
1736
- -------
1737
- None
1738
1743
  """
1739
1744
  # Ensure it's called
1740
1745
  self.close()