nosible-0.3.6-py3-none-any.whl → nosible-0.3.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nosible/classes/result.py +17 -17
- nosible/classes/result_set.py +5 -0
- nosible/classes/snippet.py +0 -7
- nosible/classes/web_page.py +0 -2
- nosible/nosible_client.py +72 -41
- nosible/utils/rate_limiter.py +9 -77
- {nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/METADATA +2 -2
- nosible-0.3.10.dist-info/RECORD +16 -0
- {nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/WHEEL +1 -1
- nosible-0.3.6.dist-info/RECORD +0 -16
- {nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/licenses/LICENSE +0 -0
- {nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/top_level.txt +0 -0
nosible/classes/result.py
CHANGED

@@ -36,6 +36,8 @@ class Result:
         The author of the content.
     content : str, optional
         The main content or body of the search result.
+    best_chunk : str, optional
+        The best snippet of text that matches your question from the search result.
     language : str, optional
         The language code of the content (e.g., 'en' for English).
     similarity : float, optional

@@ -105,6 +107,8 @@ class Result:
    """The author of the content."""
    content: str | None = None
    """The main content or body of the search result."""
+   best_chunk: str | None = None
+   """The best snippet of text that matches your question from the search result."""
    language: str | None = None
    """The language code of the content (e.g., 'en' for English)."""
    similarity: float | None = None

@@ -150,23 +154,14 @@ class Result:
         >>> result = Result(title="Example Domain", similarity=0.9876)
         >>> print(str(result))
         0.99 | Example Domain
-        >>> result = Result(title=None, similarity=None)
-        >>> print(str(result))
-        {
-            "url": null,
-            "title": null,
-            "description": null,
-            "netloc": null,
-            "published": null,
-            "visited": null,
-            "author": null,
-            "content": null,
-            "language": null,
-            "similarity": null,
-            "url_hash": null
-        }
         """
-
+        # Get the full dictionary
+        data = self.to_dict()
+
+        # Create a new dictionary excluding keys where the value is None
+        clean_data = {k: v for k, v in data.items() if v is not None}
+
+        return print_dict(clean_data)

     def __getitem__(self, key: str) -> str | float | bool | None:
         """

@@ -519,6 +514,11 @@ class Result:
         try:
             from nosible import Search

+            # Exclude the original doc from the new search.
+            exclude_docs_list = list(exclude_docs) if exclude_docs else []
+            if self.url_hash and self.url_hash not in exclude_docs_list:
+                exclude_docs_list.append(self.url_hash)
+
             s = Search(
                 question=self.title,
                 expansions=[],
@@ -537,7 +537,7 @@
                 include_companies=include_companies,
                 exclude_companies=exclude_companies,
                 include_docs=include_docs,
-                exclude_docs=exclude_docs,
+                exclude_docs=exclude_docs_list,
                 brand_safety=brand_safety,
                 language=language,
                 continent=continent,
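The two behavioural changes above are small but easy to miss: `__str__` now prints only the fields that are set, and `find_similar` adds the result's own `url_hash` to the exclusion list so a similarity search cannot return the page it started from. Below is a minimal standalone sketch of both patterns; `print_dict` here is a hypothetical stand-in for the package's helper of the same name, and the sample dict is invented for illustration.

import json

def print_dict(d):
    # Hypothetical stand-in for nosible's print_dict helper: pretty-print as JSON.
    return json.dumps(d, indent=4)

def str_without_nones(result_dict):
    # Mirrors the new __str__: drop keys whose value is None before printing.
    clean_data = {k: v for k, v in result_dict.items() if v is not None}
    return print_dict(clean_data)

def build_exclusions(url_hash, exclude_docs=None):
    # Mirrors the new find_similar: copy the caller's exclusions and add this
    # document's hash so the follow-up search cannot match the document itself.
    exclude_docs_list = list(exclude_docs) if exclude_docs else []
    if url_hash and url_hash not in exclude_docs_list:
        exclude_docs_list.append(url_hash)
    return exclude_docs_list

row = {"title": "Example Domain", "similarity": 0.9876, "author": None}
print(str_without_nones(row))                                   # "author" is omitted
print(build_exclusions("ENNmqkF1mGNhVhvhmbUEs4U2", ["aaa111"]))  # hash is appended once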
nosible/classes/result_set.py
CHANGED

@@ -54,6 +54,7 @@ class ResultSet(Iterator[Result]):
        "visited",
        "author",
        "content",
+       "best_chunk",
        "language",
        "similarity",
        "url_hash",

@@ -1004,6 +1005,7 @@ class ResultSet(Iterator[Result]):
                visited=row.get("visited"),
                author=row.get("author"),
                content=row.get("content"),
+               best_chunk=row.get("best_chunk"),
                language=row.get("language"),
                similarity=row.get("similarity"),
                url_hash=row.get("url_hash"),

@@ -1113,6 +1115,7 @@ class ResultSet(Iterator[Result]):
                visited=row.get("visited"),
                author=row.get("author"),
                content=row.get("content"),
+               best_chunk=row.get("best_chunk"),
                language=row.get("language"),
                similarity=row.get("semantics", {}).get("similarity", row.get("similarity")),
                url_hash=row.get("url_hash"),

@@ -1212,6 +1215,7 @@ class ResultSet(Iterator[Result]):
                visited=data.get("visited"),
                author=data.get("author"),
                content=data.get("content"),
+               best_chunk=data.get("best_chunk"),
                language=data.get("language"),
                similarity=data.get("similarity"),
                url_hash=data.get("url_hash"),

@@ -1449,6 +1453,7 @@ class ResultSet(Iterator[Result]):
                visited=d.get("visited"),
                author=d.get("author"),
                content=d.get("content"),
+               best_chunk=d.get("best_chunk"),
                language=d.get("language"),
                similarity=d.get("similarity", d.get("semantics", {}).get("similarity")),
                url_hash=d.get("url_hash"),
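Every constructor path above now forwards `best_chunk` from the raw row into the `Result`, and `similarity` keeps its fallback from a nested `semantics.similarity` value to the flat key. A small sketch of that field-picking pattern on a plain dict; the sample row is invented and only illustrates the lookup order, not the exact API payload.

def pick_result_fields(row: dict) -> dict:
    # Select the fields a Result needs from a raw row dict.
    return {
        "title": row.get("title"),
        "content": row.get("content"),
        # New in 0.3.10: carry the best-matching chunk through when present.
        "best_chunk": row.get("best_chunk"),
        # Prefer the nested semantics score, fall back to the flat key.
        "similarity": row.get("semantics", {}).get("similarity", row.get("similarity")),
        "url_hash": row.get("url_hash"),
    }

row = {
    "title": "Example Domain",
    "content": "Full body text ...",
    "best_chunk": "The sentence that best answers the query.",
    "semantics": {"similarity": 0.91},
    "url_hash": "ENNmqkF1mGNhVhvhmbUEs4U2",
}
fields = pick_result_fields(row)
print(fields["best_chunk"], fields["similarity"])  # prints the chunk and 0.91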
nosible/classes/snippet.py
CHANGED

@@ -30,18 +30,13 @@ class Snippet:
        The words in the snippet.
    links : list or None
        List of links associated with the snippet.
-   companies : list or None
-       List of companies mentioned in the snippet.
-

    Examples
    --------
    >>> snippet = Snippet(content="Example snippet", language="en")
    >>> print(snippet.content)
    Example snippet
-
    """
-
    content: str = field(default=None, repr=True, compare=True)
    """The text content of the snippet."""
    images: list = field(default=None, repr=True, compare=False)

@@ -62,8 +57,6 @@ class Snippet:
    """The words in the snippet."""
    links: list = field(default=None, repr=False, compare=False)
    """List of links associated with the snippet."""
-   companies: list = field(default=None, repr=False, compare=False)
-   """List of companies mentioned in the snippet."""

    def __str__(self):
        """
nosible/classes/web_page.py
CHANGED

@@ -40,8 +40,6 @@ class WebPageData:
    {'description': 'Example'}
    """

-   companies: list = None
-   """A list of companies mentioned in the webpage, if applicable. (GKIDS)"""
    full_text: str = None
    """The full text content of the webpage."""
    languages: dict = None
nosible/nosible_client.py
CHANGED

@@ -29,7 +29,7 @@ from nosible.classes.search_set import SearchSet
 from nosible.classes.snippet_set import SnippetSet
 from nosible.classes.web_page import WebPageData
 from nosible.utils.json_tools import json_loads
-from nosible.utils.rate_limiter import PLAN_RATE_LIMITS, RateLimiter, _rate_limited
+from nosible.utils.rate_limiter import RateLimiter, _rate_limited

 # Set up a module‐level logger.
 logger = logging.getLogger(__name__)
@@ -202,11 +202,6 @@ class Nosible:
        logging.getLogger("httpx").setLevel(logging.WARNING)
        logging.getLogger("httpcore").setLevel(logging.WARNING)

-       self._limiters = {
-           endpoint: [RateLimiter(calls, period) for calls, period in buckets]
-           for endpoint, buckets in PLAN_RATE_LIMITS[self._get_user_plan()].items()
-       }
-
        # Define retry decorator
        self._post = retry(
            reraise=True,
@@ -230,7 +225,34 @@
        self._executor = ThreadPoolExecutor(max_workers=self.concurrency)

        # Headers
-       self.headers = {
+       self.headers = {
+           "Accept-Encoding": "gzip",
+           "Content-Type": "application/json",
+           "api-key": self.nosible_api_key
+       }
+
+       # Wrap _get_limits with retry.
+       self._get_limits = retry(
+           reraise=True,
+           stop=stop_after_attempt(self.retries) | stop_after_delay(self.timeout),
+           wait=wait_exponential(multiplier=1, min=1, max=20),
+           retry=retry_if_exception_type(httpx.RequestError),
+           before_sleep=before_sleep_log(self.logger, logging.WARNING),
+       )(self._get_limits)
+
+       raw_limits = self._get_limits()
+
+       # Map API query_type -> your decorator endpoint keys
+       mapped_limits = {
+           "fast": raw_limits.get("fast", []),
+           "bulk": raw_limits.get("slow", []),
+           "scrape-url": raw_limits.get("visit", []),
+       }
+
+       self._limiters = {
+           endpoint: [RateLimiter(calls, period) for calls, period in buckets]
+           for endpoint, buckets in mapped_limits.items()
+       }

        # Filters
        self.publish_start = publish_start
@@ -1522,7 +1544,6 @@

        response_data = data["response"]
        return WebPageData(
-           companies=response_data.get("companies"),
            full_text=response_data.get("full_text"),
            languages=response_data.get("languages"),
            metadata=response_data.get("metadata"),
@@ -1603,7 +1624,6 @@

        return filtered

-
    def close(self):
        """
        Close the Nosible client, shutting down the HTTP session
@@ -1703,41 +1723,52 @@

        return response

-   def _get_user_plan(self) -> str:
+   def _get_limits(self) -> dict[str, list[tuple[int, float]]]:
+       """
+       TODO
        """
-
+       url = "https://www.nosible.ai/search/v2/limits"
+       resp = self._session.get(
+           url=url,
+           headers=self.headers,
+           timeout=self.timeout,
+           follow_redirects=True,
+       )

-
-
-
+       if resp.status_code == 401:
+           raise ValueError("Your API key is not valid.")
+       if resp.status_code == 429:
+           raise ValueError("You have hit your rate limit.")
+       if resp.status_code == 409:
+           raise ValueError("Too many concurrent searches.")
+       if resp.status_code == 502:
+           raise ValueError("NOSIBLE is currently restarting.")
+       if resp.status_code == 504:
+           raise ValueError("NOSIBLE is currently overloaded.")

-
-       -------
-       str
-           The plan you are currently on.
+       resp.raise_for_status()

-
-
-
-
+       try:
+           data = resp.json()
+       except Exception as e:
+           raise ValueError("Invalid JSON response from /limits") from e

-
-
-
-
-
-
-
-
-
+       limits_list = data.get("limits")
+       if not isinstance(limits_list, list):
+           raise ValueError(f"Invalid /limits response shape: {data!r}")
+
+       grouped: dict[str, list[tuple[int, float]]] = {}
+       for item in limits_list:
+           query_type = item.get("query_type")
+           duration = item.get("duration_seconds")
+           limit = item.get("limit")

-
-
+           if query_type is None or duration is None or limit is None:
+               raise ValueError(f"Invalid limit entry: {item!r}")

-
-       raise ValueError(f"Your API key is not valid: {prefix} is not a valid plan prefix.")
+           grouped.setdefault(str(query_type), []).append((int(limit), float(duration)))

-       return
+       return grouped

    def _generate_expansions(self, question: Union[str, Search]) -> list:
        """
@@ -2039,13 +2070,13 @@

        if include_docs:
            # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
-
-           clauses.append(f"
+           docs = ", ".join(f"'{doc}'" for doc in include_docs)
+           clauses.append(f"doc IN ({docs})")

        if exclude_docs:
            # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
-
-           clauses.append(f"
+           docs = ", ".join(f"'{doc}'" for doc in exclude_docs)
+           clauses.append(f"doc NOT IN ({docs})")

        # Join everything
        if clauses:
@@ -2092,7 +2123,7 @@
            "netloc",
            "language",
            "companies"
-           "
+           "doc",
        ]
        import polars as pl # Lazy import

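The biggest change in this file is how rate limits are obtained: instead of indexing a hard-coded plan table, the client now fetches them from the /limits endpoint, groups the entries by query_type, and maps the API's fast/slow/visit buckets onto the decorator keys fast/bulk/scrape-url. Here is a sketch of that grouping step on a sample payload; the numbers are invented, only the shape ({"limits": [{"query_type", "duration_seconds", "limit"}, ...]}) follows the parsing code above.

def group_limits(payload: dict) -> dict[str, list[tuple[int, float]]]:
    # Group /limits entries into {query_type: [(limit, duration_seconds), ...]}.
    grouped: dict[str, list[tuple[int, float]]] = {}
    for item in payload.get("limits", []):
        grouped.setdefault(str(item["query_type"]), []).append(
            (int(item["limit"]), float(item["duration_seconds"]))
        )
    return grouped

# Invented example payload, for illustration only.
payload = {
    "limits": [
        {"query_type": "fast", "duration_seconds": 60, "limit": 60},
        {"query_type": "fast", "duration_seconds": 2_592_000, "limit": 14_000},
        {"query_type": "slow", "duration_seconds": 60, "limit": 60},
        {"query_type": "visit", "duration_seconds": 60, "limit": 60},
    ]
}

raw = group_limits(payload)
# Map API query types onto the client's rate-limiter endpoint keys.
mapped = {
    "fast": raw.get("fast", []),
    "bulk": raw.get("slow", []),
    "scrape-url": raw.get("visit", []),
}
print(mapped["fast"])   # [(60, 60.0), (14000, 2592000.0)]
print(mapped["bulk"])   # [(60, 60.0)]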
nosible/utils/rate_limiter.py
CHANGED

@@ -4,81 +4,10 @@ import time

 from pyrate_limiter import Limiter, Rate
 from pyrate_limiter.buckets.in_memory_bucket import InMemoryBucket
-from pyrate_limiter.exceptions import BucketFullException
+from pyrate_limiter.exceptions import BucketFullException, LimiterDelayException

 log = logging.getLogger(__name__)

-PLAN_RATE_LIMITS = {
-    "test": {
-        # Per minute limit, then per month.
-        "scrape-url": [(60, 60), (300, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (300, 24 * 3600 * 30)],
-        "fast": [(60, 60), (3000, 24 * 3600 * 30)],
-    },
-    "basic": {
-        "scrape-url": [(60, 60), (1400, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (1400, 24 * 3600 * 30)],
-        "fast": [(60, 60), (14_000, 24 * 3600 * 30)],
-    },
-    "pro": {
-        "scrape-url": [(60, 60), (6700, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (6700, 24 * 3600 * 30)],
-        "fast": [(60, 60), (67_000, 24 * 3600 * 30)],
-    },
-    "pro+": {
-        "scrape-url": [(60, 60), (32_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (32_000, 24 * 3600 * 30)],
-        "fast": [(60, 60), (320_000, 24 * 3600 * 30)],
-    },
-    "bus": {
-        "scrape-url": [(60, 60), (200_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (200_000, 24 * 3600 * 30)],
-        "fast": [(60, 60), (2_000_000, 24 * 3600 * 30)],
-    },
-    "bus+": {
-        "scrape-url": [(60, 60), (500_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (500_000, 24 * 3600 * 30)],
-        "fast": [(120, 60), (5_000_000, 24 * 3600 * 30)],
-    },
-    "ent": {
-        "scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
-        "fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
-    },
-    "prod": {
-        "scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
-        "fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
-    },
-    # This plan is used for testing in the package
-    "chat": {
-        "scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
-        "fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
-    },
-    "self": {
-        "scrape-url": [(6000, 60), (1_500_000, 24 * 3600 * 30)],
-        "bulk": [(6000, 60), (1_500_000, 24 * 3600 * 30)],
-        "fast": [(36_000, 60), (15_000_000, 24 * 3600 * 30)],
-    },
-    "cons": {
-        "scrape-url": [(60, 60), (3000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (3000, 24 * 3600 * 30)],
-        "fast": [(120, 60), (30_000, 24 * 3600 * 30)],
-    },
-    "stup": {
-        "scrape-url": [(60, 60), (30_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (30_000, 24 * 3600 * 30)],
-        "fast": [(360, 60), (300_000, 24 * 3600 * 30)],
-    },
-    # This plan is used for testing in the package
-    "busn": {
-        "scrape-url": [(60, 60), (300_000, 24 * 3600 * 30)],
-        "bulk": [(60, 60), (300_000, 24 * 3600 * 30)],
-        "fast": [(360, 60), (3_000_000, 24 * 3600 * 30)],
-    },
-}
-

 def _rate_limited(endpoint):
     """
@@ -133,7 +62,7 @@ class RateLimiter:

        # Build our bucket
        bucket = InMemoryBucket([Rate(max_calls, period_ms)])
-       self._limiter = Limiter(bucket)
+       self._limiter = Limiter(bucket, max_delay=1000)

    def acquire(self) -> None:
        """
@@ -177,7 +106,7 @@ class RateLimiter:
        # Ensure at least a small sleep if rounding to zero
        time.sleep(wait_s)

-   def try_acquire(self) -> bool:
+   def try_acquire(self, name: str = None) -> bool:
        """
        Attempt to acquire a slot without blocking.

@@ -196,8 +125,11 @@
        >>> rl.try_acquire()
        False
        """
+       key = name if name else self._GLOBAL_KEY
+
        try:
-           self._limiter.try_acquire(
+           self._limiter.try_acquire(key)
            return True
-       except BucketFullException:
-
+       except (BucketFullException, LimiterDelayException):
+           # Return False instead of crashing when the limit is hit
+           return False
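With the plan table gone, RateLimiter is now configured purely from the (calls, period) pairs the client passes in, blocks for at most max_delay=1000 ms, and try_acquire reports failure as False for both BucketFullException and the newly caught LimiterDelayException. Below is a minimal standalone sketch of that non-blocking pattern against a pyrate-limiter 3.x-style API (the <4 pin in METADATA suggests 3.x); the TinyLimiter wrapper is illustrative and not the package's actual class.

from pyrate_limiter import Limiter, Rate
from pyrate_limiter.buckets.in_memory_bucket import InMemoryBucket
from pyrate_limiter.exceptions import BucketFullException, LimiterDelayException

class TinyLimiter:
    # Illustrative wrapper: allow max_calls per period_s seconds, never raise.
    def __init__(self, max_calls: int, period_s: float):
        bucket = InMemoryBucket([Rate(max_calls, int(period_s * 1000))])
        # Wait at most one second for a slot, mirroring max_delay=1000 above.
        self._limiter = Limiter(bucket, max_delay=1000)

    def try_acquire(self, name: str = "global") -> bool:
        try:
            self._limiter.try_acquire(name)
            return True
        except (BucketFullException, LimiterDelayException):
            # Bucket is full, or the required wait exceeds max_delay:
            # report failure instead of raising at the call site.
            return False

rl = TinyLimiter(max_calls=2, period_s=60)
print(rl.try_acquire())  # True
print(rl.try_acquire())  # True
print(rl.try_acquire())  # False once the per-minute budget is exhausted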
{nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nosible
-Version: 0.3.6
+Version: 0.3.10
 Summary: Python client for the NOSIBLE Search API
 Home-page: https://github.com/NosibleAI/nosible-py
 Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton

@@ -31,7 +31,7 @@ Requires-Dist: polars
 Requires-Dist: duckdb
 Requires-Dist: openai
 Requires-Dist: tantivy
-Requires-Dist: pyrate-limiter
+Requires-Dist: pyrate-limiter<4
 Requires-Dist: tenacity
 Requires-Dist: cryptography
 Requires-Dist: pyarrow
nosible-0.3.10.dist-info/RECORD
ADDED

@@ -0,0 +1,16 @@
+nosible/__init__.py,sha256=11QmG9Wjprp_zB0VnPxGjqKwHmaoB0hoT8AGO6cGVMM,1426
+nosible/nosible_client.py,sha256=nq9WKGVCyarG5-3vXZOT67vLfGq7rlSEOBbqfLZpEE0,89644
+nosible/classes/result.py,sha256=DvUUt4hiZ8PJBGTIwkq5GFB4ors_2L9dApNlawSLesA,22978
+nosible/classes/result_set.py,sha256=9xuACJVp5n3pKECBjhkzNMb7PAQd4P1HmtkZH3KFAyo,52911
+nosible/classes/search.py,sha256=VrpuAsS4pxcirR6l6WStJN0gPIfahZ_9Cx3YdNKczSw,13860
+nosible/classes/search_set.py,sha256=VvtKXQ1_Ws_W-0p0C-wUvvdskeuXAyr65tpfvexAVw0,9895
+nosible/classes/snippet.py,sha256=K0o4aQbbvsnNzgnhZIu7RXLASkVO-R3iBIY7sCpNpLw,4787
+nosible/classes/snippet_set.py,sha256=0jPMDhJNCO02WhvY1QR1HedvADvBxRcN6x3FItEgSiI,5099
+nosible/classes/web_page.py,sha256=gAOBnqaK59f5erdRERxhvKmcwRVSXvhwJwyHIjPyeek,5732
+nosible/utils/json_tools.py,sha256=PcSMjcLEhbA626jAIn0SuD_1-4QDduapZUenTSt3N2E,4569
+nosible/utils/rate_limiter.py,sha256=Io7OCU6MmGoPswYj6PivtSNkFdzLaeam3tf29F5XeUQ,3980
+nosible-0.3.10.dist-info/licenses/LICENSE,sha256=8ifsV4DrsiKi8KVBFy8SBb3KXPXhofE3pYq07q1TSCQ,1117
+nosible-0.3.10.dist-info/METADATA,sha256=oWvzqQvtmgzhcaxIQ0APzsqoE_PSuSzVT_RWTYk5tQ8,6035
+nosible-0.3.10.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+nosible-0.3.10.dist-info/top_level.txt,sha256=mOconHuKcNJ1jTAj3DapQP_xB8YOmjTMyHg5txKH3uA,8
+nosible-0.3.10.dist-info/RECORD,,
nosible-0.3.6.dist-info/RECORD
DELETED

@@ -1,16 +0,0 @@
-nosible/__init__.py,sha256=11QmG9Wjprp_zB0VnPxGjqKwHmaoB0hoT8AGO6cGVMM,1426
-nosible/nosible_client.py,sha256=VOwc2nYOL-OtO9Xp3yyUOdJ3aUzr9_9lxDHJoCiADio,88665
-nosible/classes/result.py,sha256=c1YOFdhZOnSjdcNqpc3YjXxKcoJ2c0ik6wgfIsXTAP0,22697
-nosible/classes/result_set.py,sha256=8XrnnpGdm5lskqE0E7qSRR_mofPZfEHxtOFrkmfpbEM,52662
-nosible/classes/search.py,sha256=VrpuAsS4pxcirR6l6WStJN0gPIfahZ_9Cx3YdNKczSw,13860
-nosible/classes/search_set.py,sha256=VvtKXQ1_Ws_W-0p0C-wUvvdskeuXAyr65tpfvexAVw0,9895
-nosible/classes/snippet.py,sha256=m2qxgnMxIxx4ZOIMqUAViGLf7C1Y4NCGaioyEKw2-Zg,4994
-nosible/classes/snippet_set.py,sha256=0jPMDhJNCO02WhvY1QR1HedvADvBxRcN6x3FItEgSiI,5099
-nosible/classes/web_page.py,sha256=cvwQspxS0pU2nFgPLqnDtDWlLONHp1KwxerflHueLJ8,5838
-nosible/utils/json_tools.py,sha256=PcSMjcLEhbA626jAIn0SuD_1-4QDduapZUenTSt3N2E,4569
-nosible/utils/rate_limiter.py,sha256=zbzEGtVnHooknEbwQSK2dtk_afsmbM0diz5t9JF68Bw,6470
-nosible-0.3.6.dist-info/licenses/LICENSE,sha256=8ifsV4DrsiKi8KVBFy8SBb3KXPXhofE3pYq07q1TSCQ,1117
-nosible-0.3.6.dist-info/METADATA,sha256=CQtjTZlY1LhV_URUXbvfUBWmrFaDJ1BWg_MtM2qC0lk,6032
-nosible-0.3.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nosible-0.3.6.dist-info/top_level.txt,sha256=mOconHuKcNJ1jTAj3DapQP_xB8YOmjTMyHg5txKH3uA,8
-nosible-0.3.6.dist-info/RECORD,,
{nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/licenses/LICENSE
File without changes

{nosible-0.3.6.dist-info → nosible-0.3.10.dist-info}/top_level.txt
File without changes