nosible 0.3.6__tar.gz → 0.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {nosible-0.3.6/src/nosible.egg-info → nosible-0.3.9}/PKG-INFO +1 -1
  2. {nosible-0.3.6 → nosible-0.3.9}/pyproject.toml +2 -2
  3. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/result.py +17 -17
  4. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/result_set.py +5 -0
  5. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/snippet.py +0 -7
  6. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/web_page.py +0 -2
  7. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/nosible_client.py +5 -6
  8. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/utils/rate_limiter.py +9 -6
  9. {nosible-0.3.6 → nosible-0.3.9/src/nosible.egg-info}/PKG-INFO +1 -1
  10. {nosible-0.3.6 → nosible-0.3.9}/tests/test_01_nosible.py +2 -8
  11. {nosible-0.3.6 → nosible-0.3.9}/tests/test_02_results.py +30 -3
  12. {nosible-0.3.6 → nosible-0.3.9}/tests/test_04_snippets.py +1 -2
  13. {nosible-0.3.6 → nosible-0.3.9}/LICENSE +0 -0
  14. {nosible-0.3.6 → nosible-0.3.9}/README.md +0 -0
  15. {nosible-0.3.6 → nosible-0.3.9}/setup.cfg +0 -0
  16. {nosible-0.3.6 → nosible-0.3.9}/setup.py +0 -0
  17. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/__init__.py +0 -0
  18. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/search.py +0 -0
  19. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/search_set.py +0 -0
  20. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/snippet_set.py +0 -0
  21. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/utils/json_tools.py +0 -0
  22. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/SOURCES.txt +0 -0
  23. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/dependency_links.txt +0 -0
  24. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/requires.txt +0 -0
  25. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/top_level.txt +0 -0
  26. {nosible-0.3.6 → nosible-0.3.9}/tests/test_03_search_searchset.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.3.6
3
+ Version: 0.3.9
4
4
  Summary: Python client for the NOSIBLE Search API
5
5
  Home-page: https://github.com/NosibleAI/nosible-py
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nosible"
3
- version = "0.3.6"
3
+ version = "0.3.9"
4
4
  description = "Python client for the NOSIBLE Search API"
5
5
  readme = { file = "README.md", content-type = "text/markdown" }
6
6
  requires-python = ">=3.9"
@@ -61,5 +61,5 @@ dev-dependencies = [
61
61
  "pytest-doctestplus",
62
62
  "pytest-xdist",
63
63
  "urllib3==1.26.15",
64
- "hishel",
64
+ "hishel[async]",
65
65
  ]
@@ -36,6 +36,8 @@ class Result:
36
36
  The author of the content.
37
37
  content : str, optional
38
38
  The main content or body of the search result.
39
+ best_chunk : str, optional
40
+ The best snippet of text that matches your question from the search result.
39
41
  language : str, optional
40
42
  The language code of the content (e.g., 'en' for English).
41
43
  similarity : float, optional
@@ -105,6 +107,8 @@ class Result:
105
107
  """The author of the content."""
106
108
  content: str | None = None
107
109
  """The main content or body of the search result."""
110
+ best_chunk: str | None = None
111
+ """The best snippet of text that matches your question from the search result."""
108
112
  language: str | None = None
109
113
  """The language code of the content (e.g., 'en' for English)."""
110
114
  similarity: float | None = None
@@ -150,23 +154,14 @@ class Result:
150
154
  >>> result = Result(title="Example Domain", similarity=0.9876)
151
155
  >>> print(str(result))
152
156
  0.99 | Example Domain
153
- >>> result = Result(title=None, similarity=None)
154
- >>> print(str(result))
155
- {
156
- "url": null,
157
- "title": null,
158
- "description": null,
159
- "netloc": null,
160
- "published": null,
161
- "visited": null,
162
- "author": null,
163
- "content": null,
164
- "language": null,
165
- "similarity": null,
166
- "url_hash": null
167
- }
168
157
  """
169
- return print_dict(self.to_dict())
158
+ # Get the full dictionary
159
+ data = self.to_dict()
160
+
161
+ # Create a new dictionary excluding keys where the value is None
162
+ clean_data = {k: v for k, v in data.items() if v is not None}
163
+
164
+ return print_dict(clean_data)
170
165
 
171
166
  def __getitem__(self, key: str) -> str | float | bool | None:
172
167
  """
@@ -519,6 +514,11 @@ class Result:
519
514
  try:
520
515
  from nosible import Search
521
516
 
517
+ # Exclude the original doc from the new search.
518
+ exclude_docs_list = list(exclude_docs) if exclude_docs else []
519
+ if self.url_hash and self.url_hash not in exclude_docs_list:
520
+ exclude_docs_list.append(self.url_hash)
521
+
522
522
  s = Search(
523
523
  question=self.title,
524
524
  expansions=[],
@@ -537,7 +537,7 @@ class Result:
537
537
  include_companies=include_companies,
538
538
  exclude_companies=exclude_companies,
539
539
  include_docs=include_docs,
540
- exclude_docs=exclude_docs,
540
+ exclude_docs=exclude_docs_list,
541
541
  brand_safety=brand_safety,
542
542
  language=language,
543
543
  continent=continent,
@@ -54,6 +54,7 @@ class ResultSet(Iterator[Result]):
54
54
  "visited",
55
55
  "author",
56
56
  "content",
57
+ "best_chunk",
57
58
  "language",
58
59
  "similarity",
59
60
  "url_hash",
@@ -1004,6 +1005,7 @@ class ResultSet(Iterator[Result]):
1004
1005
  visited=row.get("visited"),
1005
1006
  author=row.get("author"),
1006
1007
  content=row.get("content"),
1008
+ best_chunk=row.get("best_chunk"),
1007
1009
  language=row.get("language"),
1008
1010
  similarity=row.get("similarity"),
1009
1011
  url_hash=row.get("url_hash"),
@@ -1113,6 +1115,7 @@ class ResultSet(Iterator[Result]):
1113
1115
  visited=row.get("visited"),
1114
1116
  author=row.get("author"),
1115
1117
  content=row.get("content"),
1118
+ best_chunk=row.get("best_chunk"),
1116
1119
  language=row.get("language"),
1117
1120
  similarity=row.get("semantics", {}).get("similarity", row.get("similarity")),
1118
1121
  url_hash=row.get("url_hash"),
@@ -1212,6 +1215,7 @@ class ResultSet(Iterator[Result]):
1212
1215
  visited=data.get("visited"),
1213
1216
  author=data.get("author"),
1214
1217
  content=data.get("content"),
1218
+ best_chunk=data.get("best_chunk"),
1215
1219
  language=data.get("language"),
1216
1220
  similarity=data.get("similarity"),
1217
1221
  url_hash=data.get("url_hash"),
@@ -1449,6 +1453,7 @@ class ResultSet(Iterator[Result]):
1449
1453
  visited=d.get("visited"),
1450
1454
  author=d.get("author"),
1451
1455
  content=d.get("content"),
1456
+ best_chunk=d.get("best_chunk"),
1452
1457
  language=d.get("language"),
1453
1458
  similarity=d.get("similarity", d.get("semantics", {}).get("similarity")),
1454
1459
  url_hash=d.get("url_hash"),
@@ -30,18 +30,13 @@ class Snippet:
30
30
  The words in the snippet.
31
31
  links : list or None
32
32
  List of links associated with the snippet.
33
- companies : list or None
34
- List of companies mentioned in the snippet.
35
-
36
33
 
37
34
  Examples
38
35
  --------
39
36
  >>> snippet = Snippet(content="Example snippet", language="en")
40
37
  >>> print(snippet.content)
41
38
  Example snippet
42
-
43
39
  """
44
-
45
40
  content: str = field(default=None, repr=True, compare=True)
46
41
  """The text content of the snippet."""
47
42
  images: list = field(default=None, repr=True, compare=False)
@@ -62,8 +57,6 @@ class Snippet:
62
57
  """The words in the snippet."""
63
58
  links: list = field(default=None, repr=False, compare=False)
64
59
  """List of links associated with the snippet."""
65
- companies: list = field(default=None, repr=False, compare=False)
66
- """List of companies mentioned in the snippet."""
67
60
 
68
61
  def __str__(self):
69
62
  """
@@ -40,8 +40,6 @@ class WebPageData:
40
40
  {'description': 'Example'}
41
41
  """
42
42
 
43
- companies: list = None
44
- """A list of companies mentioned in the webpage, if applicable. (GKIDS)"""
45
43
  full_text: str = None
46
44
  """The full text content of the webpage."""
47
45
  languages: dict = None
@@ -1522,7 +1522,6 @@ class Nosible:
1522
1522
 
1523
1523
  response_data = data["response"]
1524
1524
  return WebPageData(
1525
- companies=response_data.get("companies"),
1526
1525
  full_text=response_data.get("full_text"),
1527
1526
  languages=response_data.get("languages"),
1528
1527
  metadata=response_data.get("metadata"),
@@ -2039,13 +2038,13 @@ class Nosible:
2039
2038
 
2040
2039
  if include_docs:
2041
2040
  # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
2042
- doc_hashes = ", ".join(f"'{doc}'" for doc in include_docs)
2043
- clauses.append(f"doc_hash IN ({doc_hashes})")
2041
+ docs = ", ".join(f"'{doc}'" for doc in include_docs)
2042
+ clauses.append(f"doc IN ({docs})")
2044
2043
 
2045
2044
  if exclude_docs:
2046
2045
  # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
2047
- doc_hashes = ", ".join(f"'{doc}'" for doc in exclude_docs)
2048
- clauses.append(f"doc_hash NOT IN ({doc_hashes})")
2046
+ docs = ", ".join(f"'{doc}'" for doc in exclude_docs)
2047
+ clauses.append(f"doc NOT IN ({docs})")
2049
2048
 
2050
2049
  # Join everything
2051
2050
  if clauses:
@@ -2092,7 +2091,7 @@ class Nosible:
2092
2091
  "netloc",
2093
2092
  "language",
2094
2093
  "companies"
2095
- "doc_hash",
2094
+ "doc",
2096
2095
  ]
2097
2096
  import polars as pl # Lazy import
2098
2097
 
@@ -4,7 +4,7 @@ import time
4
4
 
5
5
  from pyrate_limiter import Limiter, Rate
6
6
  from pyrate_limiter.buckets.in_memory_bucket import InMemoryBucket
7
- from pyrate_limiter.exceptions import BucketFullException
7
+ from pyrate_limiter.exceptions import BucketFullException, LimiterDelayException
8
8
 
9
9
  log = logging.getLogger(__name__)
10
10
 
@@ -133,7 +133,7 @@ class RateLimiter:
133
133
 
134
134
  # Build our bucket
135
135
  bucket = InMemoryBucket([Rate(max_calls, period_ms)])
136
- self._limiter = Limiter(bucket)
136
+ self._limiter = Limiter(bucket, max_delay=1000)
137
137
 
138
138
  def acquire(self) -> None:
139
139
  """
@@ -177,7 +177,7 @@ class RateLimiter:
177
177
  # Ensure at least a small sleep if rounding to zero
178
178
  time.sleep(wait_s)
179
179
 
180
- def try_acquire(self) -> bool:
180
+ def try_acquire(self, name: str = None) -> bool:
181
181
  """
182
182
  Attempt to acquire a slot without blocking.
183
183
 
@@ -196,8 +196,11 @@ class RateLimiter:
196
196
  >>> rl.try_acquire()
197
197
  False
198
198
  """
199
+ key = name if name else self._GLOBAL_KEY
200
+
199
201
  try:
200
- self._limiter.try_acquire(self._GLOBAL_KEY)
202
+ self._limiter.try_acquire(key)
201
203
  return True
202
- except BucketFullException:
203
- return False
204
+ except (BucketFullException, LimiterDelayException):
205
+ # Return False instead of crashing when the limit is hit
206
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.3.6
3
+ Version: 0.3.9
4
4
  Summary: Python client for the NOSIBLE Search API
5
5
  Home-page: https://github.com/NosibleAI/nosible-py
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
@@ -1,12 +1,9 @@
1
- import json
2
1
  import pytest
3
- import os
4
- import time
5
2
  import re
6
3
 
7
- import polars as pl
4
+ import pytest
8
5
 
9
- from nosible import Nosible, Result, ResultSet, Search, Snippet, SnippetSet
6
+ from nosible import Nosible, ResultSet, Search, SnippetSet
10
7
  from nosible.classes.search_set import SearchSet
11
8
  from nosible.classes.web_page import WebPageData
12
9
 
@@ -92,9 +89,6 @@ def test_validate_sql():
92
89
  assert not Nosible()._validate_sql(sql="SELECT * FROM missing_table")
93
90
 
94
91
 
95
- # —— Your additional tests —— #
96
-
97
-
98
92
  def test_search_minimal(search_data):
99
93
  # from your snippet: isinstance(search_data, ResultSet)
100
94
  assert isinstance(search_data, ResultSet)
@@ -1,5 +1,5 @@
1
1
  import pytest
2
- from polars.dependencies import pandas as pd
2
+ import pandas as pd
3
3
  from nosible import Result, ResultSet
4
4
 
5
5
 
@@ -84,8 +84,6 @@ def test_resultset_to_dict(search_data):
84
84
  assert "published" in res
85
85
  assert "similarity" in res
86
86
  assert res["url_hash"] == key
87
- # results_copy_from_dict = ResultSet.from_dict(results_dict)
88
- # assert results == results_copy_from_dict
89
87
 
90
88
 
91
89
  # to_dicts
@@ -149,3 +147,32 @@ def test_resultset_getitem(search_data):
149
147
  _ = search_data[len(search_data)] # Out of range index
150
148
  with pytest.raises(TypeError):
151
149
  _ = search_data["invalid"] # Invalid type for index
150
+
151
+
152
+ def test_similar_excludes_current_document():
153
+ """
154
+ Test that the similar method properly excludes the current document from search results.
155
+
156
+ This test creates a Nosible client, performs a fast search, takes the first result,
157
+ and verifies that calling similar() on that result excludes it from the returned results.
158
+ """
159
+ from nosible import Nosible
160
+
161
+ # Create a Nosible client (similar to test_01_nosible.py)
162
+ with Nosible(concurrency=1) as nos:
163
+ # Perform a search to get some results
164
+ search_results = nos.fast_search(question="Hedge funds seek to expand into private credit", n_results=10)
165
+
166
+ # Get the first result
167
+ first_result = search_results[0]
168
+
169
+ # Call similar() on the first result
170
+ similar_results = first_result.similar(client=nos, n_results=10)
171
+
172
+ # Verify that the first result is NOT in the similar results
173
+ # We check by comparing URL hashes
174
+ similar_hashes = [r.url_hash for r in similar_results if r.url_hash]
175
+ assert first_result.url_hash not in similar_hashes, f"Original result URL hash {first_result.url_hash} should not be in similar results"
176
+
 177
 + # Also verify the call completed and returned a result set (it may legitimately be empty if no similar docs are found)
178
+ assert len(similar_results) >= 0, "Similar results should be returned (may be empty if no similar docs found)"
@@ -1,5 +1,4 @@
1
- from nosible import Snippet, SnippetSet, WebPageData
2
- import pytest
1
+ from nosible import Snippet, SnippetSet
3
2
 
4
3
 
5
4
  def test_snippet_initialization(snippets_data):
File without changes
File without changes
File without changes
File without changes
File without changes