PyPI - nosible - Versions diffs - 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

nosible 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

nosible/classes/result.py +69 -106
nosible/classes/result_set.py +121 -115
nosible/classes/search.py +83 -88
nosible/classes/search_set.py +27 -12
nosible/classes/snippet.py +57 -74
nosible/classes/snippet_set.py +62 -63
nosible/classes/web_page.py +39 -103
nosible/nosible_client.py +551 -234
nosible/utils/json_tools.py +58 -8
nosible/utils/question_builder.py +131 -0
nosible/utils/rate_limiter.py +30 -24
{nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/METADATA +27 -49
nosible-0.2.1.dist-info/RECORD +17 -0
nosible-0.1.8.dist-info/RECORD +0 -16
{nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/WHEEL +0 -0
{nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/licenses/LICENSE +0 -0
{nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/top_level.txt +0 -0

nosible/classes/search.py CHANGED Viewed

@@ -1,13 +1,15 @@
 from __future__ import annotations
+from dataclasses import asdict, dataclass
 from typing import TYPE_CHECKING
-from nosible.utils.json_tools import json_dumps, json_loads
+from nosible.utils.json_tools import json_dumps, json_loads, print_dict
 if TYPE_CHECKING:
     from nosible.classes.search_set import SearchSet
+@dataclass(init=True, repr=True, eq=True)
 class Search:
     """
     Represents the parameters for a search operation.
@@ -31,40 +33,40 @@ class Search:
         Number of context documents to retrieve.
     algorithm : str, optional
         Search algorithm to use.
-    output_type : str, optional
-        Type of output to produce.
+    min_similarity : float
+        Results must have at least this similarity score.
+    must_include : list of str
+        Only results mentioning these strings will be included.
+    must_exclude : list of str
+        Any result mentioning these strings will be excluded.
     autogenerate_expansions : bool, default=False
         Do you want to generate expansions automatically using a LLM?
     publish_start : str, optional
-        Start date for published documents (ISO format).
+        Start date for when the document was published (ISO format).
     publish_end : str, optional
-        End date for published documents (ISO format).
-    include_netlocs : list of str, optional
-        List of netlocs (domains) to include in the search.
-    exclude_netlocs : list of str, optional
-        List of netlocs (domains) to exclude from the search.
+        End date for when the document was published (ISO format).
     visited_start : str, optional
-        Start date for visited documents (ISO format).
+        Start date for when the document was visited by NOSIBLE (ISO format).
     visited_end : str, optional
-        End date for visited documents (ISO format).
+        End date for when the document was visited by NOSIBLE (ISO format).
     certain : bool, optional
-        Whether to only include certain results.
-    include_languages : list of str, optional
-        Languages to include in the search (Max: 50).
-    exclude_languages : list of str, optional
-        Languages to exclude from the search (Max: 50).
+        Only include documents where we are 100% sure of the date.
     include_netlocs : list of str, optional
-        Only include results from these domains (Max: 50).
+        List of netlocs (domains) to include in the search. (Max 50)
     exclude_netlocs : list of str, optional
-        Exclude results from these domains (Max: 50).
+        List of netlocs (domains) to exclude in the search. (Max 50)
+    include_languages : list of str, optional
+        Languages to include in the search (Max 50, ISO 639-1 language codes).
+    exclude_languages : list of str, optional
+        Language codes to exclude in the search (Max 50, ISO 639-1 language codes).
     include_companies : list of str, optional
-        Companies to include in the search (Max: 50).
+        Google KG IDs of public companies to require (Max 50).
     exclude_companies : list of str, optional
-        Companies to exclude from the search (Max: 50).
+        Google KG IDs of public companies to forbid (Max 50).
     include_docs : list of str, optional
-        Document IDs to include in the search (Max: 50).
+        URL hashes of docs to include (Max 50).
     exclude_docs : list of str, optional
-        Document IDs to exclude from the search (Max: 50).
+        URL hashes of docs to exclude (Max 50).
     Examples
     --------
@@ -82,6 +84,55 @@ class Search:
     What is Python?
     """
+    question: str | None = None
+    """The main search question or query."""
+    expansions: list[str] | None = None
+    """List of query expansions or related terms."""
+    sql_filter: str | None = None
+    """Additional SQL filter to apply to the search."""
+    n_results: int | None = None
+    """Number of results to return."""
+    n_probes: int | None = None
+    """Number of probe queries to use."""
+    n_contextify: int | None = None
+    """Number of context documents to retrieve."""
+    algorithm: str | None = None
+    """Search algorithm to use."""
+    min_similarity: float | None = None
+    """Results must have at least this similarity score."""
+    must_include: list[str] | None = None
+    """Only results mentioning these strings will be included."""
+    must_exclude: list[str] | None = None
+    """Any result mentioning these strings will be excluded."""
+    autogenerate_expansions: bool = False
+    """Do you want to generate expansions automatically using a LLM?"""
+    publish_start: str | None = None
+    """Start date for when the document was published."""
+    publish_end: str | None = None
+    """End date for when the document was published."""
+    visited_start: str | None = None
+    """Start date for when the document was visited by NOSIBLE."""
+    visited_end: str | None = None
+    """End date for when the document was visited by NOSIBLE."""
+    certain: bool | None = None
+    """Only include documents where we are 100% sure of the date."""
+    include_netlocs: list[str] | None = None
+    """List of netlocs (domains) to include in the search (Max 50)."""
+    exclude_netlocs: list[str] | None = None
+    """List of netlocs (domains) to exclude in the search (Max 50)."""
+    include_languages: list[str] | None = None
+    """Languages to include in the search. (Max 50)"""
+    exclude_languages: list[str] | None = None
+    """Language codes to exclude in the search (Max 50)"""
+    include_companies: list[str] | None = None
+    """Google KG IDs of public companies to require (Max 50)."""
+    exclude_companies: list[str] | None = None
+    """Google KG IDs of public companies to forbid (Max 50)."""
+    include_docs: list[str] | None = None
+    """URL hashes of docs to include (Max 50)."""
+    exclude_docs: list[str] | None = None
+    """URL hashes of docs to exclude (Max 50)."""
     _FIELDS = [
         "question",
         "expansions",
@@ -90,7 +141,9 @@ class Search:
         "n_probes",
         "n_contextify",
         "algorithm",
-        "output_type",
+        "min_similarity",
+        "must_include",
+        "must_exclude",
         "autogenerate_expansions",
         "publish_start",
         "publish_end",
@@ -107,67 +160,17 @@ class Search:
         "exclude_docs",
     ]
-    def __init__(
-        self,
-        question: str = None,
-        expansions: list[str] = None,
-        sql_filter: str = None,
-        n_results: int = None,
-        n_probes: int = None,
-        n_contextify: int = None,
-        algorithm: str = None,
-        output_type: str = None,
-        autogenerate_expansions: bool = False,
-        publish_start: str = None,
-        publish_end: str = None,
-        include_netlocs: list[str] = None,
-        exclude_netlocs: list[str] = None,
-        visited_start: str = None,
-        visited_end: str = None,
-        certain: bool = None,
-        include_languages: list[str] = None,
-        exclude_languages: list[str] = None,
-        include_companies: list[str] = None,
-        exclude_companies: list[str] = None,
-        include_docs: list[str] = None,
-        exclude_docs: list[str] = None,
-    ) -> None:
-        self.question = question
-        self.expansions = expansions
-        self.sql_filter = sql_filter
-        self.n_results = n_results
-        self.n_probes = n_probes
-        self.n_contextify = n_contextify
-        self.algorithm = algorithm
-        self.output_type = output_type
-        self.autogenerate_expansions = autogenerate_expansions
-        self.publish_start = publish_start
-        self.publish_end = publish_end
-        self.include_netlocs = include_netlocs
-        self.exclude_netlocs = exclude_netlocs
-        self.visited_start = visited_start
-        self.visited_end = visited_end
-        self.certain = certain
-        self.include_languages = include_languages
-        self.exclude_languages = exclude_languages
-        self.include_companies = include_companies
-        self.exclude_companies = exclude_companies
-        self.include_docs = include_docs
-        self.exclude_docs = exclude_docs
     def __str__(self) -> str:
         """
         Return a readable string representation of the search parameters.
         Only non-None fields are shown, each on its own line for clarity.
+        Returns
+        -------
+        str
+            A string representation of the Search instance, showing only the non-None fields.
         """
-        attrs = []
-        for attr in self._FIELDS:
-            value = getattr(self, attr)
-            if value is not None:
-                attrs.append(f"    {attr} = {value!r}")
-        if not attrs:
-            return "Search()"
-        return "Search(\n" + ",\n".join(attrs) + "\n)"
+        return print_dict(self.to_dict())
     def __add__(self, other: Search) -> SearchSet:
         """
@@ -222,7 +225,7 @@ class Search:
         >>> search.to_dict()["question"]
         'What is Python?'
         """
-        return {field: getattr(self, field) for field in self._FIELDS}
+        return asdict(self, dict_factory=dict)
     @classmethod
     def from_dict(cls, data: dict) -> Search:
@@ -267,10 +270,6 @@ class Search:
         Raises
         ------
-        IOError
-            If the file cannot be written.
-        TypeError
-            If serialization of the search parameters fails.
         Examples
         --------
@@ -304,10 +303,6 @@ class Search:
         Raises
         ------
-        IOError
-            If the file cannot be read.
-        json.JSONDecodeError
-            If the file content is not valid JSON.
         Examples
         --------

nosible/classes/search_set.py CHANGED Viewed

@@ -1,9 +1,11 @@
 from collections.abc import Iterator
+from dataclasses import dataclass, field
 from nosible.classes.search import Search
 from nosible.utils.json_tools import json_dumps, json_loads
+@dataclass()
 class SearchSet(Iterator[Search]):
     """
     Manages an iterable collection of Search objects.
@@ -32,9 +34,10 @@ class SearchSet(Iterator[Search]):
     What is AI?
     """
-    def __init__(self, searches: list[Search] = None) -> None:
-        self.searches = searches or []
-        self._index = 0
+    searches: list[Search] = field(default_factory=list)
+    """ A list of Search objects in the collection."""
+    _index: int = field(default=0, init=False, repr=False, compare=False)
+    """ Internal index for iteration over searches."""
     def __iter__(self) -> "SearchSet":
         """
@@ -199,7 +202,7 @@ class SearchSet(Iterator[Search]):
         """
         del self.searches[index]
-    def to_list(self) -> list[dict]:
+    def to_dicts(self) -> list[dict]:
         """
         Convert all Search objects in the collection to a list of dictionaries.
@@ -219,7 +222,7 @@ class SearchSet(Iterator[Search]):
         >>> s1 = Search(question="What is Python?", n_results=3)
         >>> s2 = Search(question="What is PEP8?", n_results=2)
         >>> searches = SearchSet([s1, s2])
-        >>> searches.to_list()[1]["question"]
+        >>> searches.to_dicts()[1]["question"]
         'What is PEP8?'
         """
         return [s.to_dict() for s in self.searches]
@@ -242,6 +245,10 @@ class SearchSet(Iterator[Search]):
         str
             A JSON string representation of the SearchSet collection if no path is provided.
+        Raises
+        ------
+        RuntimeError
+            If there is an error during serialization or file writing.
         Examples
         --------
@@ -251,14 +258,22 @@ class SearchSet(Iterator[Search]):
         >>> json_str = searches.to_json()
         >>> isinstance(json_str, str)
         True
-        >>> searches.to_json("searches.json")  # The file 'searches.json' will contain both search queries in JSON format.
+        >>> searches.to_json(
+        ...     "searches.json"
+        ... )  # The file 'searches.json' will contain both search queries in JSON format.
         """
-        data = json_dumps(self.to_list())
-        if path:
-            with open(path, "w") as f:
-                f.write(data)
-            return None
-        return data
+        try:
+            json_bytes = json_dumps(self.to_dicts())
+            if path:
+                try:
+                    with open(path, "w") as f:
+                        f.write(json_bytes)
+                    return None
+                except Exception as e:
+                    raise RuntimeError(f"Failed to write JSON to '{path}': {e}") from e
+            return json_bytes
+        except Exception as e:
+            raise RuntimeError(f"Failed to serialize results to JSON: {e}") from e
     @classmethod
     def from_json(cls, path: str) -> "SearchSet":

nosible/classes/snippet.py CHANGED Viewed

@@ -1,6 +1,9 @@
-from nosible.utils.json_tools import json_dumps
+from dataclasses import asdict, dataclass, field
+from nosible.utils.json_tools import json_dumps, print_dict
+@dataclass(init=True, repr=True, eq=True, frozen=True)
 class Snippet:
     """
     A class representing a snippet of text, typically extracted from a web page.
@@ -25,6 +28,11 @@ class Snippet:
         Hash of the URL from which the snippet was extracted.
     words : str or None
         The words in the snippet.
+    links : list or None
+        List of links associated with the snippet.
+    companies : list or None
+        List of companies mentioned in the snippet.
     Examples
     --------
@@ -34,67 +42,28 @@ class Snippet:
     """
-    def __init__(
-        self,
-        *,
-        companies: list = None,
-        content: str = None,
-        images: list = None,
-        language: str = None,
-        next_snippet_hash: str = None,
-        prev_snippet_hash: str = None,
-        snippet_hash: str = None,
-        statistics: dict = None,
-        url_hash: str = None,
-        words: str = None,
-    ):
-        """
-        Initialize a Snippet instance.
-        Parameters
-        ----------
-        companies : list, optional
-            A list of companies mentioned in the snippet, if applicable. (GKIDS)
-        content : str
-            The text content of the snippet.
-        images : list, optional
-            List of image URLs associated with the snippet.
-        language : str, optional
-            The language of the snippet.
-        snippet_hash : str, optional
-            A unique hash for the snippet.
-        statistics : dict, optional
-            Statistical information about the snippet (e.g., word count).
-        words : str, optional
-            The words in the snippet.
-        Examples
-        --------
-        >>> snippet = Snippet(content="Example snippet", language="en")
-        >>> print(snippet.content)
-        Example snippet
-        """
-        self.companies = companies or []
-        self.content = content
-        self.images = images
-        self.language = language
-        self.snippet_hash = snippet_hash
-        self.statistics = statistics
-        self.words = words
-        self.url_hash = url_hash
-        self.next_snippet_hash = next_snippet_hash
-        self.prev_snippet_hash = prev_snippet_hash
-    def __repr__(self):
-        """
-        Returns a string representation of the Snippet object.
-        Returns
-        -------
-        str
-            A string representation of the Snippet.
-        """
-        return f"Snippet(content={self.content[:30]}, language={self.language}, snippet_hash={self.snippet_hash})"
+    content: str = field(default=None, repr=True, compare=True)
+    """The text content of the snippet."""
+    images: list = field(default=None, repr=True, compare=False)
+    """List of image URLs associated with the snippet."""
+    language: str = field(default=None, repr=True, compare=False)
+    """The language of the snippet."""
+    next_snippet_hash: str = field(default=None, repr=True, compare=False)
+    """Hash of the next snippet in sequence."""
+    prev_snippet_hash: str = field(default=None, repr=True, compare=False)
+    """Hash of the previous snippet in sequence."""
+    snippet_hash: str = field(default=None, repr=True, compare=True)
+    """A unique hash for the snippet."""
+    statistics: dict = field(default=None, repr=False, compare=False)
+    """Statistical information about the snippet."""
+    url_hash: str = field(default=None, repr=True, compare=False)
+    """Hash of the URL from which the snippet was extracted."""
+    words: str = field(default=None, repr=False, compare=False)
+    """The words in the snippet."""
+    links: list = field(default=None, repr=False, compare=False)
+    """List of links associated with the snippet."""
+    companies: list = field(default=None, repr=False, compare=False)
+    """List of companies mentioned in the snippet."""
     def __str__(self):
         """
@@ -105,7 +74,7 @@ class Snippet:
         str
             A string representation of the Snippet.
         """
-        return f"Snippet: {self.content}"
+        return print_dict(self.to_dict())
     def __getitem__(self, key: str):
         """
@@ -146,17 +115,31 @@ class Snippet:
         >>> isinstance(snippet_dict, dict)
         True
         """
-        return {
-            "content": self.content,
-            "images": self.images,
-            "language": self.language,
-            "snippet_hash": self.snippet_hash,
-            "statistics": self.statistics,
-            "words": self.words,
-            "url_hash": self.url_hash,
-            "next_snippet_hash": self.next_snippet_hash,
-            "prev_snippet_hash": self.prev_snippet_hash,
-        }
+        return asdict(self, dict_factory=dict)
+    @classmethod
+    def from_dict(cls, data: dict) -> "Snippet":
+        """
+        Create a Snippet instance from a dictionary.
+        Parameters
+        ----------
+        data : dict
+            Dictionary containing snippet data.
+        Returns
+        -------
+        Snippet
+            An instance of Snippet populated with the provided data.
+        Examples
+        --------
+        >>> snippet_data = {"content": "Example snippet", "snippet_hash": "hash1"}
+        >>> snippet = Snippet.from_dict(snippet_data)
+        >>> isinstance(snippet, Snippet)
+        True
+        """
+        return cls(**data)
     def to_json(self) -> str:
         """

nosible 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl

nosible 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl