PyPI - nosible - Versions diffs - 0.1.9__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

nosible 0.1.9__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

nosible/classes/result.py +21 -8
nosible/classes/result_set.py +46 -26
nosible/classes/search.py +16 -0
nosible/nosible_client.py +346 -38
nosible/utils/json_tools.py +8 -7
{nosible-0.1.9.dist-info → nosible-0.2.2.dist-info}/METADATA +97 -17
nosible-0.2.2.dist-info/RECORD +16 -0
nosible/utils/question_builder.py +0 -131
nosible-0.1.9.dist-info/RECORD +0 -17
{nosible-0.1.9.dist-info → nosible-0.2.2.dist-info}/WHEEL +0 -0
{nosible-0.1.9.dist-info → nosible-0.2.2.dist-info}/licenses/LICENSE +0 -0
{nosible-0.1.9.dist-info → nosible-0.2.2.dist-info}/top_level.txt +0 -0

nosible/classes/result.py CHANGED Viewed

@@ -3,9 +3,8 @@ from __future__ import annotations
 from dataclasses import asdict, dataclass
 from typing import TYPE_CHECKING
-from openai import OpenAI
 from nosible.classes.web_page import WebPageData
+from nosible.utils.json_tools import print_dict
 if TYPE_CHECKING:
     from nosible.classes.result_set import ResultSet
@@ -102,11 +101,21 @@ class Result:
           0.99 | Example Domain
         >>> result = Result(title=None, similarity=None)
         >>> print(str(result))
-           N/A | No Title
+        {
+            "url": null,
+            "title": null,
+            "description": null,
+            "netloc": null,
+            "published": null,
+            "visited": null,
+            "author": null,
+            "content": null,
+            "language": null,
+            "similarity": null,
+            "url_hash": null
+        }
         """
-        similarity = f"{self.similarity:.2f}" if self.similarity is not None else "N/A"
-        title = self.title or "No Title"
-        return f"{similarity:>6} | {title}"
+        return print_dict(self.to_dict())
     def __getitem__(self, key: str) -> str | float | bool | None:
         """
@@ -295,12 +304,12 @@ class Result:
             The response must be a float in [-1.0, 1.0]. No other text must be returned.
         """
+        from openai import OpenAI
         llm_client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=client.llm_api_key)
         # Call the chat completions endpoint.
         resp = llm_client.chat.completions.create(
-            model="openai/gpt-4o", messages=[{"role": "user", "content": prompt.strip()}], temperature=0.7
+            model=client.sentiment_model, messages=[{"role": "user", "content": prompt.strip()}], temperature=0.7
         )
         raw = resp.choices[0].message.content
@@ -335,6 +344,8 @@ class Result:
         exclude_languages: list = None,
         include_companies: list = None,
         exclude_companies: list = None,
+        include_docs: list = None,
+        exclude_docs: list = None,
     ) -> ResultSet:
         """
         Find similar search results based on the content or metadata of this Result.
@@ -429,6 +440,8 @@ class Result:
                 exclude_languages=exclude_languages,
                 include_companies=include_companies,
                 exclude_companies=exclude_companies,
+                include_docs=include_docs,
+                exclude_docs=exclude_docs,
             )
             return client.search(search=s)
         except Exception as e:

nosible/classes/result_set.py CHANGED Viewed

@@ -2,15 +2,15 @@ from __future__ import annotations
 from collections.abc import Iterator
 from dataclasses import dataclass, field
-import duckdb
-import pandas as pd
-import polars as pl
-from tantivy import Document, Index, SchemaBuilder
+from typing import TYPE_CHECKING
 from nosible.classes.result import Result
 from nosible.utils.json_tools import json_dumps, json_loads
+if TYPE_CHECKING:
+    import pandas as pd
+    import polars as pl
 @dataclass(frozen=True)
 class ResultSet(Iterator[Result]):
@@ -182,29 +182,34 @@ class ResultSet(Iterator[Result]):
         # Setup if required
         return self
-    def __getitem__(self, key: int) -> Result:
+    def __getitem__(self, key: int | slice) -> Result | ResultSet:
         """
-        Get a Result by index.
+        Get a Result by index or a list of Results by slice.
         Parameters
         ----------
-        key : int
-            Index of the result to retrieve.
+        key : int or slice
+            Index or slice of the result(s) to retrieve.
         Returns
         -------
-        Result
-            The Result at the specified index.
+        Result or ResultSet
+            A single Result if `key` is an integer, or a ResultSet containing the sliced results if `key` is a slice.
         Raises
         ------
         IndexError
             If index is out of range.
+        TypeError
+            If key is not an integer or slice.
         """
-        if 0 <= key < len(self.results):
-            return self.results[key]
-        raise IndexError(f"Index {key} out of range for ResultSet with length {len(self.results)}.")
-        raise IndexError(f"Index {key} out of range for ResultSet with length {len(self.results)}.")
+        if isinstance(key, int):
+            if 0 <= key < len(self.results):
+                return self.results[key]
+            raise IndexError(f"Index {key} out of range for ResultSet with length {len(self.results)}.")
+        if isinstance(key, slice):
+            return ResultSet(self.results[key])
+        raise TypeError("ResultSet indices must be integers or slices.")
     def __add__(self, other: ResultSet | Result) -> ResultSet:
         """
@@ -285,12 +290,13 @@ class ResultSet(Iterator[Result]):
     def find_in_search_results(self, query: str, top_k: int = 10) -> ResultSet:
         """
-        Perform an in-memory search over a ResultSet collection using Tantivy.
+        This allows you to search within the results of a search using BM25 scoring by
+        performing an in-memory search over a ResultSet collection using Tantivy.
         Parameters
         ----------
         query : str
-            The search string to rank within these results.
+            The search string you want to find within these results.
         top_k : int
             Number of top results to return.
@@ -316,6 +322,8 @@ class ResultSet(Iterator[Result]):
         Document returned
         Document returned
         """
+        from tantivy import Document, Index, SchemaBuilder
         # Build the Tantivy schema
         schema_builder = SchemaBuilder()
         # Int for doc retrieval.
@@ -439,6 +447,9 @@ class ResultSet(Iterator[Result]):
         Traceback (most recent call last):
         ValueError: Cannot analyze by 'foobar' - not a valid field.
         """
+        import pandas as pd
+        import polars as pl
         # Convert to Polars DataFrame
         df: pl.DataFrame = self.to_polars()
@@ -467,7 +478,7 @@ class ResultSet(Iterator[Result]):
             # Extract year-month
             df = df.with_columns(pl.col(by).dt.strftime("%Y-%m").alias("year_month"))
             # Count per month
-            vc = df.group_by("year_month").agg(pl.count().alias("count")).sort("year_month")
+            vc = df.group_by("year_month").agg(pl.len().alias("count")).sort("year_month")
             rows = vc.rows()
             if not rows:
                 return {}
@@ -571,6 +582,10 @@ class ResultSet(Iterator[Result]):
         >>> "url" in df.columns
         True
         """
+        # Lazy import for runtime, but allow static type checking
+        import polars as pl
         return pl.DataFrame(self.to_dicts())
     def to_pandas(self) -> pd.DataFrame:
@@ -911,7 +926,7 @@ class ResultSet(Iterator[Result]):
             import duckdb
             # Convert to Polars DataFrame and then to Arrow Table
-            df = self.to_polars()
+            df = self.to_polars()  # noqa: F841
             # Connect to DuckDB and write the Arrow Table to a table
             con = duckdb.connect(out)
             # Write the DataFrame to the specified table name, replacing if exists
@@ -964,6 +979,8 @@ class ResultSet(Iterator[Result]):
         >>> results[0].title
         'Example Domain'
         """
+        import polars as pl
         try:
             df = pl.read_csv(file_path)
         except Exception as e:
@@ -1124,6 +1141,8 @@ class ResultSet(Iterator[Result]):
         >>> print(len(df))
         1
         """
+        import polars as pl
         pl_df = pl.from_pandas(df)
         return cls.from_polars(pl_df)
@@ -1239,6 +1258,8 @@ class ResultSet(Iterator[Result]):
         >>> results[0].title
         'Example Domain'
         """
+        import polars as pl
         try:
             df = pl.read_parquet(file_path)
         except Exception as e:
@@ -1288,6 +1309,8 @@ class ResultSet(Iterator[Result]):
         >>> results[0].title
         'Example Domain'
         """
+        import polars as pl
         try:
             df = pl.read_ipc(file_path)
         except Exception as e:
@@ -1340,7 +1363,11 @@ class ResultSet(Iterator[Result]):
         >>> loaded[0].title
         'Example Domain'
         """
+        import polars as pl
         try:
+            import duckdb
             con = duckdb.connect(file_path, read_only=True)
         except Exception as e:
             raise RuntimeError(f"Failed to connect to DuckDB file '{file_path}': {e}") from e
@@ -1492,10 +1519,3 @@ class ResultSet(Iterator[Result]):
         """
         # TODO: cleanup handles, sessions, etc.
         pass
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod(optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE)
-    print("All tests passed!")

nosible/classes/search.py CHANGED Viewed

@@ -33,6 +33,12 @@ class Search:
         Number of context documents to retrieve.
     algorithm : str, optional
         Search algorithm to use.
+    min_similarity : float
+        Results must have at least this similarity score.
+    must_include: list of str
+        Only results mentioning these strings will be included.
+    must_exclude : list of str
+        Any result mentioning these strings will be excluded.
     autogenerate_expansions : bool, default=False
         Do you want to generate expansions automatically using a LLM?
     publish_start : str, optional
@@ -65,6 +71,7 @@ class Search:
     Examples
     --------
     Create a search with specific parameters:
     >>> search = Search(
     ...     question="What is Python?",
     ...     n_results=5,
@@ -91,6 +98,12 @@ class Search:
     """Number of context documents to retrieve."""
     algorithm: str | None = None
     """Search algorithm to use."""
+    min_similarity: float | None = None
+    """Results must have at least this similarity score."""
+    must_include: list[str] | None = None
+    """Only results mentioning these strings will be included."""
+    must_exclude: list[str] | None = None
+    """Any result mentioning these strings will be excluded."""
     autogenerate_expansions: bool = False
     """Do you want to generate expansions automatically using a LLM?"""
     publish_start: str | None = None
@@ -128,6 +141,9 @@ class Search:
         "n_probes",
         "n_contextify",
         "algorithm",
+        "min_similarity",
+        "must_include",
+        "must_exclude",
         "autogenerate_expansions",
         "publish_start",
         "publish_end",

nosible 0.1.9__py3-none-any.whl → 0.2.2__py3-none-any.whl

nosible 0.1.9__py3-none-any.whl → 0.2.2__py3-none-any.whl