nosible 0.3.6__tar.gz → 0.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {nosible-0.3.6/src/nosible.egg-info → nosible-0.3.9}/PKG-INFO +1 -1
  2. {nosible-0.3.6 → nosible-0.3.9}/pyproject.toml +2 -2
  3. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/result.py +17 -17
  4. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/result_set.py +5 -0
  5. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/snippet.py +0 -7
  6. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/web_page.py +0 -2
  7. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/nosible_client.py +5 -6
  8. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/utils/rate_limiter.py +9 -6
  9. {nosible-0.3.6 → nosible-0.3.9/src/nosible.egg-info}/PKG-INFO +1 -1
  10. {nosible-0.3.6 → nosible-0.3.9}/tests/test_01_nosible.py +2 -8
  11. {nosible-0.3.6 → nosible-0.3.9}/tests/test_02_results.py +30 -3
  12. {nosible-0.3.6 → nosible-0.3.9}/tests/test_04_snippets.py +1 -2
  13. {nosible-0.3.6 → nosible-0.3.9}/LICENSE +0 -0
  14. {nosible-0.3.6 → nosible-0.3.9}/README.md +0 -0
  15. {nosible-0.3.6 → nosible-0.3.9}/setup.cfg +0 -0
  16. {nosible-0.3.6 → nosible-0.3.9}/setup.py +0 -0
  17. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/__init__.py +0 -0
  18. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/search.py +0 -0
  19. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/search_set.py +0 -0
  20. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/classes/snippet_set.py +0 -0
  21. {nosible-0.3.6 → nosible-0.3.9}/src/nosible/utils/json_tools.py +0 -0
  22. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/SOURCES.txt +0 -0
  23. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/dependency_links.txt +0 -0
  24. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/requires.txt +0 -0
  25. {nosible-0.3.6 → nosible-0.3.9}/src/nosible.egg-info/top_level.txt +0 -0
  26. {nosible-0.3.6 → nosible-0.3.9}/tests/test_03_search_searchset.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.3.6
3
+ Version: 0.3.9
4
4
  Summary: Python client for the NOSIBLE Search API
5
5
  Home-page: https://github.com/NosibleAI/nosible-py
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nosible"
3
- version = "0.3.6"
3
+ version = "0.3.9"
4
4
  description = "Python client for the NOSIBLE Search API"
5
5
  readme = { file = "README.md", content-type = "text/markdown" }
6
6
  requires-python = ">=3.9"
@@ -61,5 +61,5 @@ dev-dependencies = [
61
61
  "pytest-doctestplus",
62
62
  "pytest-xdist",
63
63
  "urllib3==1.26.15",
64
- "hishel",
64
+ "hishel[async]",
65
65
  ]
@@ -36,6 +36,8 @@ class Result:
36
36
  The author of the content.
37
37
  content : str, optional
38
38
  The main content or body of the search result.
39
+ best_chunk : str, optional
40
+ The best snippet of text that matches your question from the search result.
39
41
  language : str, optional
40
42
  The language code of the content (e.g., 'en' for English).
41
43
  similarity : float, optional
@@ -105,6 +107,8 @@ class Result:
105
107
  """The author of the content."""
106
108
  content: str | None = None
107
109
  """The main content or body of the search result."""
110
+ best_chunk: str | None = None
111
+ """The best snippet of text that matches your question from the search result."""
108
112
  language: str | None = None
109
113
  """The language code of the content (e.g., 'en' for English)."""
110
114
  similarity: float | None = None
@@ -150,23 +154,14 @@ class Result:
150
154
  >>> result = Result(title="Example Domain", similarity=0.9876)
151
155
  >>> print(str(result))
152
156
  0.99 | Example Domain
153
- >>> result = Result(title=None, similarity=None)
154
- >>> print(str(result))
155
- {
156
- "url": null,
157
- "title": null,
158
- "description": null,
159
- "netloc": null,
160
- "published": null,
161
- "visited": null,
162
- "author": null,
163
- "content": null,
164
- "language": null,
165
- "similarity": null,
166
- "url_hash": null
167
- }
168
157
  """
169
- return print_dict(self.to_dict())
158
+ # Get the full dictionary
159
+ data = self.to_dict()
160
+
161
+ # Create a new dictionary excluding keys where the value is None
162
+ clean_data = {k: v for k, v in data.items() if v is not None}
163
+
164
+ return print_dict(clean_data)
170
165
 
171
166
  def __getitem__(self, key: str) -> str | float | bool | None:
172
167
  """
@@ -519,6 +514,11 @@ class Result:
519
514
  try:
520
515
  from nosible import Search
521
516
 
517
+ # Exclude the original doc from the new search.
518
+ exclude_docs_list = list(exclude_docs) if exclude_docs else []
519
+ if self.url_hash and self.url_hash not in exclude_docs_list:
520
+ exclude_docs_list.append(self.url_hash)
521
+
522
522
  s = Search(
523
523
  question=self.title,
524
524
  expansions=[],
@@ -537,7 +537,7 @@ class Result:
537
537
  include_companies=include_companies,
538
538
  exclude_companies=exclude_companies,
539
539
  include_docs=include_docs,
540
- exclude_docs=exclude_docs,
540
+ exclude_docs=exclude_docs_list,
541
541
  brand_safety=brand_safety,
542
542
  language=language,
543
543
  continent=continent,
@@ -54,6 +54,7 @@ class ResultSet(Iterator[Result]):
54
54
  "visited",
55
55
  "author",
56
56
  "content",
57
+ "best_chunk",
57
58
  "language",
58
59
  "similarity",
59
60
  "url_hash",
@@ -1004,6 +1005,7 @@ class ResultSet(Iterator[Result]):
1004
1005
  visited=row.get("visited"),
1005
1006
  author=row.get("author"),
1006
1007
  content=row.get("content"),
1008
+ best_chunk=row.get("best_chunk"),
1007
1009
  language=row.get("language"),
1008
1010
  similarity=row.get("similarity"),
1009
1011
  url_hash=row.get("url_hash"),
@@ -1113,6 +1115,7 @@ class ResultSet(Iterator[Result]):
1113
1115
  visited=row.get("visited"),
1114
1116
  author=row.get("author"),
1115
1117
  content=row.get("content"),
1118
+ best_chunk=row.get("best_chunk"),
1116
1119
  language=row.get("language"),
1117
1120
  similarity=row.get("semantics", {}).get("similarity", row.get("similarity")),
1118
1121
  url_hash=row.get("url_hash"),
@@ -1212,6 +1215,7 @@ class ResultSet(Iterator[Result]):
1212
1215
  visited=data.get("visited"),
1213
1216
  author=data.get("author"),
1214
1217
  content=data.get("content"),
1218
+ best_chunk=data.get("best_chunk"),
1215
1219
  language=data.get("language"),
1216
1220
  similarity=data.get("similarity"),
1217
1221
  url_hash=data.get("url_hash"),
@@ -1449,6 +1453,7 @@ class ResultSet(Iterator[Result]):
1449
1453
  visited=d.get("visited"),
1450
1454
  author=d.get("author"),
1451
1455
  content=d.get("content"),
1456
+ best_chunk=d.get("best_chunk"),
1452
1457
  language=d.get("language"),
1453
1458
  similarity=d.get("similarity", d.get("semantics", {}).get("similarity")),
1454
1459
  url_hash=d.get("url_hash"),
@@ -30,18 +30,13 @@ class Snippet:
30
30
  The words in the snippet.
31
31
  links : list or None
32
32
  List of links associated with the snippet.
33
- companies : list or None
34
- List of companies mentioned in the snippet.
35
-
36
33
 
37
34
  Examples
38
35
  --------
39
36
  >>> snippet = Snippet(content="Example snippet", language="en")
40
37
  >>> print(snippet.content)
41
38
  Example snippet
42
-
43
39
  """
44
-
45
40
  content: str = field(default=None, repr=True, compare=True)
46
41
  """The text content of the snippet."""
47
42
  images: list = field(default=None, repr=True, compare=False)
@@ -62,8 +57,6 @@ class Snippet:
62
57
  """The words in the snippet."""
63
58
  links: list = field(default=None, repr=False, compare=False)
64
59
  """List of links associated with the snippet."""
65
- companies: list = field(default=None, repr=False, compare=False)
66
- """List of companies mentioned in the snippet."""
67
60
 
68
61
  def __str__(self):
69
62
  """
@@ -40,8 +40,6 @@ class WebPageData:
40
40
  {'description': 'Example'}
41
41
  """
42
42
 
43
- companies: list = None
44
- """A list of companies mentioned in the webpage, if applicable. (GKIDS)"""
45
43
  full_text: str = None
46
44
  """The full text content of the webpage."""
47
45
  languages: dict = None
@@ -1522,7 +1522,6 @@ class Nosible:
1522
1522
 
1523
1523
  response_data = data["response"]
1524
1524
  return WebPageData(
1525
- companies=response_data.get("companies"),
1526
1525
  full_text=response_data.get("full_text"),
1527
1526
  languages=response_data.get("languages"),
1528
1527
  metadata=response_data.get("metadata"),
@@ -2039,13 +2038,13 @@ class Nosible:
2039
2038
 
2040
2039
  if include_docs:
2041
2040
  # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
2042
- doc_hashes = ", ".join(f"'{doc}'" for doc in include_docs)
2043
- clauses.append(f"doc_hash IN ({doc_hashes})")
2041
+ docs = ", ".join(f"'{doc}'" for doc in include_docs)
2042
+ clauses.append(f"doc IN ({docs})")
2044
2043
 
2045
2044
  if exclude_docs:
2046
2045
  # Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
2047
- doc_hashes = ", ".join(f"'{doc}'" for doc in exclude_docs)
2048
- clauses.append(f"doc_hash NOT IN ({doc_hashes})")
2046
+ docs = ", ".join(f"'{doc}'" for doc in exclude_docs)
2047
+ clauses.append(f"doc NOT IN ({docs})")
2049
2048
 
2050
2049
  # Join everything
2051
2050
  if clauses:
@@ -2092,7 +2091,7 @@ class Nosible:
2092
2091
  "netloc",
2093
2092
  "language",
2094
2093
  "companies"
2095
- "doc_hash",
2094
+ "doc",
2096
2095
  ]
2097
2096
  import polars as pl # Lazy import
2098
2097
 
@@ -4,7 +4,7 @@ import time
4
4
 
5
5
  from pyrate_limiter import Limiter, Rate
6
6
  from pyrate_limiter.buckets.in_memory_bucket import InMemoryBucket
7
- from pyrate_limiter.exceptions import BucketFullException
7
+ from pyrate_limiter.exceptions import BucketFullException, LimiterDelayException
8
8
 
9
9
  log = logging.getLogger(__name__)
10
10
 
@@ -133,7 +133,7 @@ class RateLimiter:
133
133
 
134
134
  # Build our bucket
135
135
  bucket = InMemoryBucket([Rate(max_calls, period_ms)])
136
- self._limiter = Limiter(bucket)
136
+ self._limiter = Limiter(bucket, max_delay=1000)
137
137
 
138
138
  def acquire(self) -> None:
139
139
  """
@@ -177,7 +177,7 @@ class RateLimiter:
177
177
  # Ensure at least a small sleep if rounding to zero
178
178
  time.sleep(wait_s)
179
179
 
180
- def try_acquire(self) -> bool:
180
+ def try_acquire(self, name: str = None) -> bool:
181
181
  """
182
182
  Attempt to acquire a slot without blocking.
183
183
 
@@ -196,8 +196,11 @@ class RateLimiter:
196
196
  >>> rl.try_acquire()
197
197
  False
198
198
  """
199
+ key = name if name else self._GLOBAL_KEY
200
+
199
201
  try:
200
- self._limiter.try_acquire(self._GLOBAL_KEY)
202
+ self._limiter.try_acquire(key)
201
203
  return True
202
- except BucketFullException:
203
- return False
204
+ except (BucketFullException, LimiterDelayException):
205
+ # Return False instead of crashing when the limit is hit
206
+ return False
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.3.6
3
+ Version: 0.3.9
4
4
  Summary: Python client for the NOSIBLE Search API
5
5
  Home-page: https://github.com/NosibleAI/nosible-py
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
@@ -1,12 +1,9 @@
1
- import json
2
1
  import pytest
3
- import os
4
- import time
5
2
  import re
6
3
 
7
- import polars as pl
4
+ import pytest
8
5
 
9
- from nosible import Nosible, Result, ResultSet, Search, Snippet, SnippetSet
6
+ from nosible import Nosible, ResultSet, Search, SnippetSet
10
7
  from nosible.classes.search_set import SearchSet
11
8
  from nosible.classes.web_page import WebPageData
12
9
 
@@ -92,9 +89,6 @@ def test_validate_sql():
92
89
  assert not Nosible()._validate_sql(sql="SELECT * FROM missing_table")
93
90
 
94
91
 
95
- # —— Your additional tests —— #
96
-
97
-
98
92
  def test_search_minimal(search_data):
99
93
  # from your snippet: isinstance(search_data, ResultSet)
100
94
  assert isinstance(search_data, ResultSet)
@@ -1,5 +1,5 @@
1
1
  import pytest
2
- from polars.dependencies import pandas as pd
2
+ import pandas as pd
3
3
  from nosible import Result, ResultSet
4
4
 
5
5
 
@@ -84,8 +84,6 @@ def test_resultset_to_dict(search_data):
84
84
  assert "published" in res
85
85
  assert "similarity" in res
86
86
  assert res["url_hash"] == key
87
- # results_copy_from_dict = ResultSet.from_dict(results_dict)
88
- # assert results == results_copy_from_dict
89
87
 
90
88
 
91
89
  # to_dicts
@@ -149,3 +147,32 @@ def test_resultset_getitem(search_data):
149
147
  _ = search_data[len(search_data)] # Out of range index
150
148
  with pytest.raises(TypeError):
151
149
  _ = search_data["invalid"] # Invalid type for index
150
+
151
+
152
+ def test_similar_excludes_current_document():
153
+ """
154
+ Test that the similar method properly excludes the current document from search results.
155
+
156
+ This test creates a Nosible client, performs a fast search, takes the first result,
157
+ and verifies that calling similar() on that result excludes it from the returned results.
158
+ """
159
+ from nosible import Nosible
160
+
161
+ # Create a Nosible client (similar to test_01_nosible.py)
162
+ with Nosible(concurrency=1) as nos:
163
+ # Perform a search to get some results
164
+ search_results = nos.fast_search(question="Hedge funds seek to expand into private credit", n_results=10)
165
+
166
+ # Get the first result
167
+ first_result = search_results[0]
168
+
169
+ # Call similar() on the first result
170
+ similar_results = first_result.similar(client=nos, n_results=10)
171
+
172
+ # Verify that the first result is NOT in the similar results
173
+ # We check by comparing URL hashes
174
+ similar_hashes = [r.url_hash for r in similar_results if r.url_hash]
175
+ assert first_result.url_hash not in similar_hashes, f"Original result URL hash {first_result.url_hash} should not be in similar results"
176
+
 177
 + # Also verify the call completed and returned a result set (it may legitimately be empty if no similar docs are found)
178
+ assert len(similar_results) >= 0, "Similar results should be returned (may be empty if no similar docs found)"
@@ -1,5 +1,4 @@
1
- from nosible import Snippet, SnippetSet, WebPageData
2
- import pytest
1
+ from nosible import Snippet, SnippetSet
3
2
 
4
3
 
5
4
  def test_snippet_initialization(snippets_data):
File without changes
File without changes
File without changes
File without changes
File without changes