nosible 0.3.5__tar.gz → 0.3.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nosible-0.3.5/src/nosible.egg-info → nosible-0.3.9}/PKG-INFO +19 -1
- {nosible-0.3.5 → nosible-0.3.9}/README.md +18 -0
- {nosible-0.3.5 → nosible-0.3.9}/pyproject.toml +2 -2
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/result.py +17 -17
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/result_set.py +5 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/snippet.py +0 -7
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/web_page.py +0 -2
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/nosible_client.py +5 -6
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/utils/rate_limiter.py +9 -6
- {nosible-0.3.5 → nosible-0.3.9/src/nosible.egg-info}/PKG-INFO +19 -1
- {nosible-0.3.5 → nosible-0.3.9}/tests/test_01_nosible.py +2 -8
- {nosible-0.3.5 → nosible-0.3.9}/tests/test_02_results.py +30 -3
- {nosible-0.3.5 → nosible-0.3.9}/tests/test_04_snippets.py +1 -2
- {nosible-0.3.5 → nosible-0.3.9}/LICENSE +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/setup.cfg +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/setup.py +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/__init__.py +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/search.py +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/search_set.py +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/classes/snippet_set.py +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible/utils/json_tools.py +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible.egg-info/SOURCES.txt +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible.egg-info/dependency_links.txt +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible.egg-info/requires.txt +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/src/nosible.egg-info/top_level.txt +0 -0
- {nosible-0.3.5 → nosible-0.3.9}/tests/test_03_search_searchset.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nosible
|
|
3
|
-
Version: 0.3.5
|
|
3
|
+
Version: 0.3.9
|
|
4
4
|
Summary: Python client for the NOSIBLE Search API
|
|
5
5
|
Home-page: https://github.com/NosibleAI/nosible-py
|
|
6
6
|
Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
|
|
@@ -146,6 +146,24 @@ with Nosible(nosible_api_key="YOUR API KEY") as client:
|
|
|
146
146
|
print(results)
|
|
147
147
|
```
|
|
148
148
|
|
|
149
|
+
### 🤖 Cybernaut 1
|
|
150
|
+
|
|
151
|
+
An AI agent with unrestricted access to everything in NOSIBLE including every shard, algorithm, selector,
|
|
152
|
+
reranker, and signal. It knows what these things are and can tune them on the fly to find better results.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from nosible import Nosible
|
|
156
|
+
|
|
157
|
+
with Nosible(nosible_api_key="YOUR API KEY") as client:
|
|
158
|
+
|
|
159
|
+
results = client.search(
|
|
160
|
+
# search() gives you access to Cybernaut 1
|
|
161
|
+
question="Find me interesting technical blogs about Monte Carlo Tree Search."
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
print(results)
|
|
165
|
+
```
|
|
166
|
+
|
|
149
167
|
### 📄 Documentation
|
|
150
168
|
|
|
151
169
|
You can find the full NOSIBLE Search Client documentation
|
|
@@ -103,6 +103,24 @@ with Nosible(nosible_api_key="YOUR API KEY") as client:
|
|
|
103
103
|
print(results)
|
|
104
104
|
```
|
|
105
105
|
|
|
106
|
+
### 🤖 Cybernaut 1
|
|
107
|
+
|
|
108
|
+
An AI agent with unrestricted access to everything in NOSIBLE including every shard, algorithm, selector,
|
|
109
|
+
reranker, and signal. It knows what these things are and can tune them on the fly to find better results.
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from nosible import Nosible
|
|
113
|
+
|
|
114
|
+
with Nosible(nosible_api_key="YOUR API KEY") as client:
|
|
115
|
+
|
|
116
|
+
results = client.search(
|
|
117
|
+
# search() gives you access to Cybernaut 1
|
|
118
|
+
question="Find me interesting technical blogs about Monte Carlo Tree Search."
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
print(results)
|
|
122
|
+
```
|
|
123
|
+
|
|
106
124
|
### 📄 Documentation
|
|
107
125
|
|
|
108
126
|
You can find the full NOSIBLE Search Client documentation
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nosible"
|
|
3
|
-
version = "0.3.5"
|
|
3
|
+
version = "0.3.9"
|
|
4
4
|
description = "Python client for the NOSIBLE Search API"
|
|
5
5
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
6
6
|
requires-python = ">=3.9"
|
|
@@ -61,5 +61,5 @@ dev-dependencies = [
|
|
|
61
61
|
"pytest-doctestplus",
|
|
62
62
|
"pytest-xdist",
|
|
63
63
|
"urllib3==1.26.15",
|
|
64
|
-
"hishel",
|
|
64
|
+
"hishel[async]",
|
|
65
65
|
]
|
|
@@ -36,6 +36,8 @@ class Result:
|
|
|
36
36
|
The author of the content.
|
|
37
37
|
content : str, optional
|
|
38
38
|
The main content or body of the search result.
|
|
39
|
+
best_chunk : str, optional
|
|
40
|
+
The best snippet of text that matches your question from the search result.
|
|
39
41
|
language : str, optional
|
|
40
42
|
The language code of the content (e.g., 'en' for English).
|
|
41
43
|
similarity : float, optional
|
|
@@ -105,6 +107,8 @@ class Result:
|
|
|
105
107
|
"""The author of the content."""
|
|
106
108
|
content: str | None = None
|
|
107
109
|
"""The main content or body of the search result."""
|
|
110
|
+
best_chunk: str | None = None
|
|
111
|
+
"""The best snippet of text that matches your question from the search result."""
|
|
108
112
|
language: str | None = None
|
|
109
113
|
"""The language code of the content (e.g., 'en' for English)."""
|
|
110
114
|
similarity: float | None = None
|
|
@@ -150,23 +154,14 @@ class Result:
|
|
|
150
154
|
>>> result = Result(title="Example Domain", similarity=0.9876)
|
|
151
155
|
>>> print(str(result))
|
|
152
156
|
0.99 | Example Domain
|
|
153
|
-
>>> result = Result(title=None, similarity=None)
|
|
154
|
-
>>> print(str(result))
|
|
155
|
-
{
|
|
156
|
-
"url": null,
|
|
157
|
-
"title": null,
|
|
158
|
-
"description": null,
|
|
159
|
-
"netloc": null,
|
|
160
|
-
"published": null,
|
|
161
|
-
"visited": null,
|
|
162
|
-
"author": null,
|
|
163
|
-
"content": null,
|
|
164
|
-
"language": null,
|
|
165
|
-
"similarity": null,
|
|
166
|
-
"url_hash": null
|
|
167
|
-
}
|
|
168
157
|
"""
|
|
169
|
-
|
|
158
|
+
# Get the full dictionary
|
|
159
|
+
data = self.to_dict()
|
|
160
|
+
|
|
161
|
+
# Create a new dictionary excluding keys where the value is None
|
|
162
|
+
clean_data = {k: v for k, v in data.items() if v is not None}
|
|
163
|
+
|
|
164
|
+
return print_dict(clean_data)
|
|
170
165
|
|
|
171
166
|
def __getitem__(self, key: str) -> str | float | bool | None:
|
|
172
167
|
"""
|
|
@@ -519,6 +514,11 @@ class Result:
|
|
|
519
514
|
try:
|
|
520
515
|
from nosible import Search
|
|
521
516
|
|
|
517
|
+
# Exclude the original doc from the new search.
|
|
518
|
+
exclude_docs_list = list(exclude_docs) if exclude_docs else []
|
|
519
|
+
if self.url_hash and self.url_hash not in exclude_docs_list:
|
|
520
|
+
exclude_docs_list.append(self.url_hash)
|
|
521
|
+
|
|
522
522
|
s = Search(
|
|
523
523
|
question=self.title,
|
|
524
524
|
expansions=[],
|
|
@@ -537,7 +537,7 @@ class Result:
|
|
|
537
537
|
include_companies=include_companies,
|
|
538
538
|
exclude_companies=exclude_companies,
|
|
539
539
|
include_docs=include_docs,
|
|
540
|
-
exclude_docs=
|
|
540
|
+
exclude_docs=exclude_docs_list,
|
|
541
541
|
brand_safety=brand_safety,
|
|
542
542
|
language=language,
|
|
543
543
|
continent=continent,
|
|
@@ -54,6 +54,7 @@ class ResultSet(Iterator[Result]):
|
|
|
54
54
|
"visited",
|
|
55
55
|
"author",
|
|
56
56
|
"content",
|
|
57
|
+
"best_chunk",
|
|
57
58
|
"language",
|
|
58
59
|
"similarity",
|
|
59
60
|
"url_hash",
|
|
@@ -1004,6 +1005,7 @@ class ResultSet(Iterator[Result]):
|
|
|
1004
1005
|
visited=row.get("visited"),
|
|
1005
1006
|
author=row.get("author"),
|
|
1006
1007
|
content=row.get("content"),
|
|
1008
|
+
best_chunk=row.get("best_chunk"),
|
|
1007
1009
|
language=row.get("language"),
|
|
1008
1010
|
similarity=row.get("similarity"),
|
|
1009
1011
|
url_hash=row.get("url_hash"),
|
|
@@ -1113,6 +1115,7 @@ class ResultSet(Iterator[Result]):
|
|
|
1113
1115
|
visited=row.get("visited"),
|
|
1114
1116
|
author=row.get("author"),
|
|
1115
1117
|
content=row.get("content"),
|
|
1118
|
+
best_chunk=row.get("best_chunk"),
|
|
1116
1119
|
language=row.get("language"),
|
|
1117
1120
|
similarity=row.get("semantics", {}).get("similarity", row.get("similarity")),
|
|
1118
1121
|
url_hash=row.get("url_hash"),
|
|
@@ -1212,6 +1215,7 @@ class ResultSet(Iterator[Result]):
|
|
|
1212
1215
|
visited=data.get("visited"),
|
|
1213
1216
|
author=data.get("author"),
|
|
1214
1217
|
content=data.get("content"),
|
|
1218
|
+
best_chunk=data.get("best_chunk"),
|
|
1215
1219
|
language=data.get("language"),
|
|
1216
1220
|
similarity=data.get("similarity"),
|
|
1217
1221
|
url_hash=data.get("url_hash"),
|
|
@@ -1449,6 +1453,7 @@ class ResultSet(Iterator[Result]):
|
|
|
1449
1453
|
visited=d.get("visited"),
|
|
1450
1454
|
author=d.get("author"),
|
|
1451
1455
|
content=d.get("content"),
|
|
1456
|
+
best_chunk=d.get("best_chunk"),
|
|
1452
1457
|
language=d.get("language"),
|
|
1453
1458
|
similarity=d.get("similarity", d.get("semantics", {}).get("similarity")),
|
|
1454
1459
|
url_hash=d.get("url_hash"),
|
|
@@ -30,18 +30,13 @@ class Snippet:
|
|
|
30
30
|
The words in the snippet.
|
|
31
31
|
links : list or None
|
|
32
32
|
List of links associated with the snippet.
|
|
33
|
-
companies : list or None
|
|
34
|
-
List of companies mentioned in the snippet.
|
|
35
|
-
|
|
36
33
|
|
|
37
34
|
Examples
|
|
38
35
|
--------
|
|
39
36
|
>>> snippet = Snippet(content="Example snippet", language="en")
|
|
40
37
|
>>> print(snippet.content)
|
|
41
38
|
Example snippet
|
|
42
|
-
|
|
43
39
|
"""
|
|
44
|
-
|
|
45
40
|
content: str = field(default=None, repr=True, compare=True)
|
|
46
41
|
"""The text content of the snippet."""
|
|
47
42
|
images: list = field(default=None, repr=True, compare=False)
|
|
@@ -62,8 +57,6 @@ class Snippet:
|
|
|
62
57
|
"""The words in the snippet."""
|
|
63
58
|
links: list = field(default=None, repr=False, compare=False)
|
|
64
59
|
"""List of links associated with the snippet."""
|
|
65
|
-
companies: list = field(default=None, repr=False, compare=False)
|
|
66
|
-
"""List of companies mentioned in the snippet."""
|
|
67
60
|
|
|
68
61
|
def __str__(self):
|
|
69
62
|
"""
|
|
@@ -40,8 +40,6 @@ class WebPageData:
|
|
|
40
40
|
{'description': 'Example'}
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
-
companies: list = None
|
|
44
|
-
"""A list of companies mentioned in the webpage, if applicable. (GKIDS)"""
|
|
45
43
|
full_text: str = None
|
|
46
44
|
"""The full text content of the webpage."""
|
|
47
45
|
languages: dict = None
|
|
@@ -1522,7 +1522,6 @@ class Nosible:
|
|
|
1522
1522
|
|
|
1523
1523
|
response_data = data["response"]
|
|
1524
1524
|
return WebPageData(
|
|
1525
|
-
companies=response_data.get("companies"),
|
|
1526
1525
|
full_text=response_data.get("full_text"),
|
|
1527
1526
|
languages=response_data.get("languages"),
|
|
1528
1527
|
metadata=response_data.get("metadata"),
|
|
@@ -2039,13 +2038,13 @@ class Nosible:
|
|
|
2039
2038
|
|
|
2040
2039
|
if include_docs:
|
|
2041
2040
|
# Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
|
|
2042
|
-
|
|
2043
|
-
clauses.append(f"
|
|
2041
|
+
docs = ", ".join(f"'{doc}'" for doc in include_docs)
|
|
2042
|
+
clauses.append(f"doc IN ({docs})")
|
|
2044
2043
|
|
|
2045
2044
|
if exclude_docs:
|
|
2046
2045
|
# Assume these are URL hashes, e.g. "ENNmqkF1mGNhVhvhmbUEs4U2"
|
|
2047
|
-
|
|
2048
|
-
clauses.append(f"
|
|
2046
|
+
docs = ", ".join(f"'{doc}'" for doc in exclude_docs)
|
|
2047
|
+
clauses.append(f"doc NOT IN ({docs})")
|
|
2049
2048
|
|
|
2050
2049
|
# Join everything
|
|
2051
2050
|
if clauses:
|
|
@@ -2092,7 +2091,7 @@ class Nosible:
|
|
|
2092
2091
|
"netloc",
|
|
2093
2092
|
"language",
|
|
2094
2093
|
"companies"
|
|
2095
|
-
"
|
|
2094
|
+
"doc",
|
|
2096
2095
|
]
|
|
2097
2096
|
import polars as pl # Lazy import
|
|
2098
2097
|
|
|
@@ -4,7 +4,7 @@ import time
|
|
|
4
4
|
|
|
5
5
|
from pyrate_limiter import Limiter, Rate
|
|
6
6
|
from pyrate_limiter.buckets.in_memory_bucket import InMemoryBucket
|
|
7
|
-
from pyrate_limiter.exceptions import BucketFullException
|
|
7
|
+
from pyrate_limiter.exceptions import BucketFullException, LimiterDelayException
|
|
8
8
|
|
|
9
9
|
log = logging.getLogger(__name__)
|
|
10
10
|
|
|
@@ -133,7 +133,7 @@ class RateLimiter:
|
|
|
133
133
|
|
|
134
134
|
# Build our bucket
|
|
135
135
|
bucket = InMemoryBucket([Rate(max_calls, period_ms)])
|
|
136
|
-
self._limiter = Limiter(bucket)
|
|
136
|
+
self._limiter = Limiter(bucket, max_delay=1000)
|
|
137
137
|
|
|
138
138
|
def acquire(self) -> None:
|
|
139
139
|
"""
|
|
@@ -177,7 +177,7 @@ class RateLimiter:
|
|
|
177
177
|
# Ensure at least a small sleep if rounding to zero
|
|
178
178
|
time.sleep(wait_s)
|
|
179
179
|
|
|
180
|
-
def try_acquire(self) -> bool:
|
|
180
|
+
def try_acquire(self, name: str = None) -> bool:
|
|
181
181
|
"""
|
|
182
182
|
Attempt to acquire a slot without blocking.
|
|
183
183
|
|
|
@@ -196,8 +196,11 @@ class RateLimiter:
|
|
|
196
196
|
>>> rl.try_acquire()
|
|
197
197
|
False
|
|
198
198
|
"""
|
|
199
|
+
key = name if name else self._GLOBAL_KEY
|
|
200
|
+
|
|
199
201
|
try:
|
|
200
|
-
self._limiter.try_acquire(
|
|
202
|
+
self._limiter.try_acquire(key)
|
|
201
203
|
return True
|
|
202
|
-
except BucketFullException:
|
|
203
|
-
|
|
204
|
+
except (BucketFullException, LimiterDelayException):
|
|
205
|
+
# Return False instead of crashing when the limit is hit
|
|
206
|
+
return False
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nosible
|
|
3
|
-
Version: 0.3.5
|
|
3
|
+
Version: 0.3.9
|
|
4
4
|
Summary: Python client for the NOSIBLE Search API
|
|
5
5
|
Home-page: https://github.com/NosibleAI/nosible-py
|
|
6
6
|
Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
|
|
@@ -146,6 +146,24 @@ with Nosible(nosible_api_key="YOUR API KEY") as client:
|
|
|
146
146
|
print(results)
|
|
147
147
|
```
|
|
148
148
|
|
|
149
|
+
### 🤖 Cybernaut 1
|
|
150
|
+
|
|
151
|
+
An AI agent with unrestricted access to everything in NOSIBLE including every shard, algorithm, selector,
|
|
152
|
+
reranker, and signal. It knows what these things are and can tune them on the fly to find better results.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
from nosible import Nosible
|
|
156
|
+
|
|
157
|
+
with Nosible(nosible_api_key="YOUR API KEY") as client:
|
|
158
|
+
|
|
159
|
+
results = client.search(
|
|
160
|
+
# search() gives you access to Cybernaut 1
|
|
161
|
+
question="Find me interesting technical blogs about Monte Carlo Tree Search."
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
print(results)
|
|
165
|
+
```
|
|
166
|
+
|
|
149
167
|
### 📄 Documentation
|
|
150
168
|
|
|
151
169
|
You can find the full NOSIBLE Search Client documentation
|
|
@@ -1,12 +1,9 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import pytest
|
|
3
|
-
import os
|
|
4
|
-
import time
|
|
5
2
|
import re
|
|
6
3
|
|
|
7
|
-
import
|
|
4
|
+
import pytest
|
|
8
5
|
|
|
9
|
-
from nosible import Nosible,
|
|
6
|
+
from nosible import Nosible, ResultSet, Search, SnippetSet
|
|
10
7
|
from nosible.classes.search_set import SearchSet
|
|
11
8
|
from nosible.classes.web_page import WebPageData
|
|
12
9
|
|
|
@@ -92,9 +89,6 @@ def test_validate_sql():
|
|
|
92
89
|
assert not Nosible()._validate_sql(sql="SELECT * FROM missing_table")
|
|
93
90
|
|
|
94
91
|
|
|
95
|
-
# —— Your additional tests —— #
|
|
96
|
-
|
|
97
|
-
|
|
98
92
|
def test_search_minimal(search_data):
|
|
99
93
|
# from your snippet: isinstance(search_data, ResultSet)
|
|
100
94
|
assert isinstance(search_data, ResultSet)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import pytest
|
|
2
|
-
|
|
2
|
+
import pandas as pd
|
|
3
3
|
from nosible import Result, ResultSet
|
|
4
4
|
|
|
5
5
|
|
|
@@ -84,8 +84,6 @@ def test_resultset_to_dict(search_data):
|
|
|
84
84
|
assert "published" in res
|
|
85
85
|
assert "similarity" in res
|
|
86
86
|
assert res["url_hash"] == key
|
|
87
|
-
# results_copy_from_dict = ResultSet.from_dict(results_dict)
|
|
88
|
-
# assert results == results_copy_from_dict
|
|
89
87
|
|
|
90
88
|
|
|
91
89
|
# to_dicts
|
|
@@ -149,3 +147,32 @@ def test_resultset_getitem(search_data):
|
|
|
149
147
|
_ = search_data[len(search_data)] # Out of range index
|
|
150
148
|
with pytest.raises(TypeError):
|
|
151
149
|
_ = search_data["invalid"] # Invalid type for index
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def test_similar_excludes_current_document():
|
|
153
|
+
"""
|
|
154
|
+
Test that the similar method properly excludes the current document from search results.
|
|
155
|
+
|
|
156
|
+
This test creates a Nosible client, performs a fast search, takes the first result,
|
|
157
|
+
and verifies that calling similar() on that result excludes it from the returned results.
|
|
158
|
+
"""
|
|
159
|
+
from nosible import Nosible
|
|
160
|
+
|
|
161
|
+
# Create a Nosible client (similar to test_01_nosible.py)
|
|
162
|
+
with Nosible(concurrency=1) as nos:
|
|
163
|
+
# Perform a search to get some results
|
|
164
|
+
search_results = nos.fast_search(question="Hedge funds seek to expand into private credit", n_results=10)
|
|
165
|
+
|
|
166
|
+
# Get the first result
|
|
167
|
+
first_result = search_results[0]
|
|
168
|
+
|
|
169
|
+
# Call similar() on the first result
|
|
170
|
+
similar_results = first_result.similar(client=nos, n_results=10)
|
|
171
|
+
|
|
172
|
+
# Verify that the first result is NOT in the similar results
|
|
173
|
+
# We check by comparing URL hashes
|
|
174
|
+
similar_hashes = [r.url_hash for r in similar_results if r.url_hash]
|
|
175
|
+
assert first_result.url_hash not in similar_hashes, f"Original result URL hash {first_result.url_hash} should not be in similar results"
|
|
176
|
+
|
|
177
|
+
# Also verify that similar results were actually returned (should be non-empty)
|
|
178
|
+
assert len(similar_results) >= 0, "Similar results should be returned (may be empty if no similar docs found)"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|