nosible 0.3.9__tar.gz → 0.3.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nosible-0.3.9/src/nosible.egg-info → nosible-0.3.10}/PKG-INFO +2 -2
- {nosible-0.3.9 → nosible-0.3.10}/pyproject.toml +2 -2
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/nosible_client.py +67 -35
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/utils/rate_limiter.py +0 -71
- {nosible-0.3.9 → nosible-0.3.10/src/nosible.egg-info}/PKG-INFO +2 -2
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible.egg-info/requires.txt +1 -1
- {nosible-0.3.9 → nosible-0.3.10}/tests/test_01_nosible.py +0 -5
- {nosible-0.3.9 → nosible-0.3.10}/LICENSE +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/README.md +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/setup.cfg +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/setup.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/__init__.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/result.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/result_set.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/search.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/search_set.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/snippet.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/snippet_set.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/classes/web_page.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible/utils/json_tools.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible.egg-info/SOURCES.txt +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible.egg-info/dependency_links.txt +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/src/nosible.egg-info/top_level.txt +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/tests/test_02_results.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/tests/test_03_search_searchset.py +0 -0
- {nosible-0.3.9 → nosible-0.3.10}/tests/test_04_snippets.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nosible
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.3.10
|
|
4
4
|
Summary: Python client for the NOSIBLE Search API
|
|
5
5
|
Home-page: https://github.com/NosibleAI/nosible-py
|
|
6
6
|
Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
|
|
@@ -31,7 +31,7 @@ Requires-Dist: polars
|
|
|
31
31
|
Requires-Dist: duckdb
|
|
32
32
|
Requires-Dist: openai
|
|
33
33
|
Requires-Dist: tantivy
|
|
34
|
-
Requires-Dist: pyrate-limiter
|
|
34
|
+
Requires-Dist: pyrate-limiter<4
|
|
35
35
|
Requires-Dist: tenacity
|
|
36
36
|
Requires-Dist: cryptography
|
|
37
37
|
Requires-Dist: pyarrow
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "nosible"
|
|
3
|
-
version = "0.3.9"
|
|
3
|
+
version = "0.3.10"
|
|
4
4
|
description = "Python client for the NOSIBLE Search API"
|
|
5
5
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
6
6
|
requires-python = ">=3.9"
|
|
@@ -16,7 +16,7 @@ dependencies = [
|
|
|
16
16
|
"duckdb",
|
|
17
17
|
"openai",
|
|
18
18
|
"tantivy",
|
|
19
|
-
"pyrate-limiter",
|
|
19
|
+
"pyrate-limiter<4",
|
|
20
20
|
"tenacity",
|
|
21
21
|
"cryptography",
|
|
22
22
|
"pyarrow",
|
|
@@ -29,7 +29,7 @@ from nosible.classes.search_set import SearchSet
|
|
|
29
29
|
from nosible.classes.snippet_set import SnippetSet
|
|
30
30
|
from nosible.classes.web_page import WebPageData
|
|
31
31
|
from nosible.utils.json_tools import json_loads
|
|
32
|
-
from nosible.utils.rate_limiter import PLAN_RATE_LIMITS, RateLimiter, _rate_limited
|
|
32
|
+
from nosible.utils.rate_limiter import RateLimiter, _rate_limited
|
|
33
33
|
|
|
34
34
|
# Set up a module‐level logger.
|
|
35
35
|
logger = logging.getLogger(__name__)
|
|
@@ -202,11 +202,6 @@ class Nosible:
|
|
|
202
202
|
logging.getLogger("httpx").setLevel(logging.WARNING)
|
|
203
203
|
logging.getLogger("httpcore").setLevel(logging.WARNING)
|
|
204
204
|
|
|
205
|
-
self._limiters = {
|
|
206
|
-
endpoint: [RateLimiter(calls, period) for calls, period in buckets]
|
|
207
|
-
for endpoint, buckets in PLAN_RATE_LIMITS[self._get_user_plan()].items()
|
|
208
|
-
}
|
|
209
|
-
|
|
210
205
|
# Define retry decorator
|
|
211
206
|
self._post = retry(
|
|
212
207
|
reraise=True,
|
|
@@ -230,7 +225,34 @@ class Nosible:
|
|
|
230
225
|
self._executor = ThreadPoolExecutor(max_workers=self.concurrency)
|
|
231
226
|
|
|
232
227
|
# Headers
|
|
233
|
-
self.headers = {
|
|
228
|
+
self.headers = {
|
|
229
|
+
"Accept-Encoding": "gzip",
|
|
230
|
+
"Content-Type": "application/json",
|
|
231
|
+
"api-key": self.nosible_api_key
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
# Wrap _get_limits with retry.
|
|
235
|
+
self._get_limits = retry(
|
|
236
|
+
reraise=True,
|
|
237
|
+
stop=stop_after_attempt(self.retries) | stop_after_delay(self.timeout),
|
|
238
|
+
wait=wait_exponential(multiplier=1, min=1, max=20),
|
|
239
|
+
retry=retry_if_exception_type(httpx.RequestError),
|
|
240
|
+
before_sleep=before_sleep_log(self.logger, logging.WARNING),
|
|
241
|
+
)(self._get_limits)
|
|
242
|
+
|
|
243
|
+
raw_limits = self._get_limits()
|
|
244
|
+
|
|
245
|
+
# Map API query_type -> your decorator endpoint keys
|
|
246
|
+
mapped_limits = {
|
|
247
|
+
"fast": raw_limits.get("fast", []),
|
|
248
|
+
"bulk": raw_limits.get("slow", []),
|
|
249
|
+
"scrape-url": raw_limits.get("visit", []),
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
self._limiters = {
|
|
253
|
+
endpoint: [RateLimiter(calls, period) for calls, period in buckets]
|
|
254
|
+
for endpoint, buckets in mapped_limits.items()
|
|
255
|
+
}
|
|
234
256
|
|
|
235
257
|
# Filters
|
|
236
258
|
self.publish_start = publish_start
|
|
@@ -1602,7 +1624,6 @@ class Nosible:
|
|
|
1602
1624
|
|
|
1603
1625
|
return filtered
|
|
1604
1626
|
|
|
1605
|
-
|
|
1606
1627
|
def close(self):
|
|
1607
1628
|
"""
|
|
1608
1629
|
Close the Nosible client, shutting down the HTTP session
|
|
@@ -1702,41 +1723,52 @@ class Nosible:
|
|
|
1702
1723
|
|
|
1703
1724
|
return response
|
|
1704
1725
|
|
|
1705
|
-
def _get_user_plan(self) -> str:
|
|
1726
|
+
def _get_limits(self) -> dict[str, list[tuple[int, float]]]:
|
|
1727
|
+
"""
|
|
1728
|
+
TODO
|
|
1706
1729
|
"""
|
|
1707
|
-
|
|
1730
|
+
url = "https://www.nosible.ai/search/v2/limits"
|
|
1731
|
+
resp = self._session.get(
|
|
1732
|
+
url=url,
|
|
1733
|
+
headers=self.headers,
|
|
1734
|
+
timeout=self.timeout,
|
|
1735
|
+
follow_redirects=True,
|
|
1736
|
+
)
|
|
1708
1737
|
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1738
|
+
if resp.status_code == 401:
|
|
1739
|
+
raise ValueError("Your API key is not valid.")
|
|
1740
|
+
if resp.status_code == 429:
|
|
1741
|
+
raise ValueError("You have hit your rate limit.")
|
|
1742
|
+
if resp.status_code == 409:
|
|
1743
|
+
raise ValueError("Too many concurrent searches.")
|
|
1744
|
+
if resp.status_code == 502:
|
|
1745
|
+
raise ValueError("NOSIBLE is currently restarting.")
|
|
1746
|
+
if resp.status_code == 504:
|
|
1747
|
+
raise ValueError("NOSIBLE is currently overloaded.")
|
|
1712
1748
|
|
|
1713
|
-
|
|
1714
|
-
-------
|
|
1715
|
-
str
|
|
1716
|
-
The plan you are currently on.
|
|
1749
|
+
resp.raise_for_status()
|
|
1717
1750
|
|
|
1718
|
-
|
|
1719
|
-
|
|
1720
|
-
|
|
1721
|
-
|
|
1751
|
+
try:
|
|
1752
|
+
data = resp.json()
|
|
1753
|
+
except Exception as e:
|
|
1754
|
+
raise ValueError("Invalid JSON response from /limits") from e
|
|
1722
1755
|
|
|
1723
|
-
|
|
1724
|
-
|
|
1725
|
-
|
|
1726
|
-
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1756
|
+
limits_list = data.get("limits")
|
|
1757
|
+
if not isinstance(limits_list, list):
|
|
1758
|
+
raise ValueError(f"Invalid /limits response shape: {data!r}")
|
|
1759
|
+
|
|
1760
|
+
grouped: dict[str, list[tuple[int, float]]] = {}
|
|
1761
|
+
for item in limits_list:
|
|
1762
|
+
query_type = item.get("query_type")
|
|
1763
|
+
duration = item.get("duration_seconds")
|
|
1764
|
+
limit = item.get("limit")
|
|
1732
1765
|
|
|
1733
|
-
|
|
1734
|
-
|
|
1766
|
+
if query_type is None or duration is None or limit is None:
|
|
1767
|
+
raise ValueError(f"Invalid limit entry: {item!r}")
|
|
1735
1768
|
|
|
1736
|
-
|
|
1737
|
-
raise ValueError(f"Your API key is not valid: {prefix} is not a valid plan prefix.")
|
|
1769
|
+
grouped.setdefault(str(query_type), []).append((int(limit), float(duration)))
|
|
1738
1770
|
|
|
1739
|
-
return
|
|
1771
|
+
return grouped
|
|
1740
1772
|
|
|
1741
1773
|
def _generate_expansions(self, question: Union[str, Search]) -> list:
|
|
1742
1774
|
"""
|
|
@@ -8,77 +8,6 @@ from pyrate_limiter.exceptions import BucketFullException, LimiterDelayException
|
|
|
8
8
|
|
|
9
9
|
log = logging.getLogger(__name__)
|
|
10
10
|
|
|
11
|
-
PLAN_RATE_LIMITS = {
|
|
12
|
-
"test": {
|
|
13
|
-
# Per minute limit, then per month.
|
|
14
|
-
"scrape-url": [(60, 60), (300, 24 * 3600 * 30)],
|
|
15
|
-
"bulk": [(60, 60), (300, 24 * 3600 * 30)],
|
|
16
|
-
"fast": [(60, 60), (3000, 24 * 3600 * 30)],
|
|
17
|
-
},
|
|
18
|
-
"basic": {
|
|
19
|
-
"scrape-url": [(60, 60), (1400, 24 * 3600 * 30)],
|
|
20
|
-
"bulk": [(60, 60), (1400, 24 * 3600 * 30)],
|
|
21
|
-
"fast": [(60, 60), (14_000, 24 * 3600 * 30)],
|
|
22
|
-
},
|
|
23
|
-
"pro": {
|
|
24
|
-
"scrape-url": [(60, 60), (6700, 24 * 3600 * 30)],
|
|
25
|
-
"bulk": [(60, 60), (6700, 24 * 3600 * 30)],
|
|
26
|
-
"fast": [(60, 60), (67_000, 24 * 3600 * 30)],
|
|
27
|
-
},
|
|
28
|
-
"pro+": {
|
|
29
|
-
"scrape-url": [(60, 60), (32_000, 24 * 3600 * 30)],
|
|
30
|
-
"bulk": [(60, 60), (32_000, 24 * 3600 * 30)],
|
|
31
|
-
"fast": [(60, 60), (320_000, 24 * 3600 * 30)],
|
|
32
|
-
},
|
|
33
|
-
"bus": {
|
|
34
|
-
"scrape-url": [(60, 60), (200_000, 24 * 3600 * 30)],
|
|
35
|
-
"bulk": [(60, 60), (200_000, 24 * 3600 * 30)],
|
|
36
|
-
"fast": [(60, 60), (2_000_000, 24 * 3600 * 30)],
|
|
37
|
-
},
|
|
38
|
-
"bus+": {
|
|
39
|
-
"scrape-url": [(60, 60), (500_000, 24 * 3600 * 30)],
|
|
40
|
-
"bulk": [(60, 60), (500_000, 24 * 3600 * 30)],
|
|
41
|
-
"fast": [(120, 60), (5_000_000, 24 * 3600 * 30)],
|
|
42
|
-
},
|
|
43
|
-
"ent": {
|
|
44
|
-
"scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
|
|
45
|
-
"bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
|
|
46
|
-
"fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
|
|
47
|
-
},
|
|
48
|
-
"prod": {
|
|
49
|
-
"scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
|
|
50
|
-
"bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
|
|
51
|
-
"fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
|
|
52
|
-
},
|
|
53
|
-
# This plan is used for testing in the package
|
|
54
|
-
"chat": {
|
|
55
|
-
"scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
|
|
56
|
-
"bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
|
|
57
|
-
"fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
|
|
58
|
-
},
|
|
59
|
-
"self": {
|
|
60
|
-
"scrape-url": [(6000, 60), (1_500_000, 24 * 3600 * 30)],
|
|
61
|
-
"bulk": [(6000, 60), (1_500_000, 24 * 3600 * 30)],
|
|
62
|
-
"fast": [(36_000, 60), (15_000_000, 24 * 3600 * 30)],
|
|
63
|
-
},
|
|
64
|
-
"cons": {
|
|
65
|
-
"scrape-url": [(60, 60), (3000, 24 * 3600 * 30)],
|
|
66
|
-
"bulk": [(60, 60), (3000, 24 * 3600 * 30)],
|
|
67
|
-
"fast": [(120, 60), (30_000, 24 * 3600 * 30)],
|
|
68
|
-
},
|
|
69
|
-
"stup": {
|
|
70
|
-
"scrape-url": [(60, 60), (30_000, 24 * 3600 * 30)],
|
|
71
|
-
"bulk": [(60, 60), (30_000, 24 * 3600 * 30)],
|
|
72
|
-
"fast": [(360, 60), (300_000, 24 * 3600 * 30)],
|
|
73
|
-
},
|
|
74
|
-
# This plan is used for testing in the package
|
|
75
|
-
"busn": {
|
|
76
|
-
"scrape-url": [(60, 60), (300_000, 24 * 3600 * 30)],
|
|
77
|
-
"bulk": [(60, 60), (300_000, 24 * 3600 * 30)],
|
|
78
|
-
"fast": [(360, 60), (3_000_000, 24 * 3600 * 30)],
|
|
79
|
-
},
|
|
80
|
-
}
|
|
81
|
-
|
|
82
11
|
|
|
83
12
|
def _rate_limited(endpoint):
|
|
84
13
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nosible
|
|
3
|
-
Version: 0.3.9
|
|
3
|
+
Version: 0.3.10
|
|
4
4
|
Summary: Python client for the NOSIBLE Search API
|
|
5
5
|
Home-page: https://github.com/NosibleAI/nosible-py
|
|
6
6
|
Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
|
|
@@ -31,7 +31,7 @@ Requires-Dist: polars
|
|
|
31
31
|
Requires-Dist: duckdb
|
|
32
32
|
Requires-Dist: openai
|
|
33
33
|
Requires-Dist: tantivy
|
|
34
|
-
Requires-Dist: pyrate-limiter
|
|
34
|
+
Requires-Dist: pyrate-limiter<4
|
|
35
35
|
Requires-Dist: tenacity
|
|
36
36
|
Requires-Dist: cryptography
|
|
37
37
|
Requires-Dist: pyarrow
|
|
@@ -72,11 +72,6 @@ def test_close_idempotent():
|
|
|
72
72
|
nos.close()
|
|
73
73
|
|
|
74
74
|
|
|
75
|
-
def test_invalid_api_key():
|
|
76
|
-
with pytest.raises(ValueError):
|
|
77
|
-
Nosible(nosible_api_key="test+|xyz")
|
|
78
|
-
|
|
79
|
-
|
|
80
75
|
def test_llm_key_required_for_expansions():
|
|
81
76
|
nos = Nosible(llm_api_key=None)
|
|
82
77
|
nos.llm_api_key = None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|