nosible 0.2.10__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {nosible-0.2.10/src/nosible.egg-info → nosible-0.3.2}/PKG-INFO +5 -5
  2. {nosible-0.2.10 → nosible-0.3.2}/README.md +2 -2
  3. {nosible-0.2.10 → nosible-0.3.2}/pyproject.toml +2 -2
  4. {nosible-0.2.10 → nosible-0.3.2}/setup.py +1 -1
  5. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/result.py +9 -9
  6. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/nosible_client.py +86 -233
  7. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/utils/rate_limiter.py +22 -22
  8. {nosible-0.2.10 → nosible-0.3.2/src/nosible.egg-info}/PKG-INFO +5 -5
  9. {nosible-0.2.10 → nosible-0.3.2}/tests/test_01_nosible.py +37 -68
  10. {nosible-0.2.10 → nosible-0.3.2}/LICENSE +0 -0
  11. {nosible-0.2.10 → nosible-0.3.2}/setup.cfg +0 -0
  12. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/__init__.py +0 -0
  13. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/result_set.py +0 -0
  14. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/search.py +0 -0
  15. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/search_set.py +0 -0
  16. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/snippet.py +0 -0
  17. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/snippet_set.py +0 -0
  18. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/classes/web_page.py +0 -0
  19. {nosible-0.2.10 → nosible-0.3.2}/src/nosible/utils/json_tools.py +0 -0
  20. {nosible-0.2.10 → nosible-0.3.2}/src/nosible.egg-info/SOURCES.txt +0 -0
  21. {nosible-0.2.10 → nosible-0.3.2}/src/nosible.egg-info/dependency_links.txt +0 -0
  22. {nosible-0.2.10 → nosible-0.3.2}/src/nosible.egg-info/requires.txt +0 -0
  23. {nosible-0.2.10 → nosible-0.3.2}/src/nosible.egg-info/top_level.txt +0 -0
  24. {nosible-0.2.10 → nosible-0.3.2}/tests/test_02_results.py +0 -0
  25. {nosible-0.2.10 → nosible-0.3.2}/tests/test_03_search_searchset.py +0 -0
  26. {nosible-0.2.10 → nosible-0.3.2}/tests/test_04_snippets.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.2.10
3
+ Version: 0.3.2
4
4
  Summary: Python client for the NOSIBLE Search API
5
- Home-page: https://github.com/NosibleAI/nosible
5
+ Home-page: https://github.com/NosibleAI/nosible-py
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
7
7
  Author-email: Stuart Reid <stuart@nosible.com>, Matthew Dicks <matthew@nosible.com>, Richard Taylor <richard@nosible.com>, Gareth Warburton <gareth@nosible.com>
8
8
  License-Expression: MIT
9
- Project-URL: Homepage, https://github.com/NosibleAI/nosible
9
+ Project-URL: Homepage, https://github.com/NosibleAI/nosible-py
10
10
  Project-URL: Documentation, https://nosible-py.readthedocs.io/en/latest/
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Intended Audience :: Developers
@@ -56,7 +56,7 @@ Dynamic: requires-python
56
56
 
57
57
  # NOSIBLE Search Client
58
58
 
59
- A high-level Python client for the [NOSIBLE Search API](https://www.nosible.ai/search/v1/docs/swagger#/).
59
+ A high-level Python client for the [NOSIBLE Search API](https://www.nosible.ai/search/v2/docs/#/).
60
60
  Easily integrate the Nosible Search API into your Python projects.
61
61
 
62
62
  ### 📄 Documentation
@@ -154,7 +154,7 @@ You can find the full NOSIBLE Search Client documentation
154
154
  ### 📡 Swagger Docs
155
155
 
156
156
  You can find online endpoints to the NOSIBLE Search API Swagger Docs
157
- [here](https://www.nosible.ai/search/v1/docs/swagger#/).
157
+ [here](https://www.nosible.ai/search/v2/docs/#/).
158
158
 
159
159
 
160
160
  ---
@@ -13,7 +13,7 @@
13
13
 
14
14
  # NOSIBLE Search Client
15
15
 
16
- A high-level Python client for the [NOSIBLE Search API](https://www.nosible.ai/search/v1/docs/swagger#/).
16
+ A high-level Python client for the [NOSIBLE Search API](https://www.nosible.ai/search/v2/docs/#/).
17
17
  Easily integrate the Nosible Search API into your Python projects.
18
18
 
19
19
  ### 📄 Documentation
@@ -111,7 +111,7 @@ You can find the full NOSIBLE Search Client documentation
111
111
  ### 📡 Swagger Docs
112
112
 
113
113
  You can find online endpoints to the NOSIBLE Search API Swagger Docs
114
- [here](https://www.nosible.ai/search/v1/docs/swagger#/).
114
+ [here](https://www.nosible.ai/search/v2/docs/#/).
115
115
 
116
116
 
117
117
  ---
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nosible"
3
- version = "0.2.10"
3
+ version = "0.3.2"
4
4
  description = "Python client for the NOSIBLE Search API"
5
5
  readme = { file = "README.md", content-type = "text/markdown" }
6
6
  requires-python = ">=3.9"
@@ -45,7 +45,7 @@ classifiers = [
45
45
  ]
46
46
 
47
47
  [project.urls]
48
- Homepage = "https://github.com/NosibleAI/nosible"
48
+ Homepage = "https://github.com/NosibleAI/nosible-py"
49
49
  Documentation = "https://nosible-py.readthedocs.io/en/latest/"
50
50
 
51
51
  [build-system]
@@ -12,7 +12,7 @@ setup(
12
12
  description="Python client for the NOSIBLE Search API",
13
13
  long_description=long_description,
14
14
  long_description_content_type="text/markdown",
15
- url="https://github.com/NosibleAI/nosible",
15
+ url="https://github.com/NosibleAI/nosible-py",
16
16
  classifiers=[
17
17
  # Development
18
18
  "Development Status :: 4 - Beta",
@@ -236,9 +236,9 @@ class Result:
236
236
 
237
237
  return ResultSet([self, other])
238
238
 
239
- def visit(self, client) -> WebPageData:
239
+ def scrape_url(self, client) -> WebPageData:
240
240
  """
241
- Visit the URL associated with this Result and retrieve its content.
241
+ Scrape the URL associated with this Result and retrieve its content.
242
242
 
243
243
  This method uses the provided Nosible client to fetch the web page content for the given URL.
244
244
  The result is returned as a WebPageData object containing the page's content and metadata.
@@ -265,16 +265,16 @@ class Result:
265
265
  >>> from nosible import Nosible, Result
266
266
  >>> with Nosible() as nos:
267
267
  ... result = Result(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
268
- ... page = result.visit(client=nos)
268
+ ... page = result.scrape_url(client=nos)
269
269
  ... isinstance(page, WebPageData)
270
270
  True
271
271
  """
272
272
  if not self.url:
273
- raise ValueError("Cannot visit Result without a URL.")
273
+ raise ValueError("Cannot scrape Result without a URL.")
274
274
  try:
275
- return client.visit(url=self.url)
275
+ return client.scrape_url(url=self.url)
276
276
  except Exception as e:
277
- raise RuntimeError(f"Failed to visit URL '{self.url}': {e}") from e
277
+ raise RuntimeError(f"Failed to scrape URL '{self.url}': {e}") from e
278
278
 
279
279
  def sentiment(self, client) -> float:
280
280
  """
@@ -303,7 +303,7 @@ class Result:
303
303
  >>> class DummyClient:
304
304
  ... llm_api_key = "dummy"
305
305
  ...
306
- ... def visit(self, url):
306
+ ... def scrape_url(self, url):
307
307
  ... return "web page"
308
308
  >>> result = Result(url="https://example.com", content="This is great!")
309
309
  >>> import types
@@ -519,7 +519,6 @@ class Result:
519
519
  try:
520
520
  from nosible import Search
521
521
 
522
- exclude_companies = [self.url_hash] if not exclude_companies else exclude_companies.append(self.url_hash)
523
522
  s = Search(
524
523
  question=self.title,
525
524
  expansions=[],
@@ -554,7 +553,8 @@ class Result:
554
553
  iab_tier_4=iab_tier_4,
555
554
  instruction=instruction,
556
555
  )
557
- return client.fast_search(search=s)
556
+ results = client.fast_search(search=s)
557
+ return results
558
558
  except Exception as e:
559
559
  raise RuntimeError(f"Failed to find similar results for title '{self.title}': {e}") from e
560
560
 
@@ -211,7 +211,7 @@ class Nosible:
211
211
  self._post = retry(
212
212
  reraise=True,
213
213
  stop=stop_after_attempt(self.retries) | stop_after_delay(self.timeout),
214
- wait=wait_exponential(multiplier=1, min=1, max=10),
214
+ wait=wait_exponential(multiplier=1, min=1, max=20),
215
215
  retry=retry_if_exception_type(httpx.RequestError),
216
216
  before_sleep=before_sleep_log(self.logger, logging.WARNING),
217
217
  )(self._post)
@@ -220,7 +220,7 @@ class Nosible:
220
220
  self._generate_expansions = retry(
221
221
  reraise=True,
222
222
  stop=stop_after_attempt(self.retries) | stop_after_delay(self.timeout),
223
- wait=wait_exponential(multiplier=1, min=1, max=10),
223
+ wait=wait_exponential(multiplier=1, min=1, max=20),
224
224
  retry=retry_if_exception_type(httpx.RequestError),
225
225
  before_sleep=before_sleep_log(self.logger, logging.WARNING),
226
226
  )(self._generate_expansions)
@@ -261,6 +261,67 @@ class Nosible:
261
261
  self.iab_tier_4 = iab_tier_4
262
262
  self.instruction = instruction
263
263
 
264
+ @_rate_limited("fast")
265
+ def search(
266
+ self,
267
+ prompt: str = None,
268
+ recursions: int = 3,
269
+ agent: str = "cybernaut-1",
270
+ ) -> ResultSet:
271
+ """
272
+ Gives you access to Cybernaut-1, an AI agent with unrestricted access to everything in
273
+ NOSIBLE including every shard, algorithm, selector, reranker, and signal.
274
+ It knows what these things are and can tune them on the fly to find better results.
275
+
276
+ Parameters
277
+ ----------
278
+ prompt: str
279
+ The information you are looking for.
280
+ recursions: int
281
+ Maximum chain-of-search length.
282
+ agent: str
283
+ The search agent you want to use.
284
+
285
+ Returns
286
+ -------
287
+ ResultSet
288
+ The results of the search.
289
+
290
+ Raises
291
+ ------
292
+ ValueError
293
+ If `recursions` is outside the inclusive range [3, 10].
294
+
295
+ Examples
296
+ --------
297
+ >>> from nosible import Nosible
298
+ >>> with Nosible() as nos:
299
+ ... results = nos.search("Interesting news from AI startups last week.")
300
+ ... print(isinstance(results, ResultSet))
301
+ True
302
+ >>> with Nosible() as nos:
303
+ ... results = nos.search(
304
+ ... prompt="Interesting news from AI startups last week.",
305
+ ... recursions=20
306
+ ... ) # doctest: +ELLIPSIS
307
+ Traceback (most recent call last):
308
+ ...
309
+ ValueError: Recursions must be [3,10].
310
+ """
311
+ if recursions < 3 or recursions > 10:
312
+ raise ValueError("Recursions must be [3,10].")
313
+
314
+ payload = {
315
+ "prompt": prompt,
316
+ "recursions": recursions,
317
+ "agent": agent,
318
+ }
319
+
320
+ resp = self._post(url="https://www.nosible.ai/search/v2/search", payload=payload)
321
+ resp.raise_for_status()
322
+ items = resp.json().get("response", [])
323
+ return ResultSet.from_dicts(items)
324
+
264
325
  def fast_search(
265
326
  self,
266
327
  search: Search = None,
@@ -892,7 +953,7 @@ class Nosible:
892
953
  if val is not None:
893
954
  payload[key] = val
894
955
 
895
- resp = self._post(url="https://www.nosible.ai/search/v1/fast-search", payload=payload)
956
+ resp = self._post(url="https://www.nosible.ai/search/v2/fast-search", payload=payload)
896
957
  resp.raise_for_status()
897
958
  items = resp.json().get("response", [])[:filter_responses]
898
959
  return ResultSet.from_dicts(items)
@@ -938,7 +999,7 @@ class Nosible:
938
999
 
939
1000
  raise TypeError("`question` must be str, Search, SearchSet, or a list thereof")
940
1001
 
941
- @_rate_limited("slow")
1002
+ @_rate_limited("bulk")
942
1003
  def bulk_search(
943
1004
  self,
944
1005
  *,
@@ -1233,7 +1294,7 @@ class Nosible:
1233
1294
 
1234
1295
  # Enforce Minimums
1235
1296
  filter_responses = n_results
1236
- # Slow search must ask for at least 1 000
1297
+ # Bulk search must ask for at least 1 000
1237
1298
  n_results = max(n_results, 1000)
1238
1299
 
1239
1300
  self.logger.info(f"Performing bulk search for {question!r}...")
@@ -1271,7 +1332,7 @@ class Nosible:
1271
1332
  if val is not None:
1272
1333
  payload[key] = val
1273
1334
 
1274
- resp = self._post(url="https://www.nosible.ai/search/v1/slow-search", payload=payload)
1335
+ resp = self._post(url="https://www.nosible.ai/search/v2/bulk-search", payload=payload)
1275
1336
  try:
1276
1337
  resp.raise_for_status()
1277
1338
  except httpx.HTTPStatusError as e:
@@ -1279,7 +1340,7 @@ class Nosible:
1279
1340
 
1280
1341
  data = resp.json()
1281
1342
 
1282
- # Slow search: download & decrypt
1343
+ # Bulk search: download & decrypt
1283
1344
  download_from = data.get("download_from")
1284
1345
  if ".zstd." in download_from:
1285
1346
  download_from = download_from.replace(".zstd.", ".gzip.", 1)
@@ -1408,10 +1469,10 @@ class Nosible:
1408
1469
  # Return the generated text
1409
1470
  return "Answer:\n" + response.choices[0].message.content.strip()
1410
1471
 
1411
- @_rate_limited("visit")
1412
- def visit(self, html: str = "", recrawl: bool = False, render: bool = False, url: str = None) -> WebPageData:
1472
+ @_rate_limited("scrape-url")
1473
+ def scrape_url(self, html: str = "", recrawl: bool = False, render: bool = False, url: str = None) -> WebPageData:
1413
1474
  """
1414
- Visit a given URL and return a structured WebPageData object for the page.
1475
+ Scrape a given URL and return a structured WebPageData object for the page.
1415
1476
 
1416
1477
  Parameters
1417
1478
  ----------
@@ -1444,7 +1505,7 @@ class Nosible:
1444
1505
  --------
1445
1506
  >>> from nosible import Nosible
1446
1507
  >>> with Nosible() as nos:
1447
- ... out = nos.visit(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
1508
+ ... out = nos.scrape_url(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
1448
1509
  ... print(isinstance(out, WebPageData))
1449
1510
  ... print(hasattr(out, "languages"))
1450
1511
  ... print(hasattr(out, "page"))
@@ -1452,7 +1513,7 @@ class Nosible:
1452
1513
  True
1453
1514
  True
1454
1515
  >>> with Nosible() as nos:
1455
- ... out = nos.visit()
1516
+ ... out = nos.scrape_url()
1456
1517
  ... print(isinstance(out, type(WebPageData)))
1457
1518
  ... print(hasattr(out, "languages"))
1458
1519
  ... print(hasattr(out, "page")) # doctest: +ELLIPSIS
@@ -1463,7 +1524,7 @@ class Nosible:
1463
1524
  if url is None:
1464
1525
  raise TypeError("URL must be provided")
1465
1526
  response = self._post(
1466
- url="https://www.nosible.ai/search/v1/visit",
1527
+ url="https://www.nosible.ai/search/v2/scrape-url",
1467
1528
  payload={"html": html, "recrawl": recrawl, "render": render, "url": url},
1468
1529
  )
1469
1530
  try:
@@ -1494,7 +1555,7 @@ class Nosible:
1494
1555
  )
1495
1556
 
1496
1557
  @_rate_limited("fast")
1497
- def trend(
1558
+ def topic_trend(
1498
1559
  self,
1499
1560
  query: str,
1500
1561
  start_date: Optional[str] = None,
@@ -1502,7 +1563,7 @@ class Nosible:
1502
1563
  sql_filter: Optional[str] = None,
1503
1564
  ) -> dict:
1504
1565
  """
1505
- Extract a trend showing the volume of news surrounding your query.
1566
+ Extract a topic's trend showing the volume of news surrounding your query.
1506
1567
 
1507
1568
  Parameters
1508
1569
  ----------
@@ -1518,14 +1579,14 @@ class Nosible:
1518
1579
  Returns
1519
1580
  -------
1520
1581
  dict
1521
- The JSON-decoded trend data returned by the server.
1582
+ The JSON-decoded topic trend data returned by the server.
1522
1583
 
1523
1584
  Examples
1524
1585
  --------
1525
1586
  >>> from nosible import Nosible
1526
1587
  >>> with Nosible() as nos:
1527
- ... trends_data = nos.trend("Christmas Shopping", start_date="2005-01-01", end_date="2020-12-31")
1528
- ... print(trends_data) # doctest: +ELLIPSIS
1588
+ ... topic_trends_data = nos.topic_trend("Christmas Shopping", start_date="2005-01-01", end_date="2020-12-31")
1589
+ ... print(topic_trends_data) # doctest: +ELLIPSIS
1529
1590
  {'2005-01-31': ...'2020-12-31': ...}
1530
1591
  """
1531
1592
  # Validate dates
@@ -1541,8 +1602,8 @@ class Nosible:
1541
1602
  else:
1542
1603
  payload["sql_filter"] = "SELECT loc, published FROM engine"
1543
1604
 
1544
- # Send the POST to the /trend endpoint
1545
- response = self._post(url="https://www.nosible.ai/search/v1/trend", payload=payload)
1605
+ # Send the POST to the /topic-trend endpoint
1606
+ response = self._post(url="https://www.nosible.ai/search/v2/topic-trend", payload=payload)
1546
1607
  # Will raise ValueError on rate-limit or auth errors
1547
1608
  response.raise_for_status()
1548
1609
  payload = response.json().get("response", {})
@@ -1562,212 +1623,6 @@ class Nosible:
1562
1623
 
1563
1624
  return filtered
1564
1625
 
1565
- def version(self) -> str:
1566
- """
1567
- Retrieve the current version information for the Nosible API.
1568
-
1569
- Returns
1570
- -------
1571
- str
1572
- JSON-formatted string containing API version details.
1573
-
1574
- Examples
1575
- --------
1576
- >>> import json
1577
- >>> from nosible import Nosible
1578
- >>> with Nosible() as nos:
1579
- ... v = nos.version()
1580
- ... data = json.loads(v)
1581
- ... # top‐level object must be a dict
1582
- ... print(isinstance(data, dict))
1583
- ... # must have a "response" key mapping to another dict
1584
- ... print("response" in data and isinstance(data["response"], dict))
1585
- ... # that inner dict must have exactly the expected sub-keys
1586
- ... expected = {
1587
- ... "database",
1588
- ... "date",
1589
- ... "documents",
1590
- ... "runtime",
1591
- ... "snippets",
1592
- ... "time",
1593
- ... "tokens",
1594
- ... "version",
1595
- ... "words",
1596
- ... }
1597
- ... print(set(data["response"].keys()) == expected)
1598
- True
1599
- True
1600
- True
1601
- """
1602
- response = self._post(url="https://www.nosible.ai/search/v1/version", payload={})
1603
-
1604
- return json.dumps(response.json(), indent=2, sort_keys=True)
1605
-
1606
- def indexed(self, url: str = None) -> bool:
1607
- """
1608
- This function checks if a URL has been indexed by Nosible.
1609
-
1610
- Parameters
1611
- ----------
1612
- url : str, optional
1613
- The full URL to verify.
1614
-
1615
- Returns
1616
- -------
1617
- bool
1618
- True if the URL is in the index.
1619
- False if the URL is not in the index.
1620
-
1621
- Raises
1622
- ------
1623
-
1624
- Examples
1625
- --------
1626
- >>> from nosible import Nosible
1627
- >>> with Nosible() as nos:
1628
- ... print(nos.indexed(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/"))
1629
- True
1630
- """
1631
- response = self._post(url="https://www.nosible.ai/search/v1/indexed", payload={"url": url})
1632
-
1633
- try:
1634
- response.raise_for_status()
1635
- data = response.json()
1636
- msg = data.get("message")
1637
- if msg == "The URL is in the system.":
1638
- return True
1639
- if msg == "The URL is nowhere to be found.":
1640
- return False
1641
- if msg == "The URL could not be retrieved.":
1642
- return False
1643
- # If we reach here, the response is unexpected
1644
- return False
1645
- except httpx.HTTPError:
1646
- return False
1647
- except:
1648
- return False
1649
-
1650
- def preflight(self, url: str = None) -> str:
1651
- """
1652
- Run a preflight check for crawling/preprocessing on a URL.
1653
-
1654
- Parameters
1655
- ----------
1656
- url : str, optional
1657
- The URL to validate or prepare for indexing.
1658
-
1659
- Returns
1660
- -------
1661
- str
1662
- JSON-formatted string with errors, warnings, or recommendations.
1663
-
1664
- Examples
1665
- --------
1666
- >>> from nosible import Nosible
1667
- >>> with Nosible() as nos:
1668
- ... pf = nos.preflight(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
1669
- ... print(pf)
1670
- {
1671
- "response": {
1672
- "domain": "dailynewsegypt",
1673
- "fragment": "",
1674
- "geo": "US",
1675
- "hash": "ENNmqkF1mGNhVhvhmbUEs4U2",
1676
- "netloc": "www.dailynewsegypt.com",
1677
- "path": "/2023/09/08/g20-and-its-summits/",
1678
- "prefix": "www",
1679
- "proxy": "US",
1680
- "query": "",
1681
- "query_allowed": {},
1682
- "query_blocked": {},
1683
- "raw_url": "https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/",
1684
- "scheme": "https",
1685
- "suffix": "com",
1686
- "url": "https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits"
1687
- }
1688
- }
1689
- """
1690
- response = self._post(url="https://www.nosible.ai/search/v1/preflight", payload={"url": url})
1691
-
1692
- return json.dumps(response.json(), indent=2, sort_keys=True)
1693
-
1694
- def get_rate_limits(self) -> str:
1695
- """
1696
- Generate a plaintext summary of rate limits for every subscription plan.
1697
-
1698
- Returns
1699
- -------
1700
- str
1701
- A multi-line string containing rate limits for each plan.
1702
-
1703
- Examples
1704
- --------
1705
- >>> nos = Nosible(nosible_api_key="test|xyz")
1706
- >>> print(nos.get_rate_limits()) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
1707
- Below are the rate limits for all NOSIBLE plans.
1708
- To upgrade your package, visit https://www.nosible.ai/products.
1709
- <BLANKLINE>
1710
- Unless otherwise indicated, bulk searches are limited to one-at-a-time per API key.
1711
- <BLANKLINE>
1712
- Free: (Your current plan)
1713
- | Endpoint | Per Month | Per Minute | Effective CPM |
1714
- | ----------- | --------- | ---------- | ------------- |
1715
- | Search | 3000 | 60 | $4.00 |
1716
- | URL Visits | 300 | 60 | $4.00 |
1717
- | Bulk Search | 300 | 60 | $4.00 |
1718
- <BLANKLINE>
1719
- Basic ($49p/m):
1720
- | Endpoint | Per Month | Per Minute | Effective CPM |
1721
- ...
1722
- """
1723
- # Human-friendly plan names
1724
- display = {
1725
- "test": "Free",
1726
- "basic": "Basic ($49p/m)",
1727
- "pro": "Pro ($199p/m)",
1728
- "pro+": "Pro+ ($799p/m)",
1729
- "bus": "Business ($3999p/m)",
1730
- "bus+": "Business+ ($7499p/m)",
1731
- "ent": "Enterprise ($14999p/m)",
1732
- }
1733
-
1734
- # Human-friendly endpoint names
1735
- endpoint_name = {"fast": "Search", "visit": "URL Visits", "slow": "Bulk Search"}
1736
-
1737
- out = [
1738
- "Below are the rate limits for all NOSIBLE plans.",
1739
- "To upgrade your package, visit https://www.nosible.ai/products.\n",
1740
- "Unless otherwise indicated, bulk searches are limited to one-at-a-time per API key.\n",
1741
- ]
1742
-
1743
- user_plan = self._get_user_plan()
1744
- current_plan = ""
1745
- cpm_counter = 4.0
1746
-
1747
- # Preserve the order you care about:
1748
- for plan in ["test", "basic", "pro", "pro+", "bus", "bus+", "ent", "cons", "stup", "busn"]:
1749
- name = display.get(plan, plan)
1750
- if plan == user_plan:
1751
- current_plan = " (Your current plan)"
1752
-
1753
- out.append(f"{name}:{current_plan}")
1754
- out.append("| Endpoint | Per Month | Per Minute | Effective CPM |")
1755
- out.append("| ----------- | --------- | ---------- | ------------- |")
1756
-
1757
- for ep in ["fast", "visit", "slow"]:
1758
- buckets = PLAN_RATE_LIMITS[plan][ep]
1759
- # Find minute & day
1760
- minute = next(limit for limit, i in buckets if i == 60)
1761
- month = next(limit for limit, i in buckets if i == 24 * 3600 * 30)
1762
- cpm = f"${cpm_counter:.2f}"
1763
-
1764
- out.append(f"| {endpoint_name[ep]:<11} | {month:>9} | {minute:>10} | {cpm:>13} |")
1765
-
1766
- cpm_counter = cpm_counter - 0.5
1767
- out.append("") # Blank line
1768
- current_plan = ""
1769
-
1770
- return "\n".join(out)
1771
1626
 
1772
1627
  def close(self):
1773
1628
  """
@@ -2192,14 +2047,14 @@ class Nosible:
2192
2047
 
2193
2048
  # Include / exclude companies
2194
2049
  if include_companies:
2195
- company_list = ", ".join(f"'{c}'" for c in include_companies)
2050
+ company_list = " OR ".join(f"ARRAY_CONTAINS(companies, '{c}')" for c in include_companies)
2196
2051
  clauses.append(
2197
- f"(company_1 IN ({company_list}) OR company_2 IN ({company_list}) OR company_3 IN ({company_list}))"
2052
+ f"(companies IS NOT NULL AND ({company_list}))"
2198
2053
  )
2199
2054
  if exclude_companies:
2200
- company_list = ", ".join(f"'{c}'" for c in exclude_companies)
2055
+ company_list = " OR ".join(f"ARRAY_CONTAINS(companies, '{c}')" for c in exclude_companies)
2201
2056
  clauses.append(
2202
- f"(company_1 NOT IN ({company_list}) AND company_2 NOT IN ({company_list}) AND company_3 NOT IN ({company_list}))"
2057
+ f"(companies IS NULL OR NOT ({company_list}))"
2203
2058
  )
2204
2059
 
2205
2060
  if include_docs:
@@ -2256,9 +2111,7 @@ class Nosible:
2256
2111
  "certain",
2257
2112
  "netloc",
2258
2113
  "language",
2259
- "company_1",
2260
- "company_2",
2261
- "company_3",
2114
+ "companies",
2262
2115
  "doc_hash",
2263
2116
  ]
2264
2117
  import polars as pl # Lazy import
@@ -11,60 +11,60 @@ log = logging.getLogger(__name__)
11
11
  PLAN_RATE_LIMITS = {
12
12
  "test": {
13
13
  # Per minute limit, then per month.
14
- "visit": [(60, 60), (300, 24 * 3600 * 30)],
15
- "slow": [(60, 60), (300, 24 * 3600 * 30)],
14
+ "scrape-url": [(60, 60), (300, 24 * 3600 * 30)],
15
+ "bulk": [(60, 60), (300, 24 * 3600 * 30)],
16
16
  "fast": [(60, 60), (3000, 24 * 3600 * 30)],
17
17
  },
18
18
  "basic": {
19
- "visit": [(60, 60), (1400, 24 * 3600 * 30)],
20
- "slow": [(60, 60), (1400, 24 * 3600 * 30)],
19
+ "scrape-url": [(60, 60), (1400, 24 * 3600 * 30)],
20
+ "bulk": [(60, 60), (1400, 24 * 3600 * 30)],
21
21
  "fast": [(60, 60), (14_000, 24 * 3600 * 30)],
22
22
  },
23
23
  "pro": {
24
- "visit": [(60, 60), (6700, 24 * 3600 * 30)],
25
- "slow": [(60, 60), (6700, 24 * 3600 * 30)],
24
+ "scrape-url": [(60, 60), (6700, 24 * 3600 * 30)],
25
+ "bulk": [(60, 60), (6700, 24 * 3600 * 30)],
26
26
  "fast": [(60, 60), (67_000, 24 * 3600 * 30)],
27
27
  },
28
28
  "pro+": {
29
- "visit": [(60, 60), (32_000, 24 * 3600 * 30)],
30
- "slow": [(60, 60), (32_000, 24 * 3600 * 30)],
29
+ "scrape-url": [(60, 60), (32_000, 24 * 3600 * 30)],
30
+ "bulk": [(60, 60), (32_000, 24 * 3600 * 30)],
31
31
  "fast": [(60, 60), (320_000, 24 * 3600 * 30)],
32
32
  },
33
33
  "bus": {
34
- "visit": [(60, 60), (200_000, 24 * 3600 * 30)],
35
- "slow": [(60, 60), (200_000, 24 * 3600 * 30)],
34
+ "scrape-url": [(60, 60), (200_000, 24 * 3600 * 30)],
35
+ "bulk": [(60, 60), (200_000, 24 * 3600 * 30)],
36
36
  "fast": [(60, 60), (2_000_000, 24 * 3600 * 30)],
37
37
  },
38
38
  "bus+": {
39
- "visit": [(60, 60), (500_000, 24 * 3600 * 30)],
40
- "slow": [(60, 60), (500_000, 24 * 3600 * 30)],
39
+ "scrape-url": [(60, 60), (500_000, 24 * 3600 * 30)],
40
+ "bulk": [(60, 60), (500_000, 24 * 3600 * 30)],
41
41
  "fast": [(120, 60), (5_000_000, 24 * 3600 * 30)],
42
42
  },
43
43
  "ent": {
44
- "visit": [(60, 60), (1_500_000, 24 * 3600 * 30)],
45
- "slow": [(60, 60), (1_500_000, 24 * 3600 * 30)],
44
+ "scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
45
+ "bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
46
46
  "fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
47
47
  },
48
48
  # This plan is used for testing in the package
49
49
  "chat": {
50
- "visit": [(60, 60), (1_500_000, 24 * 3600 * 30)],
51
- "slow": [(60, 60), (1_500_000, 24 * 3600 * 30)],
50
+ "scrape-url": [(60, 60), (1_500_000, 24 * 3600 * 30)],
51
+ "bulk": [(60, 60), (1_500_000, 24 * 3600 * 30)],
52
52
  "fast": [(360, 60), (15_000_000, 24 * 3600 * 30)],
53
53
  },
54
54
  "cons": {
55
- "visit": [(60, 60), (3000, 24 * 3600 * 30)],
56
- "slow": [(60, 60), (3000, 24 * 3600 * 30)],
55
+ "scrape-url": [(60, 60), (3000, 24 * 3600 * 30)],
56
+ "bulk": [(60, 60), (3000, 24 * 3600 * 30)],
57
57
  "fast": [(120, 60), (30_000, 24 * 3600 * 30)],
58
58
  },
59
59
  "stup": {
60
- "visit": [(60, 60), (30_000, 24 * 3600 * 30)],
61
- "slow": [(60, 60), (30_000, 24 * 3600 * 30)],
60
+ "scrape-url": [(60, 60), (30_000, 24 * 3600 * 30)],
61
+ "bulk": [(60, 60), (30_000, 24 * 3600 * 30)],
62
62
  "fast": [(360, 60), (300_000, 24 * 3600 * 30)],
63
63
  },
64
64
  # This plan is used for testing in the package
65
65
  "busn": {
66
- "visit": [(60, 60), (300_000, 24 * 3600 * 30)],
67
- "slow": [(60, 60), (300_000, 24 * 3600 * 30)],
66
+ "scrape-url": [(60, 60), (300_000, 24 * 3600 * 30)],
67
+ "bulk": [(60, 60), (300_000, 24 * 3600 * 30)],
68
68
  "fast": [(360, 60), (3_000_000, 24 * 3600 * 30)],
69
69
  },
70
70
  }
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.2.10
3
+ Version: 0.3.2
4
4
  Summary: Python client for the NOSIBLE Search API
5
- Home-page: https://github.com/NosibleAI/nosible
5
+ Home-page: https://github.com/NosibleAI/nosible-py
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
7
7
  Author-email: Stuart Reid <stuart@nosible.com>, Matthew Dicks <matthew@nosible.com>, Richard Taylor <richard@nosible.com>, Gareth Warburton <gareth@nosible.com>
8
8
  License-Expression: MIT
9
- Project-URL: Homepage, https://github.com/NosibleAI/nosible
9
+ Project-URL: Homepage, https://github.com/NosibleAI/nosible-py
10
10
  Project-URL: Documentation, https://nosible-py.readthedocs.io/en/latest/
11
11
  Classifier: Development Status :: 4 - Beta
12
12
  Classifier: Intended Audience :: Developers
@@ -56,7 +56,7 @@ Dynamic: requires-python
56
56
 
57
57
  # NOSIBLE Search Client
58
58
 
59
- A high-level Python client for the [NOSIBLE Search API](https://www.nosible.ai/search/v1/docs/swagger#/).
59
+ A high-level Python client for the [NOSIBLE Search API](https://www.nosible.ai/search/v2/docs/#/).
60
60
  Easily integrate the Nosible Search API into your Python projects.
61
61
 
62
62
  ### 📄 Documentation
@@ -154,7 +154,7 @@ You can find the full NOSIBLE Search Client documentation
154
154
  ### 📡 Swagger Docs
155
155
 
156
156
  You can find online endpoints to the NOSIBLE Search API Swagger Docs
157
- [here](https://www.nosible.ai/search/v1/docs/swagger#/).
157
+ [here](https://www.nosible.ai/search/v2/docs/#/).
158
158
 
159
159
 
160
160
  ---
@@ -59,44 +59,13 @@ def test_bulk_search_errors_and_success(bulk_search_data):
59
59
  assert len(bulk_search_data) == 1000
60
60
 
61
61
 
62
- def test_visit_success_and_error(visit_data):
63
- assert isinstance(visit_data, WebPageData)
64
- assert hasattr(visit_data, "languages")
65
- assert hasattr(visit_data, "page")
62
+ def test_scrape_url_success_and_error(scrape_url_data):
63
+ assert isinstance(scrape_url_data, WebPageData)
64
+ assert hasattr(scrape_url_data, "languages")
65
+ assert hasattr(scrape_url_data, "page")
66
66
  nos = Nosible()
67
67
  with pytest.raises(TypeError):
68
- nos.visit()
69
-
70
-
71
- def test_version_structure():
72
- nos = Nosible()
73
- v = nos.version()
74
- data = json.loads(v)
75
- assert isinstance(data, dict)
76
- assert "response" in data and isinstance(data["response"], dict)
77
- expected = {"database", "date", "documents", "runtime", "snippets", "time", "tokens", "version", "words"}
78
- assert set(data["response"].keys()) == expected
79
-
80
-
81
- def test_indexed_fixture(indexed_data):
82
- assert indexed_data is True
83
-
84
-
85
- def test_preflight_output():
86
- nos = Nosible()
87
- pf = nos.preflight(url="https://www.dailynewsegypt.com/2023/09/08/g20-and-its-summits/")
88
- # Turn pf str into a dict
89
- pf = json.loads(pf)
90
- assert isinstance(pf, dict)
91
- assert "response" in pf and isinstance(pf["response"], dict)
92
- for key in ("domain", "netloc", "raw_url", "scheme", "path", "suffix", "hash"):
93
- assert key in pf["response"]
94
-
95
-
96
- def test_get_rate_limits_contains_plans():
97
- nos = Nosible(nosible_api_key="test|xyz")
98
- rl = nos.get_rate_limits()
99
- assert "Free:" in rl and "Basic ($49p/m):" in rl
68
+ nos.scrape_url()
100
69
 
101
70
 
102
71
  def test_close_idempotent():
@@ -131,70 +100,70 @@ def test_search_minimal(search_data):
131
100
  assert isinstance(search_data, ResultSet)
132
101
 
133
102
 
134
- def test_visit_full_attributes(visit_data):
103
+ def test_scrape_url_full_attributes(scrape_url_data):
135
104
  # all the extra attributes you wanted
136
- assert isinstance(visit_data.full_text, str)
137
- assert isinstance(visit_data.languages, dict)
138
- assert isinstance(visit_data.metadata, dict)
139
- assert isinstance(visit_data.page, dict)
140
- assert isinstance(visit_data.request, dict)
141
- assert isinstance(visit_data.snippets, SnippetSet)
142
- assert isinstance(visit_data.statistics, dict)
143
- assert isinstance(visit_data.structured, list)
144
- assert isinstance(visit_data.url_tree, dict)
145
-
146
-
147
- def test_visit_save_load(tmp_path, visit_data):
105
+ assert isinstance(scrape_url_data.full_text, str)
106
+ assert isinstance(scrape_url_data.languages, dict)
107
+ assert isinstance(scrape_url_data.metadata, dict)
108
+ assert isinstance(scrape_url_data.page, dict)
109
+ assert isinstance(scrape_url_data.request, dict)
110
+ assert isinstance(scrape_url_data.snippets, SnippetSet)
111
+ assert isinstance(scrape_url_data.statistics, dict)
112
+ assert isinstance(scrape_url_data.structured, list)
113
+ assert isinstance(scrape_url_data.url_tree, dict)
114
+
115
+
116
+ def test_scrape_url_save_load(tmp_path, scrape_url_data):
148
117
  # save to JSON and reload
149
- path = tmp_path / "visit_data.json"
150
- visit_data.write_json(path)
118
+ path = tmp_path / "scrape_url_data.json"
119
+ scrape_url_data.write_json(path)
151
120
  loaded = WebPageData.read_json(path)
152
121
  assert isinstance(loaded, WebPageData)
153
- assert loaded == visit_data
122
+ assert loaded == scrape_url_data
154
123
  assert isinstance(loaded.snippets, SnippetSet)
155
124
 
156
125
 
157
- def test_visit_write_json_roundtrip(tmp_path, visit_data):
126
+ def test_scrape_url_write_json_roundtrip(tmp_path, scrape_url_data):
158
127
  # write_json / read_json
159
- s = visit_data.write_json(tmp_path / "visit_data.json")
128
+ s = scrape_url_data.write_json(tmp_path / "scrape_url_data.json")
160
129
  assert isinstance(s, str)
161
- rehydrated = WebPageData.read_json(tmp_path / "visit_data.json")
130
+ rehydrated = WebPageData.read_json(tmp_path / "scrape_url_data.json")
162
131
  assert isinstance(rehydrated, WebPageData)
163
132
  assert isinstance(rehydrated.snippets, SnippetSet)
164
133
 
165
134
 
166
- def test_trend_success(trend_data):
167
- # trend_data fixture should give the full payload as a dict
168
- assert isinstance(trend_data, dict)
169
- assert trend_data # non‐empty
135
+ def test_topic_trend_success(topic_trend_data):
136
+ # topic_trend_data fixture should give the full payload as a dict
137
+ assert isinstance(topic_trend_data, dict)
138
+ assert topic_trend_data # non‐empty
170
139
  # keys should look like ISO dates, values numeric
171
- for date_str, count in trend_data.items():
140
+ for date_str, count in topic_trend_data.items():
172
141
  assert re.match(r"^\d{4}-\d{2}-\d{2}$", date_str)
173
142
  assert isinstance(count, (int, float))
174
143
 
175
144
 
176
- def test_trend_date_window(trend_data):
145
+ def test_topic_trend_date_window(topic_trend_data):
177
146
  """
178
147
  When start_date/end_date exactly cover the full range,
179
- trend() should return the same set of dates (keys), regardless of values.
148
+ topic_trend() should return the same set of dates (keys), regardless of values.
180
149
  """
181
- dates = sorted(trend_data.keys())
150
+ dates = sorted(topic_trend_data.keys())
182
151
  start, end = dates[0], dates[-1]
183
152
 
184
153
  with Nosible() as nos:
185
- windowed = nos.trend(query="any query", start_date=start, end_date=end)
154
+ windowed = nos.topic_trend(query="any query", start_date=start, end_date=end)
186
155
  # Compare only the dates (keys), not the counts
187
- assert set(windowed.keys()) == set(trend_data.keys())
156
+ assert set(windowed.keys()) == set(topic_trend_data.keys())
188
157
  # And in the same order if you care about ordering
189
158
  assert sorted(windowed.keys()) == dates
190
159
 
191
160
 
192
- def test_trend_invalid_date_format():
161
+ def test_topic_trend_invalid_date_format():
193
162
  with Nosible() as nos:
194
163
  with pytest.raises(ValueError):
195
- nos.trend(query="q", start_date="20210101") # Missing hyphens
164
+ nos.topic_trend(query="q", start_date="20210101") # Missing hyphens
196
165
  with pytest.raises(ValueError):
197
- nos.trend(query="q", end_date="2021/01/01") # Wrong separator
166
+ nos.topic_trend(query="q", end_date="2021/01/01") # Wrong separator
198
167
 
199
168
 
200
169
  def test_search_min_similarity(search_data):
File without changes
File without changes