nosible 0.1.8__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {nosible-0.1.8/src/nosible.egg-info → nosible-0.1.9}/PKG-INFO +9 -45
  2. {nosible-0.1.8 → nosible-0.1.9}/README.md +8 -44
  3. {nosible-0.1.8 → nosible-0.1.9}/pyproject.toml +17 -11
  4. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/result.py +65 -106
  5. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/result_set.py +119 -113
  6. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/search.py +68 -89
  7. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/search_set.py +27 -12
  8. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/snippet.py +57 -74
  9. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/snippet_set.py +62 -63
  10. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/classes/web_page.py +39 -103
  11. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/nosible_client.py +224 -224
  12. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/utils/json_tools.py +51 -2
  13. nosible-0.1.9/src/nosible/utils/question_builder.py +131 -0
  14. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/utils/rate_limiter.py +30 -24
  15. {nosible-0.1.8 → nosible-0.1.9/src/nosible.egg-info}/PKG-INFO +9 -45
  16. {nosible-0.1.8 → nosible-0.1.9}/src/nosible.egg-info/SOURCES.txt +1 -0
  17. {nosible-0.1.8 → nosible-0.1.9}/LICENSE +0 -0
  18. {nosible-0.1.8 → nosible-0.1.9}/setup.cfg +0 -0
  19. {nosible-0.1.8 → nosible-0.1.9}/setup.py +0 -0
  20. {nosible-0.1.8 → nosible-0.1.9}/src/nosible/__init__.py +0 -0
  21. {nosible-0.1.8 → nosible-0.1.9}/src/nosible.egg-info/dependency_links.txt +0 -0
  22. {nosible-0.1.8 → nosible-0.1.9}/src/nosible.egg-info/requires.txt +0 -0
  23. {nosible-0.1.8 → nosible-0.1.9}/src/nosible.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nosible
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: Python client for the NOSIBLE Search API
5
5
  Home-page: https://github.com/NosibleAI/nosible
6
6
  Author: Stuart Reid, Matthew Dicks, Richard Taylor, Gareth Warburton
@@ -77,26 +77,11 @@ pip install nosible
77
77
  * tantivy
78
78
  * openai
79
79
 
80
- ### ⚙️ Configuration
81
-
82
- You can specify a custom base URL for all endpoints (e.g., OpenRouter, Google, or your own proxy):
83
-
84
- ```python
85
- from nosible import Nosible
86
-
87
- client = Nosible(
88
- nosible_api_key="basic|abcd1234...",
89
- llm_api_key="sk-...",
90
- base_url="https://api.openrouter.ai/v1"
91
- )
92
- ```
93
-
94
80
  ### 🔑 Authentication
95
81
 
96
- 1. Sign in to nosible.ai and grab your free API key.
82
+ 1. Sign in to [NOSIBLE.AI](https://www.nosible.ai/) and grab your free API key.
97
83
  2. Set it as an environment variable or pass directly:
98
84
 
99
-
100
85
  On Windows
101
86
 
102
87
  ```powershell
@@ -250,7 +235,7 @@ with Nosible(nosible_api_key="basic|abcd1234...") as client:
250
235
 
251
236
  #### Sentiment Analysis
252
237
 
253
- Compute sentiment for a single result (Uses GPT-4o; requires LLM API key):
238
+ Compute sentiment for a single result (uses GPT-4o; requires an LLM API key):
254
239
 
255
240
  ```python
256
241
  from nosible import Nosible
@@ -297,6 +282,10 @@ with Nosible(nosible_api_key="basic|abcd1234...") as client:
297
282
  rs_ndjson = ResultSet.from_ndjson("all_news.ndjson")
298
283
  ```
299
284
 
285
+ #### More Examples
286
+
287
+ For more examples, check out `/examples` for in-depth usage of the NOSIBLE Client Package.
288
+
300
289
  ### 📡 Swagger Docs
301
290
 
302
291
  You can find online endpoints to the NOSIBLE Search API Swagger Docs
@@ -309,33 +298,8 @@ Inspect your current limits at runtime:
309
298
  ```python
310
299
  client.get_ratelimits()
311
300
  ```
312
- Default limits by plan:
313
-
314
- | Plan | Period | Fast Searches | URL Visits | Slow Searches | Cost | CPM |
315
- |----------------|------------|---------------|------------|---------------|----------|------|
316
- | **Free** | Monthly | 3,000 | 300 | 300 | \$0 | $0 |
317
- | | Daily | 100 | 10 | 10 | | |
318
- | | Per-Minute | 10 | 1 | 1 | | |
319
- | **Basic** | Monthly | 30,000 | 3,000 | 3,000 | \$120 | $4 |
320
- | | Daily | 1,000 | 100 | 100 | | |
321
- | | Per-Minute | 10 | 1 | 1 | | |
322
- | **Pro** | Monthly | 150,000 | 7,500 | 7,500 | \$450 | $3 |
323
- | | Daily | 5,000 | 250 | 250 | | |
324
- | | Per-Minute | 10 | 1 | 1 | | |
325
- | **Pro+** | Monthly | 300,000 | 15,000 | 15,000 | \$750 | $2.5 |
326
- | | Daily | 10,000 | 500 | 500 | | |
327
- | | Per-Minute | 10 | 2 | 1 | | |
328
- | **Business** | Monthly | 1,500,000 | 30,000 | 30,000 | \$3,000 | $2 |
329
- | | Daily | 50,000 | 1,000 | 1,000 | | |
330
- | | Per-Minute | 35 | 2 | 2 | | |
331
- | **Business+** | Monthly | 3,000,000 | 60,000 | 60,000 | \$4,500 | $1.5 |
332
- | | Daily | 100,000 | 2,000 | 2,000 | | |
333
- | | Per-Minute | 100 | 3 | 3 | | |
334
- | **Enterprise** | Monthly | 15,000,000 | 150,000 | 150,000 | \$15,000 | $1 |
335
- | | Daily | 500,000 | 5,000 | 5,000 | | |
336
- | | Per-Minute | 400 | 5 | 5 | | |
337
-
338
- *All endpoints are automatically throttled
301
+
302
+ Or you can view them on the [docs](https://nosible-py.readthedocs.io/en/latest/rate_limits.html).
339
303
 
340
304
  ---
341
305
 
@@ -33,26 +33,11 @@ pip install nosible
33
33
  * tantivy
34
34
  * openai
35
35
 
36
- ### ⚙️ Configuration
37
-
38
- You can specify a custom base URL for all endpoints (e.g., OpenRouter, Google, or your own proxy):
39
-
40
- ```python
41
- from nosible import Nosible
42
-
43
- client = Nosible(
44
- nosible_api_key="basic|abcd1234...",
45
- llm_api_key="sk-...",
46
- base_url="https://api.openrouter.ai/v1"
47
- )
48
- ```
49
-
50
36
  ### 🔑 Authentication
51
37
 
52
- 1. Sign in to nosible.ai and grab your free API key.
38
+ 1. Sign in to [NOSIBLE.AI](https://www.nosible.ai/) and grab your free API key.
53
39
  2. Set it as an environment variable or pass directly:
54
40
 
55
-
56
41
  On Windows
57
42
 
58
43
  ```powershell
@@ -206,7 +191,7 @@ with Nosible(nosible_api_key="basic|abcd1234...") as client:
206
191
 
207
192
  #### Sentiment Analysis
208
193
 
209
- Compute sentiment for a single result (Uses GPT-4o; requires LLM API key):
194
+ Compute sentiment for a single result (uses GPT-4o; requires an LLM API key):
210
195
 
211
196
  ```python
212
197
  from nosible import Nosible
@@ -253,6 +238,10 @@ with Nosible(nosible_api_key="basic|abcd1234...") as client:
253
238
  rs_ndjson = ResultSet.from_ndjson("all_news.ndjson")
254
239
  ```
255
240
 
241
+ #### More Examples
242
+
243
+ For more examples, check out `/examples` for in-depth usage of the NOSIBLE Client Package.
244
+
256
245
  ### 📡 Swagger Docs
257
246
 
258
247
  You can find online endpoints to the NOSIBLE Search API Swagger Docs
@@ -265,33 +254,8 @@ Inspect your current limits at runtime:
265
254
  ```python
266
255
  client.get_ratelimits()
267
256
  ```
268
- Default limits by plan:
269
-
270
- | Plan | Period | Fast Searches | URL Visits | Slow Searches | Cost | CPM |
271
- |----------------|------------|---------------|------------|---------------|----------|------|
272
- | **Free** | Monthly | 3,000 | 300 | 300 | \$0 | $0 |
273
- | | Daily | 100 | 10 | 10 | | |
274
- | | Per-Minute | 10 | 1 | 1 | | |
275
- | **Basic** | Monthly | 30,000 | 3,000 | 3,000 | \$120 | $4 |
276
- | | Daily | 1,000 | 100 | 100 | | |
277
- | | Per-Minute | 10 | 1 | 1 | | |
278
- | **Pro** | Monthly | 150,000 | 7,500 | 7,500 | \$450 | $3 |
279
- | | Daily | 5,000 | 250 | 250 | | |
280
- | | Per-Minute | 10 | 1 | 1 | | |
281
- | **Pro+** | Monthly | 300,000 | 15,000 | 15,000 | \$750 | $2.5 |
282
- | | Daily | 10,000 | 500 | 500 | | |
283
- | | Per-Minute | 10 | 2 | 1 | | |
284
- | **Business** | Monthly | 1,500,000 | 30,000 | 30,000 | \$3,000 | $2 |
285
- | | Daily | 50,000 | 1,000 | 1,000 | | |
286
- | | Per-Minute | 35 | 2 | 2 | | |
287
- | **Business+** | Monthly | 3,000,000 | 60,000 | 60,000 | \$4,500 | $1.5 |
288
- | | Daily | 100,000 | 2,000 | 2,000 | | |
289
- | | Per-Minute | 100 | 3 | 3 | | |
290
- | **Enterprise** | Monthly | 15,000,000 | 150,000 | 150,000 | \$15,000 | $1 |
291
- | | Daily | 500,000 | 5,000 | 5,000 | | |
292
- | | Per-Minute | 400 | 5 | 5 | | |
293
-
294
- *All endpoints are automatically throttled
257
+
258
+ Or you can view them on the [docs](https://nosible-py.readthedocs.io/en/latest/rate_limits.html).
295
259
 
296
260
  ---
297
261
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nosible"
3
- version = "0.1.8"
3
+ version = "0.1.9"
4
4
  description = "Python client for the NOSIBLE Search API"
5
5
  readme = { file = "README.md", content-type = "text/markdown" }
6
6
  requires-python = ">=3.9"
@@ -12,16 +12,16 @@ authors = [
12
12
  ]
13
13
 
14
14
  dependencies = [
15
- 'requests',
16
- 'polars',
17
- 'duckdb',
18
- 'openai',
19
- 'tantivy',
20
- 'pyrate-limiter',
21
- 'tenacity',
22
- 'cryptography',
23
- 'pandas',
24
- 'pyarrow',
15
+ "requests",
16
+ "polars",
17
+ "duckdb",
18
+ "openai",
19
+ "tantivy",
20
+ "pyrate-limiter",
21
+ "tenacity",
22
+ "cryptography",
23
+ "pandas",
24
+ "pyarrow",
25
25
  ]
26
26
 
27
27
  license = "MIT"
@@ -55,3 +55,9 @@ build-backend = "setuptools.build_meta"
55
55
 
56
56
  [tool.setuptools.packages.find]
57
57
  where = ["src"]
58
+
59
+ [tool.uv]
60
+ dev-dependencies = [
61
+ "pytest",
62
+ "pytest-doctestplus",
63
+ ]
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import asdict, dataclass
3
4
  from typing import TYPE_CHECKING
4
5
 
5
6
  from openai import OpenAI
@@ -12,6 +13,7 @@ else:
12
13
  ResultSet = None
13
14
 
14
15
 
16
+ @dataclass(init=True, repr=True, eq=True, frozen=False)
15
17
  class Result:
16
18
  """
17
19
  Represents a single search result, including metadata and content.
@@ -61,31 +63,28 @@ class Result:
61
63
  ['author', 'content', 'description', 'language', 'netloc', 'published', ... 'visited']
62
64
  """
63
65
 
64
- def __init__(
65
- self,
66
- url=None,
67
- title=None,
68
- description=None,
69
- netloc=None,
70
- published=None,
71
- visited=None,
72
- author=None,
73
- content=None,
74
- language=None,
75
- similarity=None,
76
- url_hash=None,
77
- ):
78
- self.url = url
79
- self.title = title
80
- self.description = description
81
- self.netloc = netloc
82
- self.published = published
83
- self.visited = visited
84
- self.author = author
85
- self.content = content
86
- self.language = language
87
- self.similarity = similarity
88
- self.url_hash = url_hash
66
+ url: str | None = None
67
+ """The URL of the search result."""
68
+ title: str | None = None
69
+ """The title of the search result."""
70
+ description: str | None = None
71
+ """A brief description or summary of the search result."""
72
+ netloc: str | None = None
73
+ """The network location (domain) of the URL."""
74
+ published: str | None = None
75
+ """The publication date of the search result."""
76
+ visited: str | None = None
77
+ """The date and time when the result was visited."""
78
+ author: str | None = None
79
+ """The author of the content."""
80
+ content: str | None = None
81
+ """The main content or body of the search result."""
82
+ language: str | None = None
83
+ """The language code of the content (e.g., 'en' for English)."""
84
+ similarity: float | None = None
85
+ """Similarity score with respect to a query or reference."""
86
+ url_hash: str | None = None
87
+ """A hash of the URL for quick comparisons."""
89
88
 
90
89
  def __str__(self) -> str:
91
90
  """
@@ -109,25 +108,6 @@ class Result:
109
108
  title = self.title or "No Title"
110
109
  return f"{similarity:>6} | {title}"
111
110
 
112
- def __repr__(self):
113
- """
114
- Return a detailed string representation for debugging.
115
-
116
- Returns
117
- -------
118
- str
119
- A string mimicking dataclass auto-generated repr, listing all fields and their values.
120
-
121
- Examples
122
- --------
123
- >>> result = Result(url="https://example.com", title="Example Domain")
124
- >>> print(repr(result)) # doctest: +ELLIPSIS
125
- Result(url='https://example.com', title='Example Domain', ... url_hash=None)
126
- """
127
- # like dataclass’s auto-generated repr
128
- fields = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
129
- return f"{self.__class__.__name__}({fields})"
130
-
131
111
  def __getitem__(self, key: str) -> str | float | bool | None:
132
112
  """
133
113
  Retrieve the value of a field by its key.
@@ -166,43 +146,35 @@ class Result:
166
146
  except AttributeError as err:
167
147
  raise KeyError(f"Key '{key}' not found in Result") from err
168
148
 
169
- def __getattr__(self, item: str) -> str | float | bool | None:
149
+ def __add__(self, other: Result) -> ResultSet:
170
150
  """
171
- Retrieve the value of an attribute by its name using __getitem__.
151
+ Combine two Result instances into a ResultSet.
152
+
153
+ This method allows you to add two Result objects together, returning a ResultSet
154
+ containing both results.
172
155
 
173
156
  Parameters
174
157
  ----------
175
- item : str
176
- The name of the attribute to retrieve.
158
+ other : Result
159
+ Another Result instance to combine with this one.
177
160
 
178
161
  Returns
179
162
  -------
180
- str or float or bool or None
181
- The value of the requested attribute.
182
-
183
- Raises
184
- ------
185
- AttributeError
186
- If the attribute does not exist in the object.
163
+ ResultSet
164
+ A ResultSet containing both this and the other Result.
187
165
 
188
166
  Examples
189
167
  --------
190
- >>> result = Result(title="Example Domain", similarity=0.98)
191
- >>> result.__getattr__("title")
192
- 'Example Domain'
193
- >>> result.__getattr__("similarity")
194
- 0.98
195
- >>> result.__getattr__("url") is None
168
+ >>> from nosible import Result, ResultSet
169
+ >>> r1 = Result(title="First Result", similarity=0.9)
170
+ >>> r2 = Result(title="Second Result", similarity=0.8)
171
+ >>> combined = r1 + r2
172
+ >>> isinstance(combined, ResultSet)
196
173
  True
197
- >>> result.__getattr__("nonexistent")
198
- Traceback (most recent call last):
199
- ...
200
- AttributeError: Attribute 'nonexistent' not found in Result
201
174
  """
202
- try:
203
- return self.__getitem__(item)
204
- except KeyError as err:
205
- raise AttributeError(f"Attribute '{item}' not found in Result") from err
175
+ from nosible.classes.result_set import ResultSet
176
+
177
+ return ResultSet([self, other])
206
178
 
207
179
  def visit(self, client) -> WebPageData:
208
180
  """
@@ -347,7 +319,7 @@ class Result:
347
319
  def similar(
348
320
  self,
349
321
  client,
350
- sql_filter: list[str] = None,
322
+ sql_filter: str = None,
351
323
  n_results: int = 100,
352
324
  n_probes: int = 30,
353
325
  n_contextify: int = 128,
@@ -376,40 +348,40 @@ class Result:
376
348
  An instance of the Nosible client to use for finding similar results.
377
349
  sql_filter : str, optional
378
350
  SQL‐style filter clauses.
379
- n_results : int, default=100
351
+ n_results : int
380
352
  Max number of results (max 100).
381
- n_probes : int, default=30
353
+ n_probes : int
382
354
  Number of index shards to probe.
383
- n_contextify : int, default=128
355
+ n_contextify : int
384
356
  Context window size per result.
385
- algorithm : str, default="hybrid-2"
357
+ algorithm : str
386
358
  Search algorithm type.
387
359
  publish_start : str, optional
388
- Earliest publish date filter (ISO formatted date).
360
+ Start date for when the document was published (ISO format).
389
361
  publish_end : str, optional
390
- Latest publish date filter (ISO formatted date).
391
- include_netlocs : list of str, optional
392
- Domains to include.
393
- exclude_netlocs : list of str, optional
394
- Domains to exclude.
362
+ End date for when the document was published (ISO format).
395
363
  visited_start : str, optional
396
- Earliest visit date filter (ISO formatted date).
364
+ Start date for when the document was visited by NOSIBLE (ISO format).
397
365
  visited_end : str, optional
398
- Latest visit date filter (ISO formatted date).
366
+ End date for when the document was visited by NOSIBLE (ISO format).
399
367
  certain : bool, optional
400
- True if we are 100% sure of the date.
368
+ Only include documents where we are 100% sure of the date.
369
+ include_netlocs : list of str, optional
370
+ List of netlocs (domains) to include in the search. (Max: 50)
371
+ exclude_netlocs : list of str, optional
372
+ List of netlocs (domains) to exclude in the search. (Max: 50)
401
373
  include_languages : list of str, optional
402
- Language codes to include.
374
+ Languages to include in the search (Max: 50, ISO 639-1 language codes).
403
375
  exclude_languages : list of str, optional
404
- Language codes to exclude.
376
+ Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
405
377
  include_companies : list of str, optional
406
- Google KG IDs of public companies to require.
378
+ Google KG IDs of public companies to require (Max: 50).
407
379
  exclude_companies : list of str, optional
408
- Google KG IDs of public companies to forbid.
380
+ Google KG IDs of public companies to forbid (Max: 50).
409
381
  include_docs : list of str, optional
410
- URL hashes of docs to include.
382
+ URL hashes of docs to include (Max: 50).
411
383
  exclude_docs : list of str, optional
412
- URL hashes of docs to exclude.
384
+ URL hashes of docs to exclude (Max: 50).
413
385
 
414
386
  Returns
415
387
  -------
@@ -425,9 +397,9 @@ class Result:
425
397
 
426
398
  Examples
427
399
  --------
428
- >>> from nosible import Nosible, Result
429
- >>> with Nosible() as nos:
430
- ... result = Result(url="https://example.com", title="Example Domain")
400
+ >>> from nosible import Nosible, Result # doctest: +SKIP
401
+ >>> with Nosible() as nos: # doctest: +SKIP
402
+ ... result = Result(url="https://example.com", title="Example Domain") # doctest: +SKIP
431
403
  ... similar_results = result.similar(client=nos) # doctest: +SKIP
432
404
  """
433
405
  if client is None:
@@ -492,20 +464,7 @@ class Result:
492
464
  >>> d["visited"]
493
465
  '2024-01-01'
494
466
  """
495
- # manual replacement for asdict()
496
- return {
497
- "url": self.url,
498
- "title": self.title,
499
- "description": self.description,
500
- "netloc": self.netloc,
501
- "published": self.published,
502
- "visited": self.visited,
503
- "author": self.author,
504
- "content": self.content,
505
- "language": self.language,
506
- "similarity": self.similarity,
507
- "url_hash": self.url_hash,
508
- }
467
+ return asdict(self, dict_factory=dict)
509
468
 
510
469
  @classmethod
511
470
  def from_dict(cls, data: dict) -> Result: