nosible 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nosible/classes/result.py +69 -106
- nosible/classes/result_set.py +121 -115
- nosible/classes/search.py +83 -88
- nosible/classes/search_set.py +27 -12
- nosible/classes/snippet.py +57 -74
- nosible/classes/snippet_set.py +62 -63
- nosible/classes/web_page.py +39 -103
- nosible/nosible_client.py +551 -234
- nosible/utils/json_tools.py +58 -8
- nosible/utils/question_builder.py +131 -0
- nosible/utils/rate_limiter.py +30 -24
- {nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/METADATA +27 -49
- nosible-0.2.1.dist-info/RECORD +17 -0
- nosible-0.1.8.dist-info/RECORD +0 -16
- {nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/WHEEL +0 -0
- {nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {nosible-0.1.8.dist-info → nosible-0.2.1.dist-info}/top_level.txt +0 -0
nosible/classes/result.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
from dataclasses import asdict, dataclass
|
|
3
4
|
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
from openai import OpenAI
|
|
@@ -12,6 +13,7 @@ else:
|
|
|
12
13
|
ResultSet = None
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
@dataclass(init=True, repr=True, eq=True, frozen=False)
|
|
15
17
|
class Result:
|
|
16
18
|
"""
|
|
17
19
|
Represents a single search result, including metadata and content.
|
|
@@ -61,31 +63,28 @@ class Result:
|
|
|
61
63
|
['author', 'content', 'description', 'language', 'netloc', 'published', ... 'visited']
|
|
62
64
|
"""
|
|
63
65
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
self.language = language
|
|
87
|
-
self.similarity = similarity
|
|
88
|
-
self.url_hash = url_hash
|
|
66
|
+
url: str | None = None
|
|
67
|
+
"""The URL of the search result."""
|
|
68
|
+
title: str | None = None
|
|
69
|
+
"""The title of the search result."""
|
|
70
|
+
description: str | None = None
|
|
71
|
+
"""A brief description or summary of the search result."""
|
|
72
|
+
netloc: str | None = None
|
|
73
|
+
"""The network location (domain) of the URL."""
|
|
74
|
+
published: str | None = None
|
|
75
|
+
"""The publication date of the search result."""
|
|
76
|
+
visited: str | None = None
|
|
77
|
+
"""The date and time when the result was visited."""
|
|
78
|
+
author: str | None = None
|
|
79
|
+
"""The author of the content."""
|
|
80
|
+
content: str | None = None
|
|
81
|
+
"""The main content or body of the search result."""
|
|
82
|
+
language: str | None = None
|
|
83
|
+
"""The language code of the content (e.g., 'en' for English)."""
|
|
84
|
+
similarity: float | None = None
|
|
85
|
+
"""Similarity score with respect to a query or reference."""
|
|
86
|
+
url_hash: str | None = None
|
|
87
|
+
"""A hash of the URL for quick comparisons."""
|
|
89
88
|
|
|
90
89
|
def __str__(self) -> str:
|
|
91
90
|
"""
|
|
@@ -109,25 +108,6 @@ class Result:
|
|
|
109
108
|
title = self.title or "No Title"
|
|
110
109
|
return f"{similarity:>6} | {title}"
|
|
111
110
|
|
|
112
|
-
def __repr__(self):
|
|
113
|
-
"""
|
|
114
|
-
Return a detailed string representation for debugging.
|
|
115
|
-
|
|
116
|
-
Returns
|
|
117
|
-
-------
|
|
118
|
-
str
|
|
119
|
-
A string mimicking dataclass auto-generated repr, listing all fields and their values.
|
|
120
|
-
|
|
121
|
-
Examples
|
|
122
|
-
--------
|
|
123
|
-
>>> result = Result(url="https://example.com", title="Example Domain")
|
|
124
|
-
>>> print(repr(result)) # doctest: +ELLIPSIS
|
|
125
|
-
Result(url='https://example.com', title='Example Domain', ... url_hash=None)
|
|
126
|
-
"""
|
|
127
|
-
# like dataclass’s auto-generated repr
|
|
128
|
-
fields = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
|
|
129
|
-
return f"{self.__class__.__name__}({fields})"
|
|
130
|
-
|
|
131
111
|
def __getitem__(self, key: str) -> str | float | bool | None:
|
|
132
112
|
"""
|
|
133
113
|
Retrieve the value of a field by its key.
|
|
@@ -166,43 +146,35 @@ class Result:
|
|
|
166
146
|
except AttributeError as err:
|
|
167
147
|
raise KeyError(f"Key '{key}' not found in Result") from err
|
|
168
148
|
|
|
169
|
-
def
|
|
149
|
+
def __add__(self, other: Result) -> ResultSet:
|
|
170
150
|
"""
|
|
171
|
-
|
|
151
|
+
Combine two Result instances into a ResultSet.
|
|
152
|
+
|
|
153
|
+
This method allows you to add two Result objects together, returning a ResultSet
|
|
154
|
+
containing both results.
|
|
172
155
|
|
|
173
156
|
Parameters
|
|
174
157
|
----------
|
|
175
|
-
|
|
176
|
-
|
|
158
|
+
other : Result
|
|
159
|
+
Another Result instance to combine with this one.
|
|
177
160
|
|
|
178
161
|
Returns
|
|
179
162
|
-------
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
Raises
|
|
184
|
-
------
|
|
185
|
-
AttributeError
|
|
186
|
-
If the attribute does not exist in the object.
|
|
163
|
+
ResultSet
|
|
164
|
+
A ResultSet containing both this and the other Result.
|
|
187
165
|
|
|
188
166
|
Examples
|
|
189
167
|
--------
|
|
190
|
-
>>>
|
|
191
|
-
>>>
|
|
192
|
-
|
|
193
|
-
>>>
|
|
194
|
-
|
|
195
|
-
>>> result.__getattr__("url") is None
|
|
168
|
+
>>> from nosible import Result, ResultSet
|
|
169
|
+
>>> r1 = Result(title="First Result", similarity=0.9)
|
|
170
|
+
>>> r2 = Result(title="Second Result", similarity=0.8)
|
|
171
|
+
>>> combined = r1 + r2
|
|
172
|
+
>>> isinstance(combined, ResultSet)
|
|
196
173
|
True
|
|
197
|
-
>>> result.__getattr__("nonexistent")
|
|
198
|
-
Traceback (most recent call last):
|
|
199
|
-
...
|
|
200
|
-
AttributeError: Attribute 'nonexistent' not found in Result
|
|
201
174
|
"""
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
raise AttributeError(f"Attribute '{item}' not found in Result") from err
|
|
175
|
+
from nosible.classes.result_set import ResultSet
|
|
176
|
+
|
|
177
|
+
return ResultSet([self, other])
|
|
206
178
|
|
|
207
179
|
def visit(self, client) -> WebPageData:
|
|
208
180
|
"""
|
|
@@ -347,7 +319,7 @@ class Result:
|
|
|
347
319
|
def similar(
|
|
348
320
|
self,
|
|
349
321
|
client,
|
|
350
|
-
sql_filter:
|
|
322
|
+
sql_filter: str = None,
|
|
351
323
|
n_results: int = 100,
|
|
352
324
|
n_probes: int = 30,
|
|
353
325
|
n_contextify: int = 128,
|
|
@@ -363,6 +335,8 @@ class Result:
|
|
|
363
335
|
exclude_languages: list = None,
|
|
364
336
|
include_companies: list = None,
|
|
365
337
|
exclude_companies: list = None,
|
|
338
|
+
include_docs: list = None,
|
|
339
|
+
exclude_docs: list = None,
|
|
366
340
|
) -> ResultSet:
|
|
367
341
|
"""
|
|
368
342
|
Find similar search results based on the content or metadata of this Result.
|
|
@@ -376,40 +350,40 @@ class Result:
|
|
|
376
350
|
An instance of the Nosible client to use for finding similar results.
|
|
377
351
|
sql_filter : list of str, optional
|
|
378
352
|
SQL‐style filter clauses.
|
|
379
|
-
n_results : int
|
|
353
|
+
n_results : int
|
|
380
354
|
Max number of results (max 100).
|
|
381
|
-
n_probes : int
|
|
355
|
+
n_probes : int
|
|
382
356
|
Number of index shards to probe.
|
|
383
|
-
n_contextify : int
|
|
357
|
+
n_contextify : int
|
|
384
358
|
Context window size per result.
|
|
385
|
-
algorithm : str
|
|
359
|
+
algorithm : str
|
|
386
360
|
Search algorithm type.
|
|
387
361
|
publish_start : str, optional
|
|
388
|
-
|
|
362
|
+
Start date for when the document was published (ISO format).
|
|
389
363
|
publish_end : str, optional
|
|
390
|
-
|
|
391
|
-
include_netlocs : list of str, optional
|
|
392
|
-
Domains to include.
|
|
393
|
-
exclude_netlocs : list of str, optional
|
|
394
|
-
Domains to exclude.
|
|
364
|
+
End date for when the document was published (ISO format).
|
|
395
365
|
visited_start : str, optional
|
|
396
|
-
|
|
366
|
+
Start date for when the document was visited by NOSIBLE (ISO format).
|
|
397
367
|
visited_end : str, optional
|
|
398
|
-
|
|
368
|
+
End date for when the document was visited by NOSIBLE (ISO format).
|
|
399
369
|
certain : bool, optional
|
|
400
|
-
|
|
370
|
+
Only include documents where we are 100% sure of the date.
|
|
371
|
+
include_netlocs : list of str, optional
|
|
372
|
+
List of netlocs (domains) to include in the search. (Max: 50)
|
|
373
|
+
exclude_netlocs : list of str, optional
|
|
374
|
+
List of netlocs (domains) to exclude in the search. (Max: 50)
|
|
401
375
|
include_languages : list of str, optional
|
|
402
|
-
|
|
376
|
+
Languages to include in the search. (Max: 50, ISO 639-1 language codes).
|
|
403
377
|
exclude_languages : list of str, optional
|
|
404
|
-
Language codes to exclude.
|
|
378
|
+
Language codes to exclude in the search (Max: 50, ISO 639-1 language codes).
|
|
405
379
|
include_companies : list of str, optional
|
|
406
|
-
Google KG IDs of public companies to require.
|
|
380
|
+
Google KG IDs of public companies to require (Max: 50).
|
|
407
381
|
exclude_companies : list of str, optional
|
|
408
|
-
Google KG IDs of public companies to forbid.
|
|
382
|
+
Google KG IDs of public companies to forbid (Max: 50).
|
|
409
383
|
include_docs : list of str, optional
|
|
410
|
-
URL hashes of docs to include.
|
|
384
|
+
URL hashes of docs to include (Max: 50).
|
|
411
385
|
exclude_docs : list of str, optional
|
|
412
|
-
URL hashes of docs to exclude.
|
|
386
|
+
URL hashes of docs to exclude (Max: 50).
|
|
413
387
|
|
|
414
388
|
Returns
|
|
415
389
|
-------
|
|
@@ -425,9 +399,9 @@ class Result:
|
|
|
425
399
|
|
|
426
400
|
Examples
|
|
427
401
|
--------
|
|
428
|
-
>>> from nosible import Nosible, Result
|
|
429
|
-
>>> with Nosible() as nos:
|
|
430
|
-
... result = Result(url="https://example.com", title="Example Domain")
|
|
402
|
+
>>> from nosible import Nosible, Result # doctest: +SKIP
|
|
403
|
+
>>> with Nosible() as nos: # doctest: +SKIP
|
|
404
|
+
... result = Result(url="https://example.com", title="Example Domain") # doctest: +SKIP
|
|
431
405
|
... similar_results = result.similar(client=nos) # doctest: +SKIP
|
|
432
406
|
"""
|
|
433
407
|
if client is None:
|
|
@@ -457,6 +431,8 @@ class Result:
|
|
|
457
431
|
exclude_languages=exclude_languages,
|
|
458
432
|
include_companies=include_companies,
|
|
459
433
|
exclude_companies=exclude_companies,
|
|
434
|
+
include_docs=include_docs,
|
|
435
|
+
exclude_docs=exclude_docs,
|
|
460
436
|
)
|
|
461
437
|
return client.search(search=s)
|
|
462
438
|
except Exception as e:
|
|
@@ -492,20 +468,7 @@ class Result:
|
|
|
492
468
|
>>> d["visited"]
|
|
493
469
|
'2024-01-01'
|
|
494
470
|
"""
|
|
495
|
-
|
|
496
|
-
return {
|
|
497
|
-
"url": self.url,
|
|
498
|
-
"title": self.title,
|
|
499
|
-
"description": self.description,
|
|
500
|
-
"netloc": self.netloc,
|
|
501
|
-
"published": self.published,
|
|
502
|
-
"visited": self.visited,
|
|
503
|
-
"author": self.author,
|
|
504
|
-
"content": self.content,
|
|
505
|
-
"language": self.language,
|
|
506
|
-
"similarity": self.similarity,
|
|
507
|
-
"url_hash": self.url_hash,
|
|
508
|
-
}
|
|
471
|
+
return asdict(self, dict_factory=dict)
|
|
509
472
|
|
|
510
473
|
@classmethod
|
|
511
474
|
def from_dict(cls, data: dict) -> Result:
|