nosible 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nosible/classes/result.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import asdict, dataclass
3
4
  from typing import TYPE_CHECKING
4
5
 
5
6
  from openai import OpenAI
@@ -12,6 +13,7 @@ else:
12
13
  ResultSet = None
13
14
 
14
15
 
16
+ @dataclass(init=True, repr=True, eq=True, frozen=False)
15
17
  class Result:
16
18
  """
17
19
  Represents a single search result, including metadata and content.
@@ -61,31 +63,28 @@ class Result:
61
63
  ['author', 'content', 'description', 'language', 'netloc', 'published', ... 'visited']
62
64
  """
63
65
 
64
- def __init__(
65
- self,
66
- url=None,
67
- title=None,
68
- description=None,
69
- netloc=None,
70
- published=None,
71
- visited=None,
72
- author=None,
73
- content=None,
74
- language=None,
75
- similarity=None,
76
- url_hash=None,
77
- ):
78
- self.url = url
79
- self.title = title
80
- self.description = description
81
- self.netloc = netloc
82
- self.published = published
83
- self.visited = visited
84
- self.author = author
85
- self.content = content
86
- self.language = language
87
- self.similarity = similarity
88
- self.url_hash = url_hash
66
+ url: str | None = None
67
+ """The URL of the search result."""
68
+ title: str | None = None
69
+ """The title of the search result."""
70
+ description: str | None = None
71
+ """A brief description or summary of the search result."""
72
+ netloc: str | None = None
73
+ """The network location (domain) of the URL."""
74
+ published: str | None = None
75
+ """The publication date of the search result."""
76
+ visited: str | None = None
77
+ """The date and time when the result was visited."""
78
+ author: str | None = None
79
+ """The author of the content."""
80
+ content: str | None = None
81
+ """The main content or body of the search result."""
82
+ language: str | None = None
83
+ """The language code of the content (e.g., 'en' for English)."""
84
+ similarity: float | None = None
85
+ """Similarity score with respect to a query or reference."""
86
+ url_hash: str | None = None
87
+ """A hash of the URL for quick comparisons."""
89
88
 
90
89
  def __str__(self) -> str:
91
90
  """
@@ -109,25 +108,6 @@ class Result:
109
108
  title = self.title or "No Title"
110
109
  return f"{similarity:>6} | {title}"
111
110
 
112
- def __repr__(self):
113
- """
114
- Return a detailed string representation for debugging.
115
-
116
- Returns
117
- -------
118
- str
119
- A string mimicking dataclass auto-generated repr, listing all fields and their values.
120
-
121
- Examples
122
- --------
123
- >>> result = Result(url="https://example.com", title="Example Domain")
124
- >>> print(repr(result)) # doctest: +ELLIPSIS
125
- Result(url='https://example.com', title='Example Domain', ... url_hash=None)
126
- """
127
- # like dataclass’s auto-generated repr
128
- fields = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
129
- return f"{self.__class__.__name__}({fields})"
130
-
131
111
  def __getitem__(self, key: str) -> str | float | bool | None:
132
112
  """
133
113
  Retrieve the value of a field by its key.
@@ -166,43 +146,35 @@ class Result:
166
146
  except AttributeError as err:
167
147
  raise KeyError(f"Key '{key}' not found in Result") from err
168
148
 
169
- def __getattr__(self, item: str) -> str | float | bool | None:
149
+ def __add__(self, other: Result) -> ResultSet:
170
150
  """
171
- Retrieve the value of an attribute by its name using __getitem__.
151
+ Combine two Result instances into a ResultSet.
152
+
153
+ This method allows you to add two Result objects together, returning a ResultSet
154
+ containing both results.
172
155
 
173
156
  Parameters
174
157
  ----------
175
- item : str
176
- The name of the attribute to retrieve.
158
+ other : Result
159
+ Another Result instance to combine with this one.
177
160
 
178
161
  Returns
179
162
  -------
180
- str or float or bool or None
181
- The value of the requested attribute.
182
-
183
- Raises
184
- ------
185
- AttributeError
186
- If the attribute does not exist in the object.
163
+ ResultSet
164
+ A ResultSet containing both this and the other Result.
187
165
 
188
166
  Examples
189
167
  --------
190
- >>> result = Result(title="Example Domain", similarity=0.98)
191
- >>> result.__getattr__("title")
192
- 'Example Domain'
193
- >>> result.__getattr__("similarity")
194
- 0.98
195
- >>> result.__getattr__("url") is None
168
+ >>> from nosible import Result, ResultSet
169
+ >>> r1 = Result(title="First Result", similarity=0.9)
170
+ >>> r2 = Result(title="Second Result", similarity=0.8)
171
+ >>> combined = r1 + r2
172
+ >>> isinstance(combined, ResultSet)
196
173
  True
197
- >>> result.__getattr__("nonexistent")
198
- Traceback (most recent call last):
199
- ...
200
- AttributeError: Attribute 'nonexistent' not found in Result
201
174
  """
202
- try:
203
- return self.__getitem__(item)
204
- except KeyError as err:
205
- raise AttributeError(f"Attribute '{item}' not found in Result") from err
175
+ from nosible.classes.result_set import ResultSet
176
+
177
+ return ResultSet([self, other])
206
178
 
207
179
  def visit(self, client) -> WebPageData:
208
180
  """
@@ -347,7 +319,7 @@ class Result:
347
319
  def similar(
348
320
  self,
349
321
  client,
350
- sql_filter: list[str] = None,
322
+ sql_filter: str = None,
351
323
  n_results: int = 100,
352
324
  n_probes: int = 30,
353
325
  n_contextify: int = 128,
@@ -376,40 +348,40 @@ class Result:
376
348
  An instance of the Nosible client to use for finding similar results.
377
349
  sql_filter : str, optional
378
350
  SQL‐style filter clauses.
379
- n_results : int, default=100
351
+ n_results : int
380
352
  Max number of results (max 100).
381
- n_probes : int, default=30
353
+ n_probes : int
382
354
  Number of index shards to probe.
383
- n_contextify : int, default=128
355
+ n_contextify : int
384
356
  Context window size per result.
385
- algorithm : str, default="hybrid-2"
357
+ algorithm : str
386
358
  Search algorithm type.
387
359
  publish_start : str, optional
388
- Earliest publish date filter (ISO formatted date).
360
+ Start date for when the document was published (ISO format).
389
361
  publish_end : str, optional
390
- Latest publish date filter (ISO formatted date).
391
- include_netlocs : list of str, optional
392
- Domains to include.
393
- exclude_netlocs : list of str, optional
394
- Domains to exclude.
362
+ End date for when the document was published (ISO format).
395
363
  visited_start : str, optional
396
- Earliest visit date filter (ISO formatted date).
364
+ Start date for when the document was visited by NOSIBLE (ISO format).
397
365
  visited_end : str, optional
398
- Latest visit date filter (ISO formatted date).
366
+ End date for when the document was visited by NOSIBLE (ISO format).
399
367
  certain : bool, optional
400
- True if we are 100% sure of the date.
368
+ Only include documents where we are 100% sure of the date.
369
+ include_netlocs : list of str, optional
370
+ List of netlocs (domains) to include in the search. (Max: 50)
371
+ exclude_netlocs : list of str, optional
372
+ List of netlocs (domains) to exclude from the search. (Max: 50)
401
373
  include_languages : list of str, optional
402
- Language codes to include.
374
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
403
375
  exclude_languages : list of str, optional
404
- Language codes to exclude.
376
+ Language codes to exclude from the search (Max: 50, ISO 639-1 language codes).
405
377
  include_companies : list of str, optional
406
- Google KG IDs of public companies to require.
378
+ Google KG IDs of public companies to require (Max: 50).
407
379
  exclude_companies : list of str, optional
408
- Google KG IDs of public companies to forbid.
380
+ Google KG IDs of public companies to forbid (Max: 50).
409
381
  include_docs : list of str, optional
410
- URL hashes of docs to include.
382
+ URL hashes of docs to include (Max: 50).
411
383
  exclude_docs : list of str, optional
412
- URL hashes of docs to exclude.
384
+ URL hashes of docs to exclude (Max: 50).
413
385
 
414
386
  Returns
415
387
  -------
@@ -425,9 +397,9 @@ class Result:
425
397
 
426
398
  Examples
427
399
  --------
428
- >>> from nosible import Nosible, Result
429
- >>> with Nosible() as nos:
430
- ... result = Result(url="https://example.com", title="Example Domain")
400
+ >>> from nosible import Nosible, Result # doctest: +SKIP
401
+ >>> with Nosible() as nos: # doctest: +SKIP
402
+ ... result = Result(url="https://example.com", title="Example Domain") # doctest: +SKIP
431
403
  ... similar_results = result.similar(client=nos) # doctest: +SKIP
432
404
  """
433
405
  if client is None:
@@ -492,20 +464,7 @@ class Result:
492
464
  >>> d["visited"]
493
465
  '2024-01-01'
494
466
  """
495
- # manual replacement for asdict()
496
- return {
497
- "url": self.url,
498
- "title": self.title,
499
- "description": self.description,
500
- "netloc": self.netloc,
501
- "published": self.published,
502
- "visited": self.visited,
503
- "author": self.author,
504
- "content": self.content,
505
- "language": self.language,
506
- "similarity": self.similarity,
507
- "url_hash": self.url_hash,
508
- }
467
+ return asdict(self, dict_factory=dict)
509
468
 
510
469
  @classmethod
511
470
  def from_dict(cls, data: dict) -> Result: