nosible 0.1.8__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nosible/classes/result.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import asdict, dataclass
3
4
  from typing import TYPE_CHECKING
4
5
 
5
6
  from openai import OpenAI
@@ -12,6 +13,7 @@ else:
12
13
  ResultSet = None
13
14
 
14
15
 
16
+ @dataclass(init=True, repr=True, eq=True, frozen=False)
15
17
  class Result:
16
18
  """
17
19
  Represents a single search result, including metadata and content.
@@ -61,31 +63,28 @@ class Result:
61
63
  ['author', 'content', 'description', 'language', 'netloc', 'published', ... 'visited']
62
64
  """
63
65
 
64
- def __init__(
65
- self,
66
- url=None,
67
- title=None,
68
- description=None,
69
- netloc=None,
70
- published=None,
71
- visited=None,
72
- author=None,
73
- content=None,
74
- language=None,
75
- similarity=None,
76
- url_hash=None,
77
- ):
78
- self.url = url
79
- self.title = title
80
- self.description = description
81
- self.netloc = netloc
82
- self.published = published
83
- self.visited = visited
84
- self.author = author
85
- self.content = content
86
- self.language = language
87
- self.similarity = similarity
88
- self.url_hash = url_hash
66
+ url: str | None = None
67
+ """The URL of the search result."""
68
+ title: str | None = None
69
+ """The title of the search result."""
70
+ description: str | None = None
71
+ """A brief description or summary of the search result."""
72
+ netloc: str | None = None
73
+ """The network location (domain) of the URL."""
74
+ published: str | None = None
75
+ """The publication date of the search result."""
76
+ visited: str | None = None
77
+ """The date and time when the result was visited."""
78
+ author: str | None = None
79
+ """The author of the content."""
80
+ content: str | None = None
81
+ """The main content or body of the search result."""
82
+ language: str | None = None
83
+ """The language code of the content (e.g., 'en' for English)."""
84
+ similarity: float | None = None
85
+ """Similarity score with respect to a query or reference."""
86
+ url_hash: str | None = None
87
+ """A hash of the URL for quick comparisons."""
89
88
 
90
89
  def __str__(self) -> str:
91
90
  """
@@ -109,25 +108,6 @@ class Result:
109
108
  title = self.title or "No Title"
110
109
  return f"{similarity:>6} | {title}"
111
110
 
112
- def __repr__(self):
113
- """
114
- Return a detailed string representation for debugging.
115
-
116
- Returns
117
- -------
118
- str
119
- A string mimicking dataclass auto-generated repr, listing all fields and their values.
120
-
121
- Examples
122
- --------
123
- >>> result = Result(url="https://example.com", title="Example Domain")
124
- >>> print(repr(result)) # doctest: +ELLIPSIS
125
- Result(url='https://example.com', title='Example Domain', ... url_hash=None)
126
- """
127
- # like dataclass’s auto-generated repr
128
- fields = ", ".join(f"{k}={v!r}" for k, v in self.to_dict().items())
129
- return f"{self.__class__.__name__}({fields})"
130
-
131
111
  def __getitem__(self, key: str) -> str | float | bool | None:
132
112
  """
133
113
  Retrieve the value of a field by its key.
@@ -166,43 +146,35 @@ class Result:
166
146
  except AttributeError as err:
167
147
  raise KeyError(f"Key '{key}' not found in Result") from err
168
148
 
169
- def __getattr__(self, item: str) -> str | float | bool | None:
149
+ def __add__(self, other: Result) -> ResultSet:
170
150
  """
171
- Retrieve the value of an attribute by its name using __getitem__.
151
+ Combine two Result instances into a ResultSet.
152
+
153
+ This method allows you to add two Result objects together, returning a ResultSet
154
+ containing both results.
172
155
 
173
156
  Parameters
174
157
  ----------
175
- item : str
176
- The name of the attribute to retrieve.
158
+ other : Result
159
+ Another Result instance to combine with this one.
177
160
 
178
161
  Returns
179
162
  -------
180
- str or float or bool or None
181
- The value of the requested attribute.
182
-
183
- Raises
184
- ------
185
- AttributeError
186
- If the attribute does not exist in the object.
163
+ ResultSet
164
+ A ResultSet containing both this and the other Result.
187
165
 
188
166
  Examples
189
167
  --------
190
- >>> result = Result(title="Example Domain", similarity=0.98)
191
- >>> result.__getattr__("title")
192
- 'Example Domain'
193
- >>> result.__getattr__("similarity")
194
- 0.98
195
- >>> result.__getattr__("url") is None
168
+ >>> from nosible import Result, ResultSet
169
+ >>> r1 = Result(title="First Result", similarity=0.9)
170
+ >>> r2 = Result(title="Second Result", similarity=0.8)
171
+ >>> combined = r1 + r2
172
+ >>> isinstance(combined, ResultSet)
196
173
  True
197
- >>> result.__getattr__("nonexistent")
198
- Traceback (most recent call last):
199
- ...
200
- AttributeError: Attribute 'nonexistent' not found in Result
201
174
  """
202
- try:
203
- return self.__getitem__(item)
204
- except KeyError as err:
205
- raise AttributeError(f"Attribute '{item}' not found in Result") from err
175
+ from nosible.classes.result_set import ResultSet
176
+
177
+ return ResultSet([self, other])
206
178
 
207
179
  def visit(self, client) -> WebPageData:
208
180
  """
@@ -347,7 +319,7 @@ class Result:
347
319
  def similar(
348
320
  self,
349
321
  client,
350
- sql_filter: list[str] = None,
322
+ sql_filter: str = None,
351
323
  n_results: int = 100,
352
324
  n_probes: int = 30,
353
325
  n_contextify: int = 128,
@@ -363,6 +335,8 @@ class Result:
363
335
  exclude_languages: list = None,
364
336
  include_companies: list = None,
365
337
  exclude_companies: list = None,
338
+ include_docs: list = None,
339
+ exclude_docs: list = None,
366
340
  ) -> ResultSet:
367
341
  """
368
342
  Find similar search results based on the content or metadata of this Result.
@@ -376,40 +350,40 @@ class Result:
376
350
  An instance of the Nosible client to use for finding similar results.
377
351
  sql_filter : str, optional
378
352
  SQL‐style filter clauses.
379
- n_results : int, default=100
353
+ n_results : int
380
354
  Max number of results (max 100).
381
- n_probes : int, default=30
355
+ n_probes : int
382
356
  Number of index shards to probe.
383
- n_contextify : int, default=128
357
+ n_contextify : int
384
358
  Context window size per result.
385
- algorithm : str, default="hybrid-2"
359
+ algorithm : str
386
360
  Search algorithm type.
387
361
  publish_start : str, optional
388
- Earliest publish date filter (ISO formatted date).
362
+ Start date for when the document was published (ISO format).
389
363
  publish_end : str, optional
390
- Latest publish date filter (ISO formatted date).
391
- include_netlocs : list of str, optional
392
- Domains to include.
393
- exclude_netlocs : list of str, optional
394
- Domains to exclude.
364
+ End date for when the document was published (ISO format).
395
365
  visited_start : str, optional
396
- Earliest visit date filter (ISO formatted date).
366
+ Start date for when the document was visited by NOSIBLE (ISO format).
397
367
  visited_end : str, optional
398
- Latest visit date filter (ISO formatted date).
368
+ End date for when the document was visited by NOSIBLE (ISO format).
399
369
  certain : bool, optional
400
- True if we are 100% sure of the date.
370
+ Only include documents where we are 100% sure of the date.
371
+ include_netlocs : list of str, optional
372
+ List of netlocs (domains) to include in the search. (Max: 50)
373
+ exclude_netlocs : list of str, optional
374
+ List of netlocs (domains) to exclude from the search. (Max: 50)
401
375
  include_languages : list of str, optional
402
- Language codes to include.
376
+ Languages to include in the search. (Max: 50, ISO 639-1 language codes).
403
377
  exclude_languages : list of str, optional
404
- Language codes to exclude.
378
+ Language codes to exclude from the search (Max: 50, ISO 639-1 language codes).
405
379
  include_companies : list of str, optional
406
- Google KG IDs of public companies to require.
380
+ Google KG IDs of public companies to require (Max: 50).
407
381
  exclude_companies : list of str, optional
408
- Google KG IDs of public companies to forbid.
382
+ Google KG IDs of public companies to forbid (Max: 50).
409
383
  include_docs : list of str, optional
410
- URL hashes of docs to include.
384
+ URL hashes of docs to include (Max: 50).
411
385
  exclude_docs : list of str, optional
412
- URL hashes of docs to exclude.
386
+ URL hashes of docs to exclude (Max: 50).
413
387
 
414
388
  Returns
415
389
  -------
@@ -425,9 +399,9 @@ class Result:
425
399
 
426
400
  Examples
427
401
  --------
428
- >>> from nosible import Nosible, Result
429
- >>> with Nosible() as nos:
430
- ... result = Result(url="https://example.com", title="Example Domain")
402
+ >>> from nosible import Nosible, Result # doctest: +SKIP
403
+ >>> with Nosible() as nos: # doctest: +SKIP
404
+ ... result = Result(url="https://example.com", title="Example Domain") # doctest: +SKIP
431
405
  ... similar_results = result.similar(client=nos) # doctest: +SKIP
432
406
  """
433
407
  if client is None:
@@ -457,6 +431,8 @@ class Result:
457
431
  exclude_languages=exclude_languages,
458
432
  include_companies=include_companies,
459
433
  exclude_companies=exclude_companies,
434
+ include_docs=include_docs,
435
+ exclude_docs=exclude_docs,
460
436
  )
461
437
  return client.search(search=s)
462
438
  except Exception as e:
@@ -492,20 +468,7 @@ class Result:
492
468
  >>> d["visited"]
493
469
  '2024-01-01'
494
470
  """
495
- # manual replacement for asdict()
496
- return {
497
- "url": self.url,
498
- "title": self.title,
499
- "description": self.description,
500
- "netloc": self.netloc,
501
- "published": self.published,
502
- "visited": self.visited,
503
- "author": self.author,
504
- "content": self.content,
505
- "language": self.language,
506
- "similarity": self.similarity,
507
- "url_hash": self.url_hash,
508
- }
471
+ return asdict(self, dict_factory=dict)
509
472
 
510
473
  @classmethod
511
474
  def from_dict(cls, data: dict) -> Result: