nosible 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections.abc import Iterator
4
+ from dataclasses import dataclass, field
4
5
 
5
6
  import duckdb
6
7
  import pandas as pd
@@ -11,6 +12,7 @@ from nosible.classes.result import Result
11
12
  from nosible.utils.json_tools import json_dumps, json_loads
12
13
 
13
14
 
15
+ @dataclass(frozen=True)
14
16
  class ResultSet(Iterator[Result]):
15
17
  """
16
18
  Container class for managing and processing a sequence of Result objects.
@@ -57,33 +59,10 @@ class ResultSet(Iterator[Result]):
57
59
  "url_hash",
58
60
  ]
59
61
 
60
- def __init__(self, results: list[Result] | None = None) -> None:
61
- self.results: list[Result] = results or []
62
- self._index: int = 0
63
-
64
- def _as_dicts(self):
65
- """
66
- Convert the ResultSet to a list of dictionaries.
67
-
68
- Returns
69
- -------
70
- list of dict
71
- List of dictionaries representing each Result.
72
-
73
- """
74
- # dataclass.asdict handles nested structures too
75
- return [r.to_dict() for r in self.results]
76
-
77
- def _as_columns(self):
78
- """
79
- Convert the ResultSet to a dictionary of lists, suitable for DataFrame creation.
80
-
81
- Returns
82
- -------
83
- dict
84
- Dictionary where keys are field names and values are lists of field values.
85
- """
86
- return {f: [getattr(r, f) for r in self.results] for f in self._FIELDS}
62
+ results: list[Result] = field(default_factory=list)
63
+ """ List of Result objects contained in this ResultSet."""
64
+ _index: int = field(default=0, init=False, repr=False, compare=False)
65
+ """ Internal index for iteration over results."""
87
66
 
88
67
  def __len__(self) -> int:
89
68
  """
@@ -116,8 +95,8 @@ class ResultSet(Iterator[Result]):
116
95
  >>> print(search_results) # doctest: +NORMALIZE_WHITESPACE
117
96
  Idx | Similarity | Title
118
97
  ------------------------
119
- 0 | 0.95 | Example Domain
120
- 1 | 0.99 | OpenAI
98
+ 0 | 0.95 | Example Domain
99
+ 1 | 0.99 | OpenAI
121
100
 
122
101
  >>> empty = ResultSet([])
123
102
  >>> print(empty)
@@ -129,29 +108,18 @@ class ResultSet(Iterator[Result]):
129
108
  # Create a formatted string for each result
130
109
  lines = []
131
110
  for idx, result in enumerate(self.results):
132
- similarity = f"{result.similarity:.2f}" if result.similarity is not None else "N/A"
111
+ similarity = f"{result.similarity:.2f}" if result.similarity is not None else " N/A"
133
112
  title = result.title or "No Title"
134
- lines.append(f"{idx:>3} | {similarity:>6} | {title}")
113
+ lines.append(f"{idx:>3} | {similarity:>10} | {title}")
135
114
 
136
- # Add a header
137
- header = "Idx | Similarity | Title"
115
+ # Add a header with matching column widths
116
+ header = f"{'Idx':>3} | {'Similarity':>10} | Title"
138
117
  separator = "-" * len(header)
139
118
  lines.insert(0, header)
140
119
  lines.insert(1, separator)
141
120
  # Join all lines into a single string
142
121
  return "\n".join(lines)
143
122
 
144
- def __repr__(self) -> str:
145
- """
146
- Returns a string representation of the object for interactive sessions.
147
-
148
- Returns
149
- -------
150
- str
151
- The string representation of the object, as returned by `__str__()`.
152
- """
153
- return self.__str__()
154
-
155
123
  def __iter__(self) -> ResultSet:
156
124
  """
157
125
  Reset iteration and return self.
@@ -161,7 +129,7 @@ class ResultSet(Iterator[Result]):
161
129
  ResultSet
162
130
  Iterator over the ResultSet instance.
163
131
  """
164
- self._index = 0
132
+ object.__setattr__(self, "_index", 0)
165
133
  return self
166
134
 
167
135
  def __next__(self) -> Result:
@@ -179,10 +147,29 @@ class ResultSet(Iterator[Result]):
179
147
  """
180
148
  if self._index < len(self.results):
181
149
  item = self.results[self._index]
182
- self._index += 1
150
+ object.__setattr__(self, "_index", self._index + 1)
183
151
  return item
184
152
  raise StopIteration
185
153
 
154
+ def __eq__(self, value):
155
+ """
156
+ Compare set of url_hashes to determine equality.
157
+ Two ResultSet instances are considered equal if they contain the same set of url_hashes.
158
+
159
+ Parameters
160
+ ----------
161
+ value : ResultSet
162
+ The ResultSet instance to compare against.
163
+ Returns
164
+ -------
165
+ bool
166
+ True if both ResultSet instances contain the same set of url_hashes, False otherwise.
167
+ """
168
+ if not isinstance(value, ResultSet):
169
+ return False
170
+ # Compare the sets of url_hashes
171
+ return {r.url_hash for r in self.results} == {r.url_hash for r in value.results}
172
+
186
173
  def __enter__(self) -> ResultSet:
187
174
  """
188
175
  Enters the runtime context related to this object.
@@ -217,29 +204,9 @@ class ResultSet(Iterator[Result]):
217
204
  if 0 <= key < len(self.results):
218
205
  return self.results[key]
219
206
  raise IndexError(f"Index {key} out of range for ResultSet with length {len(self.results)}.")
207
+ raise IndexError(f"Index {key} out of range for ResultSet with length {len(self.results)}.")
220
208
 
221
- def __setitem__(self, key: int, value: Result) -> None:
222
- """
223
- Set a Result at a specific index.
224
-
225
- Parameters
226
- ----------
227
- key : int
228
- Index to set the result at.
229
- value : Result
230
- Result to set at the specified index.
231
-
232
- Raises
233
- ------
234
- IndexError
235
- If index is out of range.
236
- """
237
- if 0 <= key < len(self.results):
238
- self.results[key] = value
239
- else:
240
- raise IndexError(f"Index {key} out of range for ResultSet with length {len(self.results)}.")
241
-
242
- def __add__(self, other: ResultSet) -> ResultSet:
209
+ def __add__(self, other: ResultSet | Result) -> ResultSet:
243
210
  """
244
211
  Concatenate two ResultSet instances.
245
212
 
@@ -265,9 +232,12 @@ class ResultSet(Iterator[Result]):
265
232
  >>> len(combined)
266
233
  2
267
234
  """
268
- if not isinstance(other, ResultSet):
269
- raise TypeError("Can only concatenate ResultSet with another ResultSet.")
270
- return ResultSet(self.results + other.results)
235
+ if isinstance(other, ResultSet):
236
+ return ResultSet(self.results + other.results)
237
+ if isinstance(other, Result):
238
+ # If other is a single Result, create a new ResultSet with it
239
+ return ResultSet(self.results.append(other))
240
+ raise TypeError("Can only concatenate ResultSet with another ResultSet.")
271
241
 
272
242
  def __sub__(self, other: ResultSet) -> ResultSet:
273
243
  """
@@ -321,7 +291,7 @@ class ResultSet(Iterator[Result]):
321
291
  ----------
322
292
  query : str
323
293
  The search string to rank within these results.
324
- top_k : int, default=10
294
+ top_k : int
325
295
  Number of top results to return.
326
296
 
327
297
  Returns
@@ -435,6 +405,39 @@ class ResultSet(Iterator[Result]):
435
405
  ... summary = results.analyze(by="language")
436
406
  ... print(summary)
437
407
  {'en': 100}
408
+ >>> import polars as pl
409
+ >>> from nosible.classes.result_set import Result, ResultSet
410
+
411
+ # -- date grouping (published) --------------------------------------------
412
+ >>> data = [
413
+ ... {"published": "2021-01-15", "netloc": "a.com", "author": "", "language": "en", "similarity": 0.5},
414
+ ... {"published": "2021-02-20", "netloc": "a.com", "author": "", "language": "en", "similarity": 0.8},
415
+ ... {"published": "2021-02-25", "netloc": "b.org", "author": "", "language": "fr", "similarity": 0.2},
416
+ ... ]
417
+ >>> results = ResultSet([Result(**d) for d in data])
418
+ >>> results.analyze(by="published") # doctest: +NORMALIZE_WHITESPACE
419
+ {'2021-01': 1, '2021-02': 2}
420
+
421
+ # -- numeric stats (similarity) ------------------------------------------
422
+ >>> stats = results.analyze(by="similarity")
423
+ >>> set(stats) == {"count", "null_count", "mean", "std", "min", "25%", "50%", "75%", "max"}
424
+ True
425
+ >>> round(stats["mean"], 2)
426
+ 0.5
427
+
428
+ # -- categorical counts (language) --------------------------------------
429
+ >>> results.analyze(by="language")
430
+ {'en': 2, 'fr': 1}
431
+
432
+ # -- author special case ------------------------------------------------
433
+ # empty author strings get mapped to "Author Unknown"
434
+ >>> results.analyze(by="author")
435
+ {'Author Unknown': 3}
436
+
437
+ # -- invalid field -------------------------------------------------------
438
+ >>> results.analyze(by="foobar") # doctest: +IGNORE_EXCEPTION_DETAIL
439
+ Traceback (most recent call last):
440
+ ValueError: Cannot analyze by 'foobar' - not a valid field.
438
441
  """
439
442
  # Convert to Polars DataFrame
440
443
  df: pl.DataFrame = self.to_polars()
@@ -451,7 +454,7 @@ class ResultSet(Iterator[Result]):
451
454
  # Handle author unknown
452
455
  if by == "author":
453
456
  df = df.with_columns(
454
- pl.when(pl.col("author").str.strip() == "")
457
+ pl.when(pl.col("author") == "")
455
458
  .then(pl.lit("Author Unknown"))
456
459
  .otherwise(pl.col("author"))
457
460
  .alias("author")
@@ -464,7 +467,7 @@ class ResultSet(Iterator[Result]):
464
467
  # Extract year-month
465
468
  df = df.with_columns(pl.col(by).dt.strftime("%Y-%m").alias("year_month"))
466
469
  # Count per month
467
- vc = df.groupby("year_month").agg(pl.count().alias("count")).sort("year_month")
470
+ vc = df.group_by("year_month").agg(pl.count().alias("count")).sort("year_month")
468
471
  rows = vc.rows()
469
472
  if not rows:
470
473
  return {}
@@ -477,13 +480,15 @@ class ResultSet(Iterator[Result]):
477
480
  result[month] = cnt
478
481
  return result
479
482
 
483
+ # Numeric stats for similarity
484
+ if by == "similarity":
485
+ desc_df = df["similarity"].describe()
486
+ # print({row[0]: float(row[1]) for row in desc_df.rows()})
487
+ return {row[0]: float(row[1]) for row in desc_df.rows()}
488
+
480
489
  # Non-date: analyze numeric vs. categorical
481
490
  series = df[by]
482
- dtype = series.dtype
483
- # Numeric analysis: descriptive stats
484
- if dtype in (pl.Float64, pl.Float32, pl.Int64, pl.Int32):
485
- desc_df = series.describe()
486
- return {row[0]: float(row[1]) for row in desc_df.rows()}
491
+
487
492
  # Categorical/value counts
488
493
  vc = series.value_counts()
489
494
  _, count_col = vc.columns
@@ -502,11 +507,11 @@ class ResultSet(Iterator[Result]):
502
507
  Parameters
503
508
  ----------
504
509
  file_path : str or None, optional
505
- Path to save the CSV file. If None, defaults to "search_results.csv".
510
+ Path to save the CSV file.
506
511
  delimiter : str, optional
507
- Delimiter to use in the CSV file. Default is ','.
512
+ Delimiter to use in the CSV file.
508
513
  encoding : str, optional
509
- Encoding for the CSV file. Default is "utf-8".
514
+ Encoding for the CSV file.
510
515
 
511
516
  Returns
512
517
  -------
@@ -566,7 +571,7 @@ class ResultSet(Iterator[Result]):
566
571
  >>> "url" in df.columns
567
572
  True
568
573
  """
569
- return pl.DataFrame(self._as_columns())
574
+ return pl.DataFrame(self.to_dicts())
570
575
 
571
576
  def to_pandas(self) -> pd.DataFrame:
572
577
  """
@@ -602,7 +607,7 @@ class ResultSet(Iterator[Result]):
602
607
  except Exception as e:
603
608
  raise RuntimeError(f"Failed to convert search results to Pandas DataFrame: {e}") from e
604
609
 
605
- def to_json(self, file_path: str | None = None) -> str:
610
+ def to_json(self, file_path: str | None = None) -> str | bytes:
606
611
  """
607
612
  Serialize the search results to a JSON string and optionally write to disk.
608
613
 
@@ -637,7 +642,7 @@ class ResultSet(Iterator[Result]):
637
642
  True
638
643
  """
639
644
  try:
640
- json_bytes = json_dumps(self._as_dicts())
645
+ json_bytes = json_dumps(self.to_dicts())
641
646
  if file_path:
642
647
  try:
643
648
  with open(file_path, "w") as f:
@@ -684,9 +689,9 @@ class ResultSet(Iterator[Result]):
684
689
  True
685
690
  """
686
691
  try:
687
- return self._as_dicts()
692
+ return [result.to_dict() for result in self.results]
688
693
  except Exception as e:
689
- raise RuntimeError(f"Failed to convert results to list of dicts: {e}") from e
694
+ raise RuntimeError(f"Failed to convert results to list of dictionaries: {e}") from e
690
695
 
691
696
  def to_dict(self) -> dict:
692
697
  """
@@ -738,7 +743,6 @@ class ResultSet(Iterator[Result]):
738
743
  ----------
739
744
  file_path : str or None, optional
740
745
  Path to save the NDJSON file. If None, returns the NDJSON string.
741
- Default is None.
742
746
 
743
747
  Returns
744
748
  -------
@@ -766,18 +770,22 @@ class ResultSet(Iterator[Result]):
766
770
  >>> path.endswith(".ndjson")
767
771
  True
768
772
  """
769
- try:
770
- lines = "\n".join(json_dumps(d) for d in self._as_dicts())
771
- if file_path:
772
- try:
773
- with open(file_path, "w") as f:
774
- f.write(lines)
775
- return file_path
776
- except Exception as e:
777
- raise RuntimeError(f"Failed to write NDJSON to '{file_path}': {e}") from e
778
- return lines
779
- except Exception as e:
780
- raise RuntimeError(f"Failed to serialize results to NDJSON: {e}") from e
773
+
774
+ ndjson_lines = []
775
+ for result in self.results:
776
+ try:
777
+ ndjson_lines.append(json_dumps(result.to_dict()))
778
+ except Exception as e:
779
+ raise RuntimeError(f"Failed to serialize Result to NDJSON: {e}") from e
780
+
781
+ if file_path:
782
+ try:
783
+ with open(file_path, "w", encoding="utf-8") as f:
784
+ f.write("\n".join(ndjson_lines) + "\n")
785
+ return file_path
786
+ except Exception as e:
787
+ raise RuntimeError(f"Failed to write NDJSON to '{file_path}': {e}") from e
788
+ return "\n".join(ndjson_lines) + "\n"
781
789
 
782
790
  def to_parquet(self, file_path: str | None = None) -> str:
783
791
  """
@@ -789,7 +797,7 @@ class ResultSet(Iterator[Result]):
789
797
  Parameters
790
798
  ----------
791
799
  file_path : str or None, optional
792
- Path to save the Parquet file. If None, defaults to "results.parquet".
800
+ Path to save the Parquet file.
793
801
 
794
802
  Returns
795
803
  -------
@@ -830,7 +838,7 @@ class ResultSet(Iterator[Result]):
830
838
  Parameters
831
839
  ----------
832
840
  file_path : str or None, optional
833
- Path to save the Arrow IPC file. If None, defaults to "results.arrow".
841
+ Path to save the Arrow IPC file.
834
842
 
835
843
  Returns
836
844
  -------
@@ -872,9 +880,9 @@ class ResultSet(Iterator[Result]):
872
880
  Parameters
873
881
  ----------
874
882
  file_path : str or None, optional
875
- Path to save the DuckDB file. If None, defaults to "results.duckdb".
883
+ Path to save the DuckDB file.
876
884
  table_name : str, optional
877
- Name of the table to write the results to. Default is "results".
885
+ Name of the table to write the results to.
878
886
 
879
887
  Returns
880
888
  -------
@@ -1006,11 +1014,6 @@ class ResultSet(Iterator[Result]):
1006
1014
  --------
1007
1015
  >>> import json
1008
1016
  >>> from nosible import ResultSet
1009
- >>> # Suppose 'data.json' contains:
1010
- >>> # [
1011
- >>> # {"url": "https://example.com", "title": "Example Domain"},
1012
- >>> # {"url": "https://openai.com", "title": "OpenAI"}
1013
- >>> # ]
1014
1017
  >>> with open("data.json", "w") as f:
1015
1018
  ... json.dump(
1016
1019
  ... [
@@ -1097,20 +1100,23 @@ class ResultSet(Iterator[Result]):
1097
1100
 
1098
1101
  @classmethod
1099
1102
  def from_pandas(cls, df: pd.DataFrame) -> ResultSet:
1100
- """Create a ResultSet instance from a pandas DataFrame.
1103
+ """
1104
+ Create a ResultSet instance from a pandas DataFrame.
1101
1105
  This class method converts a given pandas DataFrame to a Polars DataFrame
1102
1106
  and then constructs a ResultSet object from it. This is useful for
1103
1107
  integrating with workflows that use pandas for data manipulation.
1108
+
1104
1109
  Parameters
1105
1110
  ----------
1106
1111
  df : pandas.DataFrame
1107
- DataFrame containing the search result fields. Each row should represent
1108
- a single search result, with columns corresponding to the expected fields
1109
- of ResultSet.
1112
+ DataFrame containing the search result fields. Each row should represent a single search result, with
1113
+ columns corresponding to the expected fields of ResultSet.
1114
+
1110
1115
  Returns
1111
1116
  -------
1112
1117
  ResultSet
1113
1118
  An instance of ResultSet containing the data from the input DataFrame.
1119
+
1114
1120
  Examples
1115
1121
  --------
1116
1122
  >>> data = [{"url": "https://example.com", "title": "Example"}]
nosible/classes/search.py CHANGED
@@ -1,13 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import asdict, dataclass
3
4
  from typing import TYPE_CHECKING
4
5
 
5
- from nosible.utils.json_tools import json_dumps, json_loads
6
+ from nosible.utils.json_tools import json_dumps, json_loads, print_dict
6
7
 
7
8
  if TYPE_CHECKING:
8
9
  from nosible.classes.search_set import SearchSet
9
10
 
10
11
 
12
+ @dataclass(init=True, repr=True, eq=True)
11
13
  class Search:
12
14
  """
13
15
  Represents the parameters for a search operation.
@@ -31,45 +33,38 @@ class Search:
31
33
  Number of context documents to retrieve.
32
34
  algorithm : str, optional
33
35
  Search algorithm to use.
34
- output_type : str, optional
35
- Type of output to produce.
36
36
  autogenerate_expansions : bool, default=False
37
37
  Do you want to generate expansions automatically using a LLM?
38
38
  publish_start : str, optional
39
- Start date for published documents (ISO format).
39
+ Start date for when the document was published (ISO format).
40
40
  publish_end : str, optional
41
- End date for published documents (ISO format).
42
- include_netlocs : list of str, optional
43
- List of netlocs (domains) to include in the search.
44
- exclude_netlocs : list of str, optional
45
- List of netlocs (domains) to exclude from the search.
41
+ End date for when the document was published (ISO format).
46
42
  visited_start : str, optional
47
- Start date for visited documents (ISO format).
43
+ Start date for when the document was visited by NOSIBLE (ISO format).
48
44
  visited_end : str, optional
49
- End date for visited documents (ISO format).
45
+ End date for when the document was visited by NOSIBLE (ISO format).
50
46
  certain : bool, optional
51
- Whether to only include certain results.
52
- include_languages : list of str, optional
53
- Languages to include in the search (Max: 50).
54
- exclude_languages : list of str, optional
55
- Languages to exclude from the search (Max: 50).
47
+ Only include documents where we are 100% sure of the date.
56
48
  include_netlocs : list of str, optional
57
- Only include results from these domains (Max: 50).
49
+ List of netlocs (domains) to include in the search. (Max 50)
58
50
  exclude_netlocs : list of str, optional
59
- Exclude results from these domains (Max: 50).
51
+ List of netlocs (domains) to exclude in the search. (Max 50)
52
+ include_languages : list of str, optional
53
+ Languages to include in the search. (Max 50, ISO 639-1 language codes).
54
+ exclude_languages : list of str, optional
55
+ Language codes to exclude in the search (Max 50, ISO 639-1 language codes).
60
56
  include_companies : list of str, optional
61
- Companies to include in the search (Max: 50).
57
+ Google KG IDs of public companies to require (Max 50).
62
58
  exclude_companies : list of str, optional
63
- Companies to exclude from the search (Max: 50).
59
+ Google KG IDs of public companies to forbid (Max 50).
64
60
  include_docs : list of str, optional
65
- Document IDs to include in the search (Max: 50).
61
+ URL hashes of docs to include (Max 50).
66
62
  exclude_docs : list of str, optional
67
- Document IDs to exclude from the search (Max: 50).
63
+ URL hashes of docs to exclude (Max 50).
68
64
 
69
65
  Examples
70
66
  --------
71
67
  Create a search with specific parameters:
72
-
73
68
  >>> search = Search(
74
69
  ... question="What is Python?",
75
70
  ... n_results=5,
@@ -82,6 +77,49 @@ class Search:
82
77
  What is Python?
83
78
  """
84
79
 
80
+ question: str | None = None
81
+ """The main search question or query."""
82
+ expansions: list[str] | None = None
83
+ """List of query expansions or related terms."""
84
+ sql_filter: str | None = None
85
+ """Additional SQL filter to apply to the search."""
86
+ n_results: int | None = None
87
+ """Number of results to return."""
88
+ n_probes: int | None = None
89
+ """Number of probe queries to use."""
90
+ n_contextify: int | None = None
91
+ """Number of context documents to retrieve."""
92
+ algorithm: str | None = None
93
+ """Search algorithm to use."""
94
+ autogenerate_expansions: bool = False
95
+ """Do you want to generate expansions automatically using a LLM?"""
96
+ publish_start: str | None = None
97
+ """Start date for when the document was published."""
98
+ publish_end: str | None = None
99
+ """End date for when the document was published."""
100
+ visited_start: str | None = None
101
+ """Start date for when the document was visited by NOSIBLE."""
102
+ visited_end: str | None = None
103
+ """End date for when the document was visited by NOSIBLE."""
104
+ certain: bool | None = None
105
+ """Only include documents where we are 100% sure of the date."""
106
+ include_netlocs: list[str] | None = None
107
+ """List of netlocs (domains) to include in the search (Max 50)."""
108
+ exclude_netlocs: list[str] | None = None
109
+ """List of netlocs (domains) to exclude in the search (Max 50)."""
110
+ include_languages: list[str] | None = None
111
+ """Languages to include in the search. (Max 50)"""
112
+ exclude_languages: list[str] | None = None
113
+ """Language codes to exclude in the search (Max 50)"""
114
+ include_companies: list[str] | None = None
115
+ """Google KG IDs of public companies to require (Max 50)."""
116
+ exclude_companies: list[str] | None = None
117
+ """Google KG IDs of public companies to forbid (Max 50)."""
118
+ include_docs: list[str] | None = None
119
+ """URL hashes of docs to include (Max 50)."""
120
+ exclude_docs: list[str] | None = None
121
+ """URL hashes of docs to exclude (Max 50)."""
122
+
85
123
  _FIELDS = [
86
124
  "question",
87
125
  "expansions",
@@ -90,7 +128,6 @@ class Search:
90
128
  "n_probes",
91
129
  "n_contextify",
92
130
  "algorithm",
93
- "output_type",
94
131
  "autogenerate_expansions",
95
132
  "publish_start",
96
133
  "publish_end",
@@ -107,67 +144,17 @@ class Search:
107
144
  "exclude_docs",
108
145
  ]
109
146
 
110
- def __init__(
111
- self,
112
- question: str = None,
113
- expansions: list[str] = None,
114
- sql_filter: str = None,
115
- n_results: int = None,
116
- n_probes: int = None,
117
- n_contextify: int = None,
118
- algorithm: str = None,
119
- output_type: str = None,
120
- autogenerate_expansions: bool = False,
121
- publish_start: str = None,
122
- publish_end: str = None,
123
- include_netlocs: list[str] = None,
124
- exclude_netlocs: list[str] = None,
125
- visited_start: str = None,
126
- visited_end: str = None,
127
- certain: bool = None,
128
- include_languages: list[str] = None,
129
- exclude_languages: list[str] = None,
130
- include_companies: list[str] = None,
131
- exclude_companies: list[str] = None,
132
- include_docs: list[str] = None,
133
- exclude_docs: list[str] = None,
134
- ) -> None:
135
- self.question = question
136
- self.expansions = expansions
137
- self.sql_filter = sql_filter
138
- self.n_results = n_results
139
- self.n_probes = n_probes
140
- self.n_contextify = n_contextify
141
- self.algorithm = algorithm
142
- self.output_type = output_type
143
- self.autogenerate_expansions = autogenerate_expansions
144
- self.publish_start = publish_start
145
- self.publish_end = publish_end
146
- self.include_netlocs = include_netlocs
147
- self.exclude_netlocs = exclude_netlocs
148
- self.visited_start = visited_start
149
- self.visited_end = visited_end
150
- self.certain = certain
151
- self.include_languages = include_languages
152
- self.exclude_languages = exclude_languages
153
- self.include_companies = include_companies
154
- self.exclude_companies = exclude_companies
155
- self.include_docs = include_docs
156
- self.exclude_docs = exclude_docs
157
-
158
147
  def __str__(self) -> str:
159
148
  """
160
149
  Return a readable string representation of the search parameters.
161
150
  Only non-None fields are shown, each on its own line for clarity.
151
+
152
+ Returns
153
+ -------
154
+ str
155
+ A string representation of the Search instance, showing only the
162
156
  """
163
- attrs = []
164
- for attr in self._FIELDS:
165
- value = getattr(self, attr)
166
- if value is not None:
167
- attrs.append(f" {attr} = {value!r}")
168
- if not attrs:
169
- return "Search()"
170
- return "Search(\n" + ",\n".join(attrs) + "\n)"
157
+ return print_dict(self.to_dict())
171
158
 
172
159
  def __add__(self, other: Search) -> SearchSet:
173
160
  """
@@ -222,7 +209,7 @@ class Search:
222
209
  >>> search.to_dict()["question"]
223
210
  'What is Python?'
224
211
  """
225
- return {field: getattr(self, field) for field in self._FIELDS}
212
+ return asdict(self, dict_factory=dict)
226
213
 
227
214
  @classmethod
228
215
  def from_dict(cls, data: dict) -> Search:
@@ -267,10 +254,6 @@ class Search:
267
254
 
268
255
  Raises
269
256
  ------
270
- IOError
271
- If the file cannot be written.
272
- TypeError
273
- If serialization of the search parameters fails.
274
257
 
275
258
  Examples
276
259
  --------
@@ -304,10 +287,6 @@ class Search:
304
287
 
305
288
  Raises
306
289
  ------
307
- IOError
308
- If the file cannot be read.
309
- json.JSONDecodeError
310
- If the file content is not valid JSON.
311
290
 
312
291
  Examples
313
292
  --------