nosible 0.1.8__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,11 @@
1
1
  from collections.abc import Iterator
2
+ from dataclasses import dataclass, field
2
3
 
3
4
  from nosible.classes.search import Search
4
5
  from nosible.utils.json_tools import json_dumps, json_loads
5
6
 
6
7
 
8
+ @dataclass()
7
9
  class SearchSet(Iterator[Search]):
8
10
  """
9
11
  Manages an iterable collection of Search objects.
@@ -32,9 +34,10 @@ class SearchSet(Iterator[Search]):
32
34
  What is AI?
33
35
  """
34
36
 
35
- def __init__(self, searches: list[Search] = None) -> None:
36
- self.searches = searches or []
37
- self._index = 0
37
+ searches: list[Search] = field(default_factory=list)
38
+ """ A list of Search objects in the collection."""
39
+ _index: int = field(default=0, init=False, repr=False, compare=False)
40
+ """ Internal index for iteration over searches."""
38
41
 
39
42
  def __iter__(self) -> "SearchSet":
40
43
  """
@@ -199,7 +202,7 @@ class SearchSet(Iterator[Search]):
199
202
  """
200
203
  del self.searches[index]
201
204
 
202
- def to_list(self) -> list[dict]:
205
+ def to_dicts(self) -> list[dict]:
203
206
  """
204
207
  Convert all Search objects in the collection to a list of dictionaries.
205
208
 
@@ -219,7 +222,7 @@ class SearchSet(Iterator[Search]):
219
222
  >>> s1 = Search(question="What is Python?", n_results=3)
220
223
  >>> s2 = Search(question="What is PEP8?", n_results=2)
221
224
  >>> searches = SearchSet([s1, s2])
222
- >>> searches.to_list()[1]["question"]
225
+ >>> searches.to_dicts()[1]["question"]
223
226
  'What is PEP8?'
224
227
  """
225
228
  return [s.to_dict() for s in self.searches]
@@ -242,6 +245,10 @@ class SearchSet(Iterator[Search]):
242
245
  str
243
246
  A JSON string representation of the SearchSet collection if no path is provided.
244
247
 
248
+ Raises
249
+ ------
250
+ RuntimeError
251
+ If there is an error during serialization or file writing.
245
252
 
246
253
  Examples
247
254
  --------
@@ -251,14 +258,22 @@ class SearchSet(Iterator[Search]):
251
258
  >>> json_str = searches.to_json()
252
259
  >>> isinstance(json_str, str)
253
260
  True
254
- >>> searches.to_json("searches.json") # The file 'searches.json' will contain both search queries in JSON format.
261
+ >>> searches.to_json(
262
+ ... "searches.json"
263
+ ... ) # The file 'searches.json' will contain both search queries in JSON format.
255
264
  """
256
- data = json_dumps(self.to_list())
257
- if path:
258
- with open(path, "w") as f:
259
- f.write(data)
260
- return None
261
- return data
265
+ try:
266
+ json_bytes = json_dumps(self.to_dicts())
267
+ if path:
268
+ try:
269
+ with open(path, "w") as f:
270
+ f.write(json_bytes)
271
+ return None
272
+ except Exception as e:
273
+ raise RuntimeError(f"Failed to write JSON to '{path}': {e}") from e
274
+ return json_bytes
275
+ except Exception as e:
276
+ raise RuntimeError(f"Failed to serialize results to JSON: {e}") from e
262
277
 
263
278
  @classmethod
264
279
  def from_json(cls, path: str) -> "SearchSet":
@@ -1,6 +1,9 @@
1
- from nosible.utils.json_tools import json_dumps
1
+ from dataclasses import asdict, dataclass, field
2
2
 
3
+ from nosible.utils.json_tools import json_dumps, print_dict
3
4
 
5
+
6
+ @dataclass(init=True, repr=True, eq=True, frozen=True)
4
7
  class Snippet:
5
8
  """
6
9
  A class representing a snippet of text, typically extracted from a web page.
@@ -25,6 +28,11 @@ class Snippet:
25
28
  Hash of the URL from which the snippet was extracted.
26
29
  words : str or None
27
30
  The words in the snippet.
31
+ links : list or None
32
+ List of links associated with the snippet.
33
+ companies : list or None
34
+ List of companies mentioned in the snippet.
35
+
28
36
 
29
37
  Examples
30
38
  --------
@@ -34,67 +42,28 @@ class Snippet:
34
42
 
35
43
  """
36
44
 
37
- def __init__(
38
- self,
39
- *,
40
- companies: list = None,
41
- content: str = None,
42
- images: list = None,
43
- language: str = None,
44
- next_snippet_hash: str = None,
45
- prev_snippet_hash: str = None,
46
- snippet_hash: str = None,
47
- statistics: dict = None,
48
- url_hash: str = None,
49
- words: str = None,
50
- ):
51
- """
52
- Initialize a Snippet instance.
53
-
54
- Parameters
55
- ----------
56
- companies : list, optional
57
- A list of companies mentioned in the snippet, if applicable. (GKIDS)
58
- content : str
59
- The text content of the snippet.
60
- images : list, optional
61
- List of image URLs associated with the snippet.
62
- language : str, optional
63
- The language of the snippet.
64
- snippet_hash : str, optional
65
- A unique hash for the snippet.
66
- statistics : dict, optional
67
- Statistical information about the snippet (e.g., word count).
68
- words : str, optional
69
- The words in the snippet.
70
-
71
- Examples
72
- --------
73
- >>> snippet = Snippet(content="Example snippet", language="en")
74
- >>> print(snippet.content)
75
- Example snippet
76
- """
77
- self.companies = companies or []
78
- self.content = content
79
- self.images = images
80
- self.language = language
81
- self.snippet_hash = snippet_hash
82
- self.statistics = statistics
83
- self.words = words
84
- self.url_hash = url_hash
85
- self.next_snippet_hash = next_snippet_hash
86
- self.prev_snippet_hash = prev_snippet_hash
87
-
88
- def __repr__(self):
89
- """
90
- Returns a string representation of the Snippet object.
91
-
92
- Returns
93
- -------
94
- str
95
- A string representation of the Snippet.
96
- """
97
- return f"Snippet(content={self.content[:30]}, language={self.language}, snippet_hash={self.snippet_hash})"
45
+ content: str = field(default=None, repr=True, compare=True)
46
+ """The text content of the snippet."""
47
+ images: list = field(default=None, repr=True, compare=False)
48
+ """List of image URLs associated with the snippet."""
49
+ language: str = field(default=None, repr=True, compare=False)
50
+ """The language of the snippet."""
51
+ next_snippet_hash: str = field(default=None, repr=True, compare=False)
52
+ """Hash of the next snippet in sequence."""
53
+ prev_snippet_hash: str = field(default=None, repr=True, compare=False)
54
+ """Hash of the previous snippet in sequence."""
55
+ snippet_hash: str = field(default=None, repr=True, compare=True)
56
+ """A unique hash for the snippet."""
57
+ statistics: dict = field(default=None, repr=False, compare=False)
58
+ """Statistical information about the snippet."""
59
+ url_hash: str = field(default=None, repr=True, compare=False)
60
+ """Hash of the URL from which the snippet was extracted."""
61
+ words: str = field(default=None, repr=False, compare=False)
62
+ """The words in the snippet."""
63
+ links: list = field(default=None, repr=False, compare=False)
64
+ """List of links associated with the snippet."""
65
+ companies: list = field(default=None, repr=False, compare=False)
66
+ """List of companies mentioned in the snippet."""
98
67
 
99
68
  def __str__(self):
100
69
  """
@@ -105,7 +74,7 @@ class Snippet:
105
74
  str
106
75
  A string representation of the Snippet.
107
76
  """
108
- return f"Snippet: {self.content}"
77
+ return print_dict(self.to_dict())
109
78
 
110
79
  def __getitem__(self, key: str):
111
80
  """
@@ -146,17 +115,31 @@ class Snippet:
146
115
  >>> isinstance(snippet_dict, dict)
147
116
  True
148
117
  """
149
- return {
150
- "content": self.content,
151
- "images": self.images,
152
- "language": self.language,
153
- "snippet_hash": self.snippet_hash,
154
- "statistics": self.statistics,
155
- "words": self.words,
156
- "url_hash": self.url_hash,
157
- "next_snippet_hash": self.next_snippet_hash,
158
- "prev_snippet_hash": self.prev_snippet_hash,
159
- }
118
+ return asdict(self, dict_factory=dict)
119
+
120
+ @classmethod
121
+ def from_dict(cls, data: dict) -> "Snippet":
122
+ """
123
+ Create a Snippet instance from a dictionary.
124
+
125
+ Parameters
126
+ ----------
127
+ data : dict
128
+ Dictionary containing snippet data.
129
+
130
+ Returns
131
+ -------
132
+ Snippet
133
+ An instance of Snippet populated with the provided data.
134
+
135
+ Examples
136
+ --------
137
+ >>> snippet_data = {"content": "Example snippet", "snippet_hash": "hash1"}
138
+ >>> snippet = Snippet.from_dict(snippet_data)
139
+ >>> isinstance(snippet, Snippet)
140
+ True
141
+ """
142
+ return cls(**data)
160
143
 
161
144
  def to_json(self) -> str:
162
145
  """
@@ -1,9 +1,13 @@
1
- from typing import Iterator
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator
4
+ from dataclasses import dataclass, field
2
5
 
3
6
  from nosible.classes.snippet import Snippet
4
7
  from nosible.utils.json_tools import json_dumps
5
8
 
6
9
 
10
+ @dataclass()
7
11
  class SnippetSet(Iterator[Snippet]):
8
12
  """
9
13
  An iterator and container for a collection of Snippet objects.
@@ -19,64 +23,50 @@ class SnippetSet(Iterator[Snippet]):
19
23
  Examples
20
24
  --------
21
25
  >>> snippets_data = {
22
- ... "hash1": {"content": "Example snippet", "snippet_hash": "hash1"}
26
+ ... "hash1": {"content": "Example snippet", "snippet_hash": "hash1"},
27
+ ... "hash2": {"content": "Another snippet", "snippet_hash": "hash2"},
23
28
  ... }
24
- >>> snippets = SnippetSet(snippets_data)
29
+ >>> snippets = SnippetSet().from_dict(snippets_data)
25
30
  >>> for snippet in snippets:
26
31
  ... print(snippet.content)
27
32
  Example snippet
33
+ Another snippet
28
34
  """
29
35
 
30
- def __init__(self, snippets: dict) -> None:
31
- self._snippets = []
32
-
33
- for key, value in snippets.items():
34
- self._snippets.append(
35
- Snippet(
36
- companies=value.get("companies", []),
37
- content=value.get("content", ""),
38
- images=value.get("images", []),
39
- language=value.get("language", ""),
40
- next_snippet_hash=value.get("next_snippet_hash", ""),
41
- prev_snippet_hash=value.get("prev_snippet_hash", ""),
42
- snippet_hash=key,
43
- statistics=value.get("statistics", {}),
44
- url_hash=value.get("url_hash", ""),
45
- words=value.get("words", ""),
46
- )
47
- )
48
-
49
- self._index = 0
50
-
51
- def __iter__(self):
52
- """
53
- Initialize the iterator.
36
+ snippets: list[Snippet] = field(default_factory=list)
37
+ """ List of `Snippet` objects contained in this SnippetSet."""
38
+ _index: int = field(default=0, init=False, repr=False, compare=False)
39
+ """ Internal index for iteration over snippets."""
40
+
41
+ def __iter__(self) -> SnippetSet:
42
+ """
43
+ Reset iteration and return self.
44
+
54
45
  Returns
55
46
  -------
56
- SnippetSet
57
- The iterator itself.
47
+ SnippetSet
48
+ The SnippetSet instance itself, with its iteration index reset.
58
49
  """
59
- self._index = 0
50
+ object.__setattr__(self, "_index", 0)
60
51
  return self
61
52
 
62
53
  def __next__(self) -> Snippet:
63
54
  """
64
- Returns the next Snippet object from the collection.
55
+ Returns the next Snippet in the sequence.
65
56
 
66
57
  Returns
67
58
  -------
68
- Snippet
69
- The next snippet in the sequence.
70
-
59
+ Snippet
60
+ The next Snippet object in the sequence.
71
61
  Raises
72
62
  ------
73
63
  StopIteration
74
- If there are no more snippets to return.
64
+ If the end of the sequence is reached.
75
65
  """
76
- if self._index < len(self._snippets):
77
- snippet = self._snippets[self._index]
78
- self._index += 1
79
- return snippet
66
+ if self._index < len(self.snippets):
67
+ item = self.snippets[self._index]
68
+ object.__setattr__(self, "_index", self._index + 1)
69
+ return item
80
70
  raise StopIteration
81
71
 
82
72
  def __len__(self) -> int:
@@ -88,7 +78,7 @@ class SnippetSet(Iterator[Snippet]):
88
78
  int
89
79
  The number of snippets.
90
80
  """
91
- return len(self._snippets)
81
+ return len(self.snippets)
92
82
 
93
83
  def __getitem__(self, index: int) -> Snippet:
94
84
  """
@@ -109,9 +99,9 @@ class SnippetSet(Iterator[Snippet]):
109
99
  IndexError
110
100
  If the index is out of range.
111
101
  """
112
- if index < 0 or index >= len(self._snippets):
113
- raise IndexError("Index out of range.")
114
- return self._snippets[index]
102
+ if 0 <= index < len(self.snippets):
103
+ return self.snippets[index]
104
+ raise IndexError(f"Index {index} out of range for SnippetSet of length {len(self.snippets)}.")
115
105
 
116
106
  def __str__(self):
117
107
  """
@@ -122,17 +112,6 @@ class SnippetSet(Iterator[Snippet]):
122
112
  """
123
113
  return "\n".join(str(s) for s in self)
124
114
 
125
- def __repr__(self):
126
- """
127
- Returns a string representation of the SnippetSet object.
128
-
129
- Returns
130
- -------
131
- str
132
- A string representation of the SnippetSet.
133
- """
134
- return f"SnippetSet(snippets={len(self._snippets)})"
135
-
136
115
  def to_dict(self) -> dict:
137
116
  """
138
117
  Convert the SnippetSet to a dictionary representation.
@@ -144,15 +123,13 @@ class SnippetSet(Iterator[Snippet]):
144
123
 
145
124
  Examples
146
125
  --------
147
- >>> snippets_data = {
148
- ... "hash1": {"content": "Example snippet", "snippet_hash": "hash1"}
149
- ... }
150
- >>> snippets = SnippetSet(snippets_data)
126
+ >>> snippets_data = {"hash1": {"content": "Example snippet", "snippet_hash": "hash1"}}
127
+ >>> snippets = SnippetSet().from_dict(snippets_data)
151
128
  >>> snippets_dict = snippets.to_dict()
152
129
  >>> isinstance(snippets_dict, dict)
153
130
  True
154
131
  """
155
- return {s.snippet_hash: s.to_dict() for s in self._snippets} if self._snippets else {}
132
+ return {s.snippet_hash: s.to_dict() for s in self.snippets} if self.snippets else {}
156
133
 
157
134
  def to_json(self) -> str:
158
135
  """
@@ -165,12 +142,34 @@ class SnippetSet(Iterator[Snippet]):
165
142
 
166
143
  Examples
167
144
  --------
168
- >>> snippets_data = {
169
- ... "hash1": {"content": "Example snippet", "snippet_hash": "hash1"}
170
- ... }
171
- >>> snippets = SnippetSet(snippets_data)
145
+ >>> snippets_data = {"hash1": {"content": "Example snippet", "snippet_hash": "hash1"}}
146
+ >>> snippets = SnippetSet().from_dict(snippets_data)
172
147
  >>> json_str = snippets.to_json()
173
148
  >>> isinstance(json_str, str)
174
149
  True
175
150
  """
176
151
  return json_dumps(self.to_dict())
152
+
153
+ @classmethod
154
+ def from_dict(cls, data: dict) -> SnippetSet:
155
+ """
156
+ Create a SnippetSet instance from a dictionary.
157
+
158
+ Parameters
159
+ ----------
160
+ data : dict
161
+ Dictionary containing snippet data.
162
+
163
+ Returns
164
+ -------
165
+ SnippetSet
166
+ An instance of SnippetSet populated with the provided data.
167
+
168
+ Examples
169
+ --------
170
+ >>> snippets_data = {"hash1": {"content": "Example snippet", "snippet_hash": "hash1"}}
171
+ >>> snippets = SnippetSet.from_dict(snippets_data)
172
+ >>> isinstance(snippets, SnippetSet)
173
+ True
174
+ """
175
+ return cls([Snippet.from_dict(s) for s in data.values()])
@@ -1,7 +1,10 @@
1
+ from dataclasses import asdict, dataclass, field
2
+
1
3
  from nosible.classes.snippet_set import SnippetSet
2
4
  from nosible.utils.json_tools import json_dumps, json_loads
3
5
 
4
6
 
7
+ @dataclass(init=True, repr=True, eq=True, frozen=True)
5
8
  class WebPageData:
6
9
  """
7
10
  A data container for all extracted and processed information about a web page.
@@ -36,64 +39,26 @@ class WebPageData:
36
39
  {'description': 'Example'}
37
40
  """
38
41
 
39
- def __init__(
40
- self,
41
- *,
42
- companies: list = None,
43
- full_text: str = None,
44
- languages: dict = None,
45
- metadata: dict = None,
46
- page: dict = None,
47
- request: dict = None,
48
- snippets: dict = None,
49
- statistics: dict = None,
50
- structured: list = None,
51
- url_tree: dict = None,
52
- ):
53
- """
54
- Initialize a WebPageData instance.
55
-
56
- Parameters
57
- ----------
58
- companies : list, optional
59
- A list of companies mentioned in the webpage, if applicable. (GKIDS)
60
- full_text : str, optional
61
- The full text content of the webpage.
62
- languages : dict, optional
63
- Detected languages and their probabilities or counts.
64
- metadata : dict, optional
65
- Metadata extracted from the webpage (e.g., description, keywords).
66
- page : dict, optional
67
- Page-specific details such as title, canonical URL, etc.
68
- request : dict, optional
69
- Information about the HTTP request/response.
70
- snippets : list, optional
71
- Extracted text snippets or highlights from the page.
72
- statistics : dict, optional
73
- Statistical information about the page (e.g., word count).
74
- structured : list, optional
75
- Structured data (e.g., schema.org, OpenGraph).
76
- url_tree : dict, optional
77
- Hierarchical representation of the URL structure.
78
-
79
- Examples
80
- --------
81
- >>> data = WebPageData(full_text="Example Domain", languages={"en": 1})
82
- >>> data.languages
83
- {'en': 1}
84
- """
85
- self.companies = companies or []
86
- if snippets is None:
87
- snippets = {}
88
- self.full_text = full_text
89
- self.languages = languages or {}
90
- self.metadata = metadata or {}
91
- self.page = page or {}
92
- self.request = request or {}
93
- self.snippets = SnippetSet(snippets)
94
- self.statistics = statistics or {}
95
- self.structured = structured or []
96
- self.url_tree = url_tree or {}
42
+ companies: list = None
43
+ """A list of companies mentioned in the webpage, if applicable. (GKIDS)"""
44
+ full_text: str = None
45
+ """The full text content of the webpage."""
46
+ languages: dict = None
47
+ """Detected languages and their probabilities or counts."""
48
+ metadata: dict = None
49
+ """Metadata extracted from the webpage (e.g., description, keywords)."""
50
+ page: dict = None
51
+ """Page-specific details such as title, canonical URL, etc."""
52
+ request: dict = None
53
+ """Information about the HTTP request/response."""
54
+ snippets: SnippetSet = field(init=True, default_factory=SnippetSet)
55
+ """Extracted text snippets or highlights from the page."""
56
+ statistics: dict = None
57
+ """Statistical information about the page (e.g., word count)."""
58
+ structured: list = None
59
+ """Structured data (e.g., schema.org, OpenGraph)."""
60
+ url_tree: dict = None
61
+ """Hierarchical representation of the URL structure."""
97
62
 
98
63
  def __str__(self):
99
64
  """Return a string representation of the WebPageData.
@@ -109,24 +74,6 @@ class WebPageData:
109
74
  f"statistics={self.statistics}, structured={self.structured}, url_tree={self.url_tree})"
110
75
  )
111
76
 
112
- def __repr__(self):
113
- """
114
- Return a JSON-formatted string representation of the WebPageData instance.
115
-
116
- Returns
117
- -------
118
- str
119
- JSON string representing the WebPageData for easy readability and debugging.
120
-
121
- Examples
122
- --------
123
- >>> data = WebPageData(languages={"en": 1}, metadata={"description": "Example"})
124
- >>> repr_str = repr(data)
125
- >>> isinstance(repr_str, str)
126
- True
127
- """
128
- return json_dumps(self.to_dict())
129
-
130
77
  def to_dict(self) -> dict:
131
78
  """
132
79
  Convert the WebPageData instance to a dictionary.
@@ -145,18 +92,10 @@ class WebPageData:
145
92
  >>> d["languages"] == {"en": 1}
146
93
  True
147
94
  """
148
- return {
149
- "companies": self.companies,
150
- "full_text": self.full_text,
151
- "languages": self.languages,
152
- "metadata": self.metadata,
153
- "page": self.page,
154
- "request": self.request,
155
- "snippets": self.snippets.to_dict(),
156
- "statistics": self.statistics,
157
- "structured": self.structured,
158
- "url_tree": self.url_tree,
159
- }
95
+ data = asdict(self)
96
+ # asdict() recurses into the nested SnippetSet dataclass and emits its raw fields; replace that with the hash-keyed mapping from SnippetSet.to_dict():
97
+ data["snippets"] = self.snippets.to_dict()
98
+ return data
160
99
 
161
100
  def to_json(self) -> str:
162
101
  """
@@ -225,19 +164,12 @@ class WebPageData:
225
164
  >>> webpage_data.languages
226
165
  {'en': 1}
227
166
  """
228
- parsed_data = json_loads(data)
229
- return cls(
230
- companies=parsed_data.get("companies", []),
231
- full_text=parsed_data.get("full_text"),
232
- languages=parsed_data.get("languages"),
233
- metadata=parsed_data.get("metadata"),
234
- page=parsed_data.get("page"),
235
- request=parsed_data.get("request"),
236
- snippets=parsed_data.get("snippets", {}),
237
- statistics=parsed_data.get("statistics"),
238
- structured=parsed_data.get("structured"),
239
- url_tree=parsed_data.get("url_tree"),
240
- )
167
+ data_dict = json_loads(data)
168
+ # Convert the raw snippets mapping (if present) into a SnippetSet before constructing the instance
169
+ snippets_data = data_dict.pop("snippets", None)
170
+ if snippets_data is not None:
171
+ data_dict["snippets"] = SnippetSet.from_dict(snippets_data)
172
+ return cls(**data_dict)
241
173
 
242
174
  @classmethod
243
175
  def load(cls, path: str) -> "WebPageData":
@@ -265,5 +197,9 @@ class WebPageData:
265
197
  {'en': 1}
266
198
  """
267
199
  with open(path, encoding="utf-8") as f:
268
- data = f.read()
269
- return cls.from_json(data)
200
+ data = json_loads(f.read())
201
+ # Convert the raw snippets mapping (if present) into a SnippetSet before constructing the instance
202
+ snippets_data = data.pop("snippets", None)
203
+ if snippets_data is not None:
204
+ data["snippets"] = SnippetSet.from_dict(snippets_data)
205
+ return cls(**data)