diffbot-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffbot/kg.py ADDED
@@ -0,0 +1,90 @@
1
+ """Diffbot Knowledge Graph APIs: DQL search and entity enhancement."""
2
+
3
+ import pathlib
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Union
6
+
7
+ if TYPE_CHECKING:
8
+ from .client import Diffbot, DiffbotAsync
9
+
10
+ KG_DQL_ENDPOINT = "https://kg.diffbot.com/kg/v3/dql"
11
+ KG_ONTOLOGY_ENDPOINT = "https://kg.diffbot.com/kg/ontology"
12
+
13
+
14
+ def _build_dql_params(
15
+ client: Any,
16
+ query: str,
17
+ size: int,
18
+ from_: int,
19
+ format: str,
20
+ filter: Optional[str],
21
+ exportspec: Optional[str],
22
+ extra: Optional[Dict[str, str]],
23
+ ) -> Dict[str, Any]:
24
+ params: Dict[str, Any] = {"token": client.token, "query": query, "size": size}
25
+ if from_:
26
+ params["from"] = from_
27
+ if format != "json":
28
+ params["format"] = format
29
+ if filter is not None:
30
+ params["filter"] = filter
31
+ if exportspec is not None:
32
+ params["exportspec"] = exportspec
33
+ if extra:
34
+ params.update(extra)
35
+ return params
36
+
37
+
38
def dql(
    client: "Diffbot",
    query: str,
    *,
    size: int = 10,
    from_: int = 0,
    format: str = "json",
    filter: Optional[str] = None,
    exportspec: Optional[str] = None,
    extra: Optional[Dict[str, str]] = None,
    raw: bool = False,
) -> Union[Dict[str, Any], bytes]:
    """Run a DQL query against the Knowledge Graph endpoint (blocking).

    The query parameters are built by ``_build_dql_params`` and sent with a
    GET request to ``KG_DQL_ENDPOINT``. The response is handed to
    ``client._raise_for_status`` before its body is read.

    Returns the undecoded response body (``response.content``) when ``raw``
    is true, otherwise the JSON-decoded body.

    NOTE(review): the body is JSON-decoded whenever ``raw`` is false, even if
    ``format`` is not ``"json"`` -- callers asking for another format
    presumably need ``raw=True``; confirm against the API.
    """
    request_params = _build_dql_params(
        client=client,
        query=query,
        size=size,
        from_=from_,
        format=format,
        filter=filter,
        exportspec=exportspec,
        extra=extra,
    )
    resp = client._http.get(KG_DQL_ENDPOINT, params=request_params)
    client._raise_for_status(resp)
    if raw:
        return resp.content
    return resp.json()
54
+
55
+
56
async def dql_async(
    client: "DiffbotAsync",
    query: str,
    *,
    size: int = 10,
    from_: int = 0,
    format: str = "json",
    filter: Optional[str] = None,
    exportspec: Optional[str] = None,
    extra: Optional[Dict[str, str]] = None,
    raw: bool = False,
) -> Union[Dict[str, Any], bytes]:
    """Run a DQL query against the Knowledge Graph endpoint (awaitable).

    Same contract as the blocking variant: parameters come from
    ``_build_dql_params``, the GET request to ``KG_DQL_ENDPOINT`` is awaited,
    and the response is passed to ``client._raise_for_status`` before its
    body is read.

    Returns the undecoded response body (``response.content``) when ``raw``
    is true, otherwise the JSON-decoded body.

    NOTE(review): the body is JSON-decoded whenever ``raw`` is false, even if
    ``format`` is not ``"json"`` -- callers asking for another format
    presumably need ``raw=True``; confirm against the API.
    """
    request_params = _build_dql_params(
        client=client,
        query=query,
        size=size,
        from_=from_,
        format=format,
        filter=filter,
        exportspec=exportspec,
        extra=extra,
    )
    resp = await client._http.get(KG_DQL_ENDPOINT, params=request_params)
    client._raise_for_status(resp)
    if raw:
        return resp.content
    return resp.json()
72
+
73
+
74
def dql_parallel(
    client: "Diffbot",
    queries: Sequence[Dict[str, Any]],
    *,
    workers: int = 8,
) -> List[Union[Dict[str, Any], bytes]]:
    """Run several DQL queries concurrently on a thread pool.

    Each element of ``queries`` is a mapping that is expanded with ``**``
    into a ``dql(client, ...)`` call, so its keys must be valid ``dql``
    arguments. Results are returned as a list in the same order as
    ``queries``.

    An empty ``queries`` returns ``[]`` without creating a pool. Otherwise
    the pool size is the smaller of ``workers`` and ``len(queries)``.
    """
    if not queries:
        return []

    def _run_one(kwargs: Dict[str, Any]) -> Union[Dict[str, Any], bytes]:
        return dql(client, **kwargs)

    pool_size = min(workers, len(queries))
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        return list(pool.map(_run_one, queries))
84
+
85
+
86
def dql_refresh_ontology(client: "Diffbot", dest: pathlib.Path) -> None:
    """Download the KG ontology and store the response body at ``dest``.

    A GET request (with no query parameters) is sent to
    ``KG_ONTOLOGY_ENDPOINT`` and the response is passed to
    ``client._raise_for_status``. Only after that check are the missing
    parent directories of ``dest`` created and the raw response bytes
    written to ``dest``.
    """
    resp = client._http.get(KG_ONTOLOGY_ENDPOINT)
    client._raise_for_status(resp)
    parent_dir = dest.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    dest.write_bytes(resp.content)
diffbot/nlp.py ADDED
@@ -0,0 +1,37 @@
1
+ """Diffbot NLP API: entity identification, resolution, and sentiment."""
2
+
3
+ from typing import TYPE_CHECKING, Any, Dict
4
+
5
+ if TYPE_CHECKING:
6
+ from .client import Diffbot, DiffbotAsync
7
+
8
+ NLP_BASE = "https://nl.diffbot.com/v1/"
9
+ NLP_FIELDS = "entities,sentiment"
10
+
11
+
12
def entities(
    client: "Diffbot",
    text: str,
    *,
    lang: str = "auto",
) -> Dict[str, Any]:
    """Send ``text`` to the NLP endpoint and return the decoded result (blocking).

    The request is a POST to ``client.nlp_url`` with the token and
    ``NLP_FIELDS`` as query parameters and a JSON body holding a single
    plain-text document. The response is passed to
    ``client._raise_for_status`` before decoding.

    If the decoded JSON is a list, its first element is returned; any other
    value is returned unchanged.
    """
    query = {"token": client.token, "fields": NLP_FIELDS}
    document = {"lang": lang, "format": "plain text", "content": text}
    resp = client._http.post(client.nlp_url, params=query, json=[document])
    client._raise_for_status(resp)
    result = resp.json()
    if isinstance(result, list):
        return result[0]
    return result
24
+
25
+
26
async def entities_async(
    client: "DiffbotAsync",
    text: str,
    *,
    lang: str = "auto",
) -> Dict[str, Any]:
    """Send ``text`` to the NLP endpoint and return the decoded result (awaitable).

    The request is an awaited POST to ``client.nlp_url`` with the token and
    ``NLP_FIELDS`` as query parameters and a JSON body holding a single
    plain-text document. The response is passed to
    ``client._raise_for_status`` before decoding.

    If the decoded JSON is a list, its first element is returned; any other
    value is returned unchanged.
    """
    query = {"token": client.token, "fields": NLP_FIELDS}
    document = {"lang": lang, "format": "plain text", "content": text}
    resp = await client._http.post(
        client.nlp_url, params=query, json=[document]
    )
    client._raise_for_status(resp)
    result = resp.json()
    if isinstance(result, list):
        return result[0]
    return result
diffbot/web_search.py ADDED
@@ -0,0 +1,44 @@
1
+ """Diffbot web search API."""
2
+
3
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
4
+
5
+ if TYPE_CHECKING:
6
+ from .client import Diffbot, DiffbotAsync
7
+
8
+ WEB_SEARCH_BASE = "https://llm.diffbot.com/api/v1/web_search"
9
+
10
+
11
def web_search(
    client: "Diffbot",
    text: str,
    *,
    num_results: Optional[int] = None,
    max_tokens: Optional[int] = None,
) -> Dict[str, Any]:
    """Query the web search endpoint and return the decoded JSON (blocking).

    The token is sent as a ``Bearer`` value in the ``Authorization`` header;
    ``text`` is sent as the ``text`` query parameter. ``num_results`` and
    ``max_tokens`` are sent (as ``num_results`` and ``maxTokens``) only when
    they are not ``None``. The response is passed to
    ``client._raise_for_status`` before decoding.
    """
    auth_headers = {"Authorization": f"Bearer {client.token}"}
    query: Dict[str, Any] = {"text": text}
    # Optional knobs, keyed by the parameter name the endpoint is sent.
    for key, value in (("num_results", num_results), ("maxTokens", max_tokens)):
        if value is not None:
            query[key] = value
    resp = client._http.get(
        client.web_search_url, headers=auth_headers, params=query
    )
    client._raise_for_status(resp)
    return resp.json()
27
+
28
+
29
async def web_search_async(
    client: "DiffbotAsync",
    text: str,
    *,
    num_results: Optional[int] = None,
    max_tokens: Optional[int] = None,
) -> Dict[str, Any]:
    """Query the web search endpoint and return the decoded JSON (awaitable).

    The token is sent as a ``Bearer`` value in the ``Authorization`` header;
    ``text`` is sent as the ``text`` query parameter. ``num_results`` and
    ``max_tokens`` are sent (as ``num_results`` and ``maxTokens``) only when
    they are not ``None``. The awaited response is passed to
    ``client._raise_for_status`` before decoding.
    """
    auth_headers = {"Authorization": f"Bearer {client.token}"}
    query: Dict[str, Any] = {"text": text}
    # Optional knobs, keyed by the parameter name the endpoint is sent.
    for key, value in (("num_results", num_results), ("maxTokens", max_tokens)):
        if value is not None:
            query[key] = value
    resp = await client._http.get(
        client.web_search_url, headers=auth_headers, params=query
    )
    client._raise_for_status(resp)
    return resp.json()
@@ -0,0 +1,218 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffbot-python
3
+ Version: 0.1.0
4
+ Summary: Python client library for Diffbot APIs
5
+ Project-URL: Homepage, https://github.com/diffbot/diffbot-python
6
+ Project-URL: Repository, https://github.com/diffbot/diffbot-python
7
+ Project-URL: Issues, https://github.com/diffbot/diffbot-python/issues
8
+ Author-email: Jerome Choo <jerome@diffbot.com>, Mike Tung <miket@diffbot.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Topic :: Software Development :: Libraries
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: click>=8.1.0
18
+ Requires-Dist: httpx>=0.27.0
19
+ Requires-Dist: rich>=13.0.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=8.0.0; extra == 'dev'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Diffbot Python Library
25
+
26
+ Python client library for [Diffbot](https://www.diffbot.com) APIs.
27
+
28
+
29
+ ## Installation
30
+
31
+ ```bash
32
+ pip install git+https://github.com/diffbot/diffbot-python.git
33
+ ```
34
+
35
+ Or, for local development, run this from a clone of the repository:
36
+
37
+ ```bash
38
+ pip install -e ".[dev]"
39
+ ```
40
+
41
+ ## Usage
42
+
43
+ ### Authentication
44
+ Set your Diffbot API token as an environment variable, or put it in a `.env` file.
45
+
46
+ ```bash
47
+ export DIFFBOT_API_TOKEN=<TOKEN>
48
+ ```
49
+
50
+ ### Extract structured content
51
+ ```python
52
+ from diffbot import Diffbot
53
+
54
+ db = Diffbot(token="YOUR_TOKEN")
55
+ data = db.extract("https://www.example.com")
56
+ ```
57
+
58
+ ### Ask Diffbot LLM
59
+ ```python
60
+ from diffbot import Diffbot
61
+
62
+ db = Diffbot(token="YOUR_TOKEN")
63
+ for chunk in db.ask([{"role": "user", "content": "What's the capital of France?"}]):
64
+ print(chunk, end="")
65
+ ```
66
+
67
+ ### Crawl a site for structured content
68
+ ```python
69
+ from diffbot import Diffbot
70
+
71
+ db = Diffbot(token="YOUR_TOKEN")
72
+ for event in db.crawl("https://www.example.com", hops=1):
73
+ print(event)
74
+ ```
75
+
76
+ ### Query the Knowledge Graph
77
+ ```python
78
+ from diffbot import Diffbot
79
+
80
+ db = Diffbot(token="YOUR_TOKEN")
81
+ results = db.dql('type:Organization name:"Diffbot"')
82
+ ```
83
+
84
+ ### Web Search
85
+ ```python
86
+ from diffbot import Diffbot
87
+
88
+ db = Diffbot(token="YOUR_TOKEN")
89
+ results = db.web_search("diffbot knowledge graph")
90
+ for r in results["search_results"]:
91
+ print(r["score"], r["title"], r["pageUrl"])
92
+ print(r["content"])
93
+ ```
94
+
95
+ ### Entities (NLP)
96
+ ```python
97
+ from diffbot import Diffbot
98
+
99
+ db = Diffbot(token="YOUR_TOKEN")
100
+ result = db.entities("Apple CEO Tim Cook announced record quarterly earnings.")
101
+ for entity in result["entities"]:
102
+ print(entity["name"], entity.get("type"), entity.get("id"))
103
+ print("sentiment:", result.get("sentiment"))
104
+ ```
105
+
106
+ ## Async Usage
107
+
108
+ ### Extract structured content
109
+ ```python
110
+ import asyncio
111
+ from diffbot import DiffbotAsync
112
+
113
+ async def main():
114
+ async with DiffbotAsync(token="YOUR_TOKEN") as db:
115
+ data = await db.extract("https://www.example.com")
116
+ print(data)
117
+
118
+ asyncio.run(main())
119
+ ```
120
+
121
+ ### Ask Diffbot LLM
122
+ ```python
123
+ import asyncio
124
+ from diffbot import DiffbotAsync
125
+
126
+ async def main():
127
+ async with DiffbotAsync(token="YOUR_TOKEN") as db:
128
+ async for chunk in db.ask([{"role": "user", "content": "What's the capital of France?"}]):
129
+ print(chunk, end="")
130
+
131
+ asyncio.run(main())
132
+ ```
133
+
134
+ ### Crawl a site for structured content
135
+ ```python
136
+ import asyncio
137
+ from diffbot import DiffbotAsync
138
+
139
+ async def main():
140
+ async with DiffbotAsync(token="YOUR_TOKEN") as db:
141
+ async for event in db.crawl("https://www.example.com", hops=1):
142
+ print(event)
143
+
144
+ asyncio.run(main())
145
+ ```
146
+
147
+ ### Query the Knowledge Graph
148
+ ```python
149
+ import asyncio
150
+ from diffbot import DiffbotAsync
151
+
152
+ async def main():
153
+ async with DiffbotAsync(token="YOUR_TOKEN") as db:
154
+ results = await db.dql('type:Organization name:"Diffbot"')
155
+ print(results)
156
+
157
+ asyncio.run(main())
158
+ ```
159
+
160
+ ### Web Search
161
+ ```python
162
+ import asyncio
163
+ from diffbot import DiffbotAsync
164
+
165
+ async def main():
166
+ async with DiffbotAsync(token="YOUR_TOKEN") as db:
167
+ results = await db.web_search("diffbot knowledge graph")
168
+ for r in results["search_results"]:
169
+ print(r["score"], r["title"], r["pageUrl"])
170
+ print(r["content"])
171
+
172
+ asyncio.run(main())
173
+ ```
174
+
175
+ ### Entities (NLP)
176
+ ```python
177
+ import asyncio
178
+ from diffbot import DiffbotAsync
179
+
180
+ async def main():
181
+ async with DiffbotAsync(token="YOUR_TOKEN") as db:
182
+ result = await db.entities("Apple CEO Tim Cook announced record quarterly earnings.")
183
+ for entity in result["entities"]:
184
+ print(entity["name"], entity.get("type"), entity.get("id"))
185
+ print("sentiment:", result.get("sentiment"))
186
+
187
+ asyncio.run(main())
188
+ ```
189
+
190
+ ## CLI
191
+
192
+ This library also installs a command-line interface, available as the `db` command.
193
+
194
+ ```bash
195
+ export DIFFBOT_API_TOKEN=your-token-here
196
+
197
+ db extract https://www.example.com
198
+ db ask "What's the capital of France?"
199
+ db crawl https://www.example.com --hops 1
200
+ db crawl-list-jobs
201
+ db crawl-delete-job crawl-1234567890
202
+ db web-search "diffbot knowledge graph"
203
+ db web-search "diffbot knowledge graph" -n 5 -f json
204
+ db entities "Apple CEO Tim Cook announced record quarterly earnings."
205
+ db entities "Apple CEO Tim Cook announced record quarterly earnings." -f dql
206
+ ```
207
+
208
+ ## Tests
209
+
210
+ Run the mock test suite:
211
+ ```bash
212
+ python -m pytest
213
+ ```
214
+
215
+ Run live integration tests against the real API (requires a valid token):
216
+ ```bash
217
+ DIFFBOT_TOKEN=your_token python -m pytest -m live
218
+ ```
@@ -0,0 +1,20 @@
1
+ diffbot/__init__.py,sha256=WyzW2kAw75vgEmAyvrFkfcu_m0gL8uhOixeYVCKaHf4,518
2
+ diffbot/ask.py,sha256=iNv613j4CoIfdDTOE-pl9KUkjqI-2AxGDMR1prm3DGM,1853
3
+ diffbot/client.py,sha256=C5MIhyx8YZGgD4GjKj5QrIg-eRtUF3qhmUzYW77Sh9Q,10426
4
+ diffbot/crawl.py,sha256=iYMFmf7HKrbefJrGg14VnlfiBFLOE_Z1pfO4Rn_cDXc,8893
5
+ diffbot/errors.py,sha256=5-AceX5MyNVUhe9pvR_4rnQQmBhvLfwWmrRl7dRZUSg,1576
6
+ diffbot/extract.py,sha256=R9SVxaOi4FjHOQIX5ho_75OwaJ7VdX-mdAoV_UY-lrM,1452
7
+ diffbot/kg.py,sha256=Y7XTrPNAfPdX9vvhFmgmU4G4KTF9fwpYkm2Hh3c6DLA,2708
8
+ diffbot/nlp.py,sha256=lZJW4MkjhVklIEM2OBfhc8LvresXTy7RsuImPteYsOA,1153
9
+ diffbot/web_search.py,sha256=1sKBojzsslZj2zzl2kJ4s43AUaRn1i1fGgPUjFdsW6Q,1370
10
+ diffbot/cli/__init__.py,sha256=UVGD3uevKTHmqEdbDhhR2PzO6-3i0xu8d7D_94jLLRo,16488
11
+ diffbot/cli/__main__.py,sha256=5BjNuyet8AY-POwoF5rGt722rHQ7tJ0Vf0UFUfzzi-I,58
12
+ diffbot/cli/_common.py,sha256=0I-oHnKVM9zLCaVNDc-7qf17b5u827IK7ezXITjsdW4,1006
13
+ diffbot/cli/dql.py,sha256=lJzAEjTIoF1l1xTwMcVIlq36pKZKrvgZYz0RuX5jUGc,11419
14
+ diffbot/cli/entities.py,sha256=tsHKexF0b6NnsoUEZJUS2rJidTIE8lt6xrcSenPbwtY,5630
15
+ diffbot/cli/ontology.py,sha256=FLIIe6ZY34zLHLt_bB9Zci0zlrKEAIIgNSdz5KwXqzw,3773
16
+ diffbot_python-0.1.0.dist-info/METADATA,sha256=KrMqxyqa2g6GPee1zc8A6J1SPXDLukELFslwbuMj9CM,5281
17
+ diffbot_python-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
18
+ diffbot_python-0.1.0.dist-info/entry_points.txt,sha256=FCxJhrbl7VNEsTK7zl7qYvXID7gQ1_wxhiw5_Vllb_M,40
19
+ diffbot_python-0.1.0.dist-info/licenses/LICENSE,sha256=UZlamI1XGeiG0Mit8dsHssNhOuMGKfmNOp5qpf1533w,1063
20
+ diffbot_python-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ db = diffbot.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Diffbot
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.