dsslab-wdc-client 0.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: dsslab-wdc-client
|
|
3
|
+
Version: 0.10.0
|
|
4
|
+
Summary: A small client for the WDC-Rest-API.
|
|
5
|
+
Author: Tino Schöllhorn
|
|
6
|
+
Author-email: t.schoellhorn@uni-mannheim.de
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Provides-Extra: docs
|
|
14
|
+
Requires-Dist: Sphinx (==7.4.7) ; extra == "docs"
|
|
15
|
+
Requires-Dist: networkx (>=3.0)
|
|
16
|
+
Requires-Dist: pandas[excel, plot] (>=2.2,<3.0)
|
|
17
|
+
Requires-Dist: python-dotenv (>=1.1.0)
|
|
18
|
+
Requires-Dist: requests (>=2.3)
|
|
19
|
+
Requires-Dist: sphinx-rtd-theme (==3.0.2) ; extra == "docs"
|
|
20
|
+
Requires-Dist: sphinxcontrib-napoleon (==0.7) ; extra == "docs"
|
|
21
|
+
Project-URL: Documentation, https://dss-wdc.wiso.uni-hamburg.de/dss_wdc_client_docs/html/index.html
|
|
22
|
+
Project-URL: Homepage, https://dss-wdc.wiso.uni-hamburg.de/
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# Description
|
|
26
|
+
This project includes a *very* small client to access data from
|
|
27
|
+
the WebDataCollector-API. It is meant to provide a simple means
|
|
28
|
+
of accessing the data as JSON or as pandas DataFrames.
|
|
29
|
+
|
|
30
|
+
# Usage
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
from dsslab.wdc_client import *
|
|
34
|
+
|
|
35
|
+
# Somehow initialize environment variables for
|
|
36
|
+
# "WDC_HOST" and "WDC_TOKEN".
|
|
37
|
+
load_dotenv()
|
|
38
|
+
|
|
39
|
+
client = WDCClient.fromEnv()
|
|
40
|
+
df = client.loadAsDataFrame(
|
|
41
|
+
'api/endpoint...', {'param1': 'value will be encoded'})
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
For more information about the client and usable endpoints,
|
|
45
|
+
see the project homepage of WDC or directly consult the Documentation.
|
|
46
|
+
|
|
47
|
+
# Changelog
|
|
48
|
+
- 0.10.0 Moved to new package structure with the namespace dsslab.
|
|
49
|
+
|
|
50
|
+
# Old changelog
|
|
51
|
+
- 0.9.0 Add method WDCClient#put to create PUT-Requests
|
|
52
|
+
- 0.8.2 Fix for duplicate parameters when paging.
|
|
53
|
+
- 0.8.1 Simplify new methods for loading a DomainGraph.
|
|
54
|
+
- 0.8.0 Added new methods for loading DomainGraphs.
|
|
55
|
+
- 0.7.3 Fix README
|
|
56
|
+
- 0.7.2 Include link for generated documentation
|
|
57
|
+
- 0.7.1 Added generated documentation
|
|
58
|
+
- 0.7.0 Added new method WDCClient.loadDomainGraph for loading a DomainGraph as NetworkX-Object
|
|
59
|
+
- 0.6.0 WDCClient throws an WDCException if a request to the server fails
|
|
60
|
+
- 0.5.0 New signatures and methods taking care of encodings and working on large results
|
|
61
|
+
- 0.4.3 Add dependencies pandas["excel, plot"] as they are likely to be used.
|
|
62
|
+
- 0.4.2 Enhance README with Changelog and code-example.
|
|
63
|
+
- 0.4.1 Include a preferred variant for creating WDCClients from the Environment
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Description
|
|
2
|
+
This project includes a *very* small client to access data from
|
|
3
|
+
the WebDataCollector-API. It is meant to provide a simple means
|
|
4
|
+
of accessing the data as JSON or as pandas DataFrames.
|
|
5
|
+
|
|
6
|
+
# Usage
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
from dsslab.wdc_client import *
|
|
10
|
+
|
|
11
|
+
# Somehow initialize environment variables for
|
|
12
|
+
# "WDC_HOST" and "WDC_TOKEN".
|
|
13
|
+
load_dotenv()
|
|
14
|
+
|
|
15
|
+
client = WDCClient.fromEnv()
|
|
16
|
+
df = client.loadAsDataFrame(
|
|
17
|
+
'api/endpoint...', {'param1': 'value will be encoded'})
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
For more information about the client and usable endpoints,
|
|
21
|
+
see the project homepage of WDC or directly consult the Documentation.
|
|
22
|
+
|
|
23
|
+
# Changelog
|
|
24
|
+
- 0.10.0 Moved to new package structure with the namespace dsslab.
|
|
25
|
+
|
|
26
|
+
# Old changelog
|
|
27
|
+
- 0.9.0 Add method WDCClient#put to create PUT-Requests
|
|
28
|
+
- 0.8.2 Fix for duplicate parameters when paging.
|
|
29
|
+
- 0.8.1 Simplify new methods for loading a DomainGraph.
|
|
30
|
+
- 0.8.0 Added new methods for loading DomainGraphs.
|
|
31
|
+
- 0.7.3 Fix README
|
|
32
|
+
- 0.7.2 Include link for generated documentation
|
|
33
|
+
- 0.7.1 Added generated documentation
|
|
34
|
+
- 0.7.0 Added new method WDCClient.loadDomainGraph for loading a DomainGraph as NetworkX-Object
|
|
35
|
+
- 0.6.0 WDCClient throws an WDCException if a request to the server fails
|
|
36
|
+
- 0.5.0 New signatures and methods taking care of encodings and working on large results
|
|
37
|
+
- 0.4.3 Add dependencies pandas["excel, plot"] as they are likely to be used.
|
|
38
|
+
- 0.4.2 Enhance README with Changelog and code-example.
|
|
39
|
+
- 0.4.1 Include a preferred variant for creating WDCClients from the Environment
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "dsslab-wdc-client"
|
|
3
|
+
packages = [
|
|
4
|
+
{ include = "dsslab", from = "src" }
|
|
5
|
+
]
|
|
6
|
+
version = "0.10.0"
|
|
7
|
+
description = "A small client for the WDC-Rest-API."
|
|
8
|
+
authors = ["Tino Schöllhorn <t.schoellhorn@uni-mannheim.de>"]
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
homepage = "https://dss-wdc.wiso.uni-hamburg.de/"
|
|
11
|
+
documentation = "https://dss-wdc.wiso.uni-hamburg.de/dss_wdc_client_docs/html/index.html"
|
|
12
|
+
|
|
13
|
+
[tool.poetry.dependencies]
|
|
14
|
+
python = ">=3.10"
|
|
15
|
+
requests = ">=2.3"
|
|
16
|
+
python-dotenv = ">=1.1.0"
|
|
17
|
+
pandas = {extras = ["excel, plot"], version = "^2.2"}
|
|
18
|
+
networkx = ">=3.0"
|
|
19
|
+
|
|
20
|
+
# Docs
|
|
21
|
+
Sphinx = { version = "7.4.7", optional = true }
|
|
22
|
+
sphinx-rtd-theme = { version = "3.0.2", optional = true }
|
|
23
|
+
sphinxcontrib-napoleon = { version = "0.7", optional = true }
|
|
24
|
+
|
|
25
|
+
[tool.poetry.extras]
|
|
26
|
+
docs = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-napoleon"]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
[tool.poetry.group.dev.dependencies]
|
|
30
|
+
pytest = "^8.3.3"
|
|
31
|
+
|
|
32
|
+
[tool.pytest.ini_options]
|
|
33
|
+
log_cli = true
|
|
34
|
+
log_cli_level = "DEBUG"
|
|
35
|
+
log_cli_format = "%(asctime)s [%(levelname)8s-%(name)s]: %(message)s (%(filename)s:%(lineno)s)"
|
|
36
|
+
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
|
37
|
+
|
|
38
|
+
[build-system]
|
|
39
|
+
requires = ["poetry-core"]
|
|
40
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .client import *
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"""
|
|
2
|
+
client.py
|
|
3
|
+
====================================
|
|
4
|
+
The core module provides access to the WDC-API.
|
|
5
|
+
It provides a consistent interface for handling
|
|
6
|
+
the data as JSON, DataFrame or Graph and transparently handles
|
|
7
|
+
paging for large result-sets.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import json
|
|
14
|
+
from typing import Any
|
|
15
|
+
from collections.abc import Callable
|
|
16
|
+
import networkx as nx
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
class WDCException(Exception):
    """
    An exception which is raised when an error within the WDCClient
    occurs.

    :param message: human-readable description of the error.
    :param query: optional query associated with the failure; stored
        unchanged on the attribute ``query``.
    :param state: optional state associated with the failure; stored
        unchanged on the attribute ``state``.
    """

    def __init__(self, message, query=None, state=None):
        super().__init__(message)
        self.query = query
        self.state = state

    def __str__(self):
        # query and state default to None, so they must be formatted rather
        # than concatenated: "str + None" would raise a TypeError while the
        # exception is being rendered and hide the actual error message.
        return f"{super().__str__()}, query: {self.query}, state: {self.state}"
|
|
36
|
+
|
|
37
|
+
class WDCClient:
|
|
38
|
+
"""
|
|
39
|
+
Client for the WDC-API.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
@staticmethod
|
|
43
|
+
def fromEnv():
|
|
44
|
+
"""
|
|
45
|
+
Creates a WDCClient from the 'Environment'.
|
|
46
|
+
|
|
47
|
+
Uses the environment variables 'WDC_HOST' and 'WDC_TOKEN' from the current
|
|
48
|
+
environment. Thus, you can make use of modules such as python-dotenv
|
|
49
|
+
or other variants more easily.
|
|
50
|
+
|
|
51
|
+
Remember: Using passwords or tokens in source code is dangerous!
|
|
52
|
+
|
|
53
|
+
:return: A new WDCClient configured from the environment values.
|
|
54
|
+
"""
|
|
55
|
+
_host = os.getenv('WDC_HOST')
|
|
56
|
+
_token = os.getenv('WDC_TOKEN')
|
|
57
|
+
|
|
58
|
+
client = WDCClient(host = _host, token = _token)
|
|
59
|
+
|
|
60
|
+
return client
|
|
61
|
+
|
|
62
|
+
def __init__(self, host: str, token = None):
|
|
63
|
+
self.logger = logging.getLogger(__name__)
|
|
64
|
+
self.host = host
|
|
65
|
+
self.token = token
|
|
66
|
+
|
|
67
|
+
if self.host == None:
|
|
68
|
+
raise WDCException("Could not create WDCClient with host = None")
|
|
69
|
+
|
|
70
|
+
self.session = requests.Session()
|
|
71
|
+
if self.token != None:
|
|
72
|
+
self.session.headers.update({'token': self.token})
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def loadAsDataFrame(self, endpoint: str, params: dict[str, Any] = {}) -> pd.DataFrame:
|
|
76
|
+
"""
|
|
77
|
+
Loads the *complete* tabular data from the endpoint and returns a
|
|
78
|
+
Pandas-DataFrame. The method transparently pages through the
|
|
79
|
+
complete results.
|
|
80
|
+
|
|
81
|
+
:param endpoint: the endpoint
|
|
82
|
+
:param params: a dictionary with possible parameters for the
|
|
83
|
+
query-string of the request. Values will be properly encoded.
|
|
84
|
+
|
|
85
|
+
:return: the data as Pands-DataFrame
|
|
86
|
+
"""
|
|
87
|
+
json = self.loadAsJson(endpoint, params);
|
|
88
|
+
|
|
89
|
+
return pd.json_normalize(json)
|
|
90
|
+
|
|
91
|
+
def loadAsJson(self, endpoint: str, params: dict[str, Any] = {}) -> []:
|
|
92
|
+
"""
|
|
93
|
+
Loads the tabular data from the endpoint and returns it as
|
|
94
|
+
JSON-Array. The method transparently pages through the
|
|
95
|
+
complete results.
|
|
96
|
+
|
|
97
|
+
:param endpoint: the endpoint
|
|
98
|
+
:param params: a dictionary with possible parameters for the
|
|
99
|
+
query-string of the request. Values will be properly encoded.
|
|
100
|
+
|
|
101
|
+
:return: the data as JSON-Array
|
|
102
|
+
"""
|
|
103
|
+
res = []
|
|
104
|
+
|
|
105
|
+
def collect_it(e, pos, maxPos):
|
|
106
|
+
nonlocal res
|
|
107
|
+
res.append(e)
|
|
108
|
+
|
|
109
|
+
self.loadForEach(endpoint, params, collect_it)
|
|
110
|
+
|
|
111
|
+
return res
|
|
112
|
+
|
|
113
|
+
def loadForEach(self, endpoint: str, params: dict[str, Any] = {}, f: Callable[[Any, int, int], None] = None) -> None:
|
|
114
|
+
"""
|
|
115
|
+
Provides the means to work on larger resultsets by providing a Callback.
|
|
116
|
+
|
|
117
|
+
:param endpoint: the endpoint
|
|
118
|
+
:param params: a dictionary with possible parameters for the
|
|
119
|
+
query-string of the request. Values will be properly encoded.
|
|
120
|
+
:param f: a Callable-Object (function, ...) with the signature (row, currentPos, maxPos) as a callback to work on
|
|
121
|
+
each entry in the dataset.
|
|
122
|
+
"""
|
|
123
|
+
url = self.host + "/" + endpoint
|
|
124
|
+
|
|
125
|
+
self.logger.debug('endpoint:' + url + ', params:' + str(params))
|
|
126
|
+
|
|
127
|
+
counter = 1
|
|
128
|
+
while url != None:
|
|
129
|
+
# nur beim ersten request dürfen die Params genutzt werden
|
|
130
|
+
# Ansonsten kommt es ja über den nextLink
|
|
131
|
+
if counter == 1:
|
|
132
|
+
response = self.session.get(url, params = params)
|
|
133
|
+
else:
|
|
134
|
+
response = self.session.get(url)
|
|
135
|
+
|
|
136
|
+
self.logger.debug("headers: %s", response.headers)
|
|
137
|
+
|
|
138
|
+
json = response.json()
|
|
139
|
+
|
|
140
|
+
# Everything ok?
|
|
141
|
+
if json['responseHeader']['state'] != 'OK':
|
|
142
|
+
raise WDCException(
|
|
143
|
+
json['responseHeader']['msg'],
|
|
144
|
+
query = json['responseHeader']['query'],
|
|
145
|
+
state = json['responseHeader']['state'])
|
|
146
|
+
|
|
147
|
+
self.logger.debug("json: %s", json)
|
|
148
|
+
|
|
149
|
+
for e in json["content"]:
|
|
150
|
+
f(e, counter, json['page']['totalElements'])
|
|
151
|
+
counter += 1
|
|
152
|
+
|
|
153
|
+
# gehts weiter?
|
|
154
|
+
if 'links' in json and 'next' in json['links']:
|
|
155
|
+
url = json['links']['next']
|
|
156
|
+
self.logger.debug("nextLink %s", url)
|
|
157
|
+
else:
|
|
158
|
+
url = None
|
|
159
|
+
|
|
160
|
+
def put(self, endpoint: str, body: str, params: dict[str, Any] = {}) -> None:
|
|
161
|
+
"""
|
|
162
|
+
Executes a PUT request to the specified endpoint with a body.
|
|
163
|
+
Raises a WDCException if the response is not "OK".
|
|
164
|
+
|
|
165
|
+
:param endpoint: the endpoint
|
|
166
|
+
:param body: the body to send with the PUT-Request
|
|
167
|
+
"""
|
|
168
|
+
url = self.host + "/" + endpoint
|
|
169
|
+
|
|
170
|
+
response = self.session.put(url, data=body, **params)
|
|
171
|
+
|
|
172
|
+
#self.logger.debug("headers: %s", response.headers)
|
|
173
|
+
#self.logger.debug("response: %s", response.status_code)
|
|
174
|
+
|
|
175
|
+
if response.status_code != 200 and response.status_code != 201:
|
|
176
|
+
raise WDCException("Could not send PUT for url: " + url)
|
|
177
|
+
|
|
178
|
+
def __str__(self) -> str:
|
|
179
|
+
return "[host=" + str(self.host) + ", token=" + str(self.token) + "}"
|
|
180
|
+
|
|
181
|
+
def loadDomainGraph(self, snapshot: str, selection: str = None, variant: str = 'ONLY_SEEDS') -> nx.DiGraph:
|
|
182
|
+
"""
|
|
183
|
+
Loads a DomainGraph as a DiGraph.
|
|
184
|
+
|
|
185
|
+
Note: If you intend to "merge" other data to nodes or edges, it
|
|
186
|
+
might be simpler to use the methods loadDomainGraphNodes()
|
|
187
|
+
and lodDomainGraphEdges() to load the data, modify it and
|
|
188
|
+
create the graph with createGraph().
|
|
189
|
+
|
|
190
|
+
:param snapshot: The machineName of the snapshot.
|
|
191
|
+
:param selection: A selection of the snapshot.
|
|
192
|
+
:param variant: A value of an enumeration of the variant of the DomainGraph.
|
|
193
|
+
|
|
194
|
+
:return: DiGraph of the DomainGraph
|
|
195
|
+
"""
|
|
196
|
+
domainGraphId = self.findDomainGraphId(snapshot, selection, variant)
|
|
197
|
+
|
|
198
|
+
nodes, edges = self.loadDomainGraphData(snapshot, selection, variant)
|
|
199
|
+
|
|
200
|
+
return self.createDomainGraph(nodes, edges)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def findDomainGraphId(self, snapshot: str, selection: str = None, variant: str = 'ONLY_SEEDS'):
|
|
204
|
+
"""
|
|
205
|
+
Finds the DomainGraphId of the specified Graph.
|
|
206
|
+
|
|
207
|
+
:param snapshot: The machineName of the snapshot.
|
|
208
|
+
:param selection: A selection of the snapshot.
|
|
209
|
+
:param variant: A value of an enumeration of the variant of the DomainGraph.
|
|
210
|
+
|
|
211
|
+
:return: the ID of the specified DomainGraph.
|
|
212
|
+
"""
|
|
213
|
+
domainGraphs = self.loadAsJson(
|
|
214
|
+
f"/api/domaingraph/list",
|
|
215
|
+
{
|
|
216
|
+
"snapshot": snapshot,
|
|
217
|
+
"selection": selection,
|
|
218
|
+
"variant": variant
|
|
219
|
+
})
|
|
220
|
+
|
|
221
|
+
self.logger.debug("domainGraphs:" + str(domainGraphs))
|
|
222
|
+
|
|
223
|
+
# es darf nur einer sein
|
|
224
|
+
if len(domainGraphs) != 1:
|
|
225
|
+
raise WDCException("There must be exactly one DomainGraph but found: " + len(domainGraphs))
|
|
226
|
+
|
|
227
|
+
return domainGraphs[0]['id']
|
|
228
|
+
|
|
229
|
+
def loadDomainGraphData(self, snapshot: str, selection: str = None, variant: str = 'ONLY_SEEDS'):
|
|
230
|
+
"""
|
|
231
|
+
Convienence method load the nodes *and* edges of a DomainGraph.
|
|
232
|
+
|
|
233
|
+
:return: a tuple of (nodes, edges) as JSON.
|
|
234
|
+
"""
|
|
235
|
+
domainGraphId = self.findDomainGraphId(snapshot, selection, variant)
|
|
236
|
+
nodes = self.loadAsJson(f"/api/domaingraph/{domainGraphId}/nodes")
|
|
237
|
+
edges = self.loadAsJson(f"/api/domaingraph/{domainGraphId}/edges")
|
|
238
|
+
|
|
239
|
+
return nodes, edges
|
|
240
|
+
|
|
241
|
+
def createDomainGraph(self, nodes, edges) -> nx.DiGraph:
|
|
242
|
+
"""
|
|
243
|
+
Create a DiGraph from a list of nodes and a list of edges.
|
|
244
|
+
|
|
245
|
+
:param nodes a JSON-Object or a DataFrame
|
|
246
|
+
:param edges a JSON-Object or a DataFrame
|
|
247
|
+
|
|
248
|
+
:return the created DiGraph.
|
|
249
|
+
"""
|
|
250
|
+
|
|
251
|
+
if isinstance(nodes, pd.DataFrame):
|
|
252
|
+
nodes = json.loads(nodes.to_json(orient="records"));
|
|
253
|
+
|
|
254
|
+
if isinstance(edges, pd.DataFrame):
|
|
255
|
+
edges = json.loads(edges.to_json(orient="records"));
|
|
256
|
+
|
|
257
|
+
# Graph bauen
|
|
258
|
+
graph = nx.DiGraph()
|
|
259
|
+
for n in nodes:
|
|
260
|
+
_id = n.pop("id")
|
|
261
|
+
graph.add_node(_id, **n)
|
|
262
|
+
|
|
263
|
+
for e in edges:
|
|
264
|
+
graph.add_edge(e['source'], e['target'], weight=e['weight'])
|
|
265
|
+
|
|
266
|
+
return graph
|