fable-client 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ __all__ = ["FableClient", "FableError", "estimate", "types"]
2
+
3
+ from ._client import FableClient, FableError
4
+ from . import _estimate as estimate
5
+ from . import types
fable_client/_cli.py ADDED
@@ -0,0 +1,439 @@
1
+ import contextlib
2
+ import csv
3
+ import itertools
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any, TypeVar
7
+
8
+ import click
9
+ import httpx
10
+ from fable_model import (
11
+ BitVectorEntity,
12
+ BaseMatchRequest,
13
+ MatchMethod,
14
+ BaseTransformRequest,
15
+ AttributeValueEntity,
16
+ BaseMaskRequest,
17
+ TransformConfig,
18
+ EmptyValueHandling,
19
+ GlobalTransformerConfig,
20
+ NormalizationTransformer,
21
+ WeightedAttributeConfig,
22
+ )
23
+ from pydantic import BaseModel
24
+
25
+ from ._client import FableClient
26
+ from ._estimate import compute_attribute_stats
27
+ from ._model import FakerGeneratorConfig, FakerGeneratorSpec
28
+
29
+
30
def create_client(ctx: click.Context) -> FableClient:
    """Build a FableClient from the base URL and timeout stored on the click context."""
    base_url = ctx.obj["BASE_URL"]
    timeout = int(ctx.obj["TIMEOUT_SECS"])
    http_client = httpx.Client(base_url=base_url, timeout=timeout)

    return FableClient(client=http_client)
32
+
33
+
34
def read_bit_vector_entity_file(reader: csv.DictReader, id_column: str, value_column: str):
    """
    Convert every row of a bit vector CSV file into a bit vector entity.

    Args:
        reader: CSV dict reader instance
        id_column: name of the column holding the entity ID
        value_column: name of the column holding the bit vector value

    Returns:
        list of bit vector entities, one per row, in file order
    """
    vectors = []

    for row in reader:
        vectors.append(BitVectorEntity(id=row[id_column], value=row[value_column]))

    return vectors
47
+
48
+
49
def read_attribute_value_entity_file(reader: csv.DictReader, id_column: str):
    """
    Read a CSV file containing attribute value entities.

    Args:
        reader: CSV dict reader instance
        id_column: name of ID column

    Returns:
        tuple of the CSV field names and the list of entities, one per row

    Raises:
        click.exceptions.Exit: if the ID column is not present in the file
    """
    # an empty file has no header row, in which case `fieldnames` is None
    field_names: list[str] = list(reader.fieldnames or [])

    if id_column not in field_names:
        click.echo(f"Column {id_column} not found in CSV file", err=True)
        raise click.exceptions.Exit(1)

    def _row_to_entity(row: dict[str, Any]):
        return AttributeValueEntity(
            id=str(row[id_column]),
            attributes={
                # rows shorter than the header are padded with None by DictReader;
                # treat those as empty values instead of the literal string "None"
                attribute_name: "" if attribute_value is None else str(attribute_value)
                for attribute_name, attribute_value in row.items()
                if attribute_name != id_column
            },
        )

    entities = [_row_to_entity(row) for row in reader]

    return field_names, entities
69
+
70
+
71
+ _M = TypeVar("_M", bound=BaseModel)
72
+
73
+
74
def parse_json_file_into(ctx: click.Context, path: str | Path, model: type[_M]) -> _M:
    """Load a JSON file using the context's encoding and construct `model` from its top-level keys."""
    encoding = ctx.obj["ENCODING"]

    with open(path, mode="r", encoding=encoding) as json_file:
        payload = json.load(json_file)
        return model(**payload)
77
+
78
+
79
@contextlib.contextmanager
def read_csv_file(ctx: click.Context, path: str | Path, mode: str = "r"):
    """Open a CSV file and yield a dict reader over it; the file is closed when the context exits."""
    # newline="" leaves newline handling to the csv module
    with open(path, mode=mode, encoding=ctx.obj["ENCODING"], newline="") as csv_file:
        dict_reader = csv.DictReader(csv_file, delimiter=ctx.obj["DELIMITER"])
        yield dict_reader
83
+
84
+
85
@contextlib.contextmanager
def write_csv_file(
    ctx: click.Context, path: str | Path, fieldnames: list[str], mode: str = "w", write_header: bool = True
):
    """
    Open a CSV file for writing and yield a dict writer for the given field names.

    The header row is written before the writer is handed out unless `write_header` is false.
    The file is closed when the context exits.
    """
    # newline="" leaves newline handling to the csv module
    with open(path, mode=mode, encoding=ctx.obj["ENCODING"], newline="") as csv_file:
        dict_writer = csv.DictWriter(csv_file, delimiter=ctx.obj["DELIMITER"], fieldnames=fieldnames)
        if write_header:
            dict_writer.writeheader()
        yield dict_writer
96
+
97
+
98
@click.group()
@click.pass_context
@click.option("--base-url", default="http://localhost:8080", help="base URL to HTTP-based PPRL service")
@click.option(
    "-b", "--batch-size", type=click.IntRange(min=1), default=1_000, help="amount of bit vectors to match at a time"
)
@click.option("--timeout-secs", type=click.IntRange(min=1), default=30, help="seconds until a request times out")
@click.option("--delimiter", type=str, default=",", help="column delimiter for CSV files")
@click.option("--encoding", type=str, default="utf-8", help="character encoding for files")
def app(ctx: click.Context, base_url: str, batch_size: int, timeout_secs: int, delimiter: str, encoding: str):
    """HTTP client for performing PPRL based on Bloom filters."""
    # NOTE: the docstring above is rendered by click as the description in `--help`.
    # The global options are stored on the context object so that subcommands can read them.
    ctx.ensure_object(dict)
    ctx.obj["BASE_URL"] = base_url
    ctx.obj["BATCH_SIZE"] = batch_size
    ctx.obj["TIMEOUT_SECS"] = timeout_secs
    ctx.obj["DELIMITER"] = delimiter
    ctx.obj["ENCODING"] = encoding
114
+
115
+
116
@app.command()
@click.pass_context
@click.argument("base_match_request_file_path", type=click.Path(exists=True, path_type=Path))
@click.argument("vector_file_path", type=click.Path(exists=True, path_type=Path, dir_okay=False), nargs=-1)
@click.argument("output_file_path", type=click.Path(path_type=Path, dir_okay=False))
@click.option("--id-column", type=str, default="id", help="column name in input CSV file containing vector ID")
@click.option("--value-column", type=str, default="value", help="column name in input CSV file containing vector value")
def match(
    ctx: click.Context,
    base_match_request_file_path: Path,
    vector_file_path: tuple[Path, ...],
    output_file_path: Path,
    id_column: str,
    value_column: str,
):
    """
    Match bit vectors from CSV files against each other.

    BASE_MATCH_REQUEST_FILE_PATH is the path to a JSON file containing the base match request.
    VECTOR_FILE_PATH is the path to a CSV file containing bit vectors.
    At least two files must be specified.
    OUTPUT_FILE_PATH is the path of the CSV file where the matches should be written to.
    """
    if len(vector_file_path) < 2:
        click.echo("Must specify at least two CSV files containing vectors", err=True)
        ctx.exit(1)

    client = create_client(ctx)
    base_match_request = parse_json_file_into(ctx, base_match_request_file_path, BaseMatchRequest)

    batch_size = int(ctx.obj["BATCH_SIZE"])
    file_count = len(vector_file_path)
    vectors_per_file: list[list[BitVectorEntity]] = []

    for p in vector_file_path:
        with read_csv_file(ctx, p, mode="r") as reader:
            vectors_per_file.append(read_bit_vector_entity_file(reader, id_column, value_column))

    # check that all files have the same amount of entries
    do_pairwise_matching = base_match_request.config.method == MatchMethod.pairwise

    if do_pairwise_matching:
        vector_lens = {len(v) for v in vectors_per_file}

        if len(vector_lens) != 1:
            click.echo(
                "All bit vector files must have the same amount of vectors for pairwise matching, got "
                f"{', '.join(str(len(v)) for v in vectors_per_file)}",
                err=True,
            )
            ctx.exit(1)

    with write_csv_file(
        ctx,
        output_file_path,
        ["domain_id", "domain_file", "range_id", "range_file", "similarity"],
        mode="w",
        write_header=True,
    ) as writer:
        # every unordered pair of files is matched exactly once; the file indices use names distinct
        # from the batch start indices below so that the inner loop cannot overwrite them
        for domain_file_idx, range_file_idx in itertools.combinations(range(file_count), 2):
            # get domain and range vectors
            domain_vectors, range_vectors = vectors_per_file[domain_file_idx], vectors_per_file[range_file_idx]
            # these are tracked for user feedback
            domain_file_path, range_file_path = vector_file_path[domain_file_idx], vector_file_path[range_file_idx]

            # construct the starting indices for batch-wise processing
            domain_starts = range(0, len(domain_vectors), batch_size)
            range_starts = range(0, len(range_vectors), batch_size)

            # when doing pairwise matching, matching will be performed row-wise,
            # otherwise, cross-wise matching is performed
            # (materialized into a list so that the progress bar knows its length)
            if do_pairwise_matching:
                start_pairs = list(zip(domain_starts, range_starts))
            else:
                start_pairs = list(itertools.product(domain_starts, range_starts))

            with click.progressbar(
                start_pairs, label=f"Matching bit vectors from {domain_file_path.name} and {range_file_path.name}"
            ) as pbar:
                # iterate over pairs of starting indices for domain and range
                for domain_start, range_start in pbar:
                    # retrieve batch of vectors
                    domain_vector_batch = domain_vectors[domain_start : domain_start + batch_size]
                    range_vector_batch = range_vectors[range_start : range_start + batch_size]

                    # and perform matching
                    r = client.match(
                        base_match_request.with_vectors(
                            domain_lst=domain_vector_batch, range_lst=range_vector_batch
                        )
                    )

                    writer.writerows(
                        [
                            {
                                "domain_id": m.domain.id,
                                "domain_file": domain_file_path.name,
                                "range_id": m.range.id,
                                "range_file": range_file_path.name,
                                "similarity": m.similarity,
                            }
                            for m in r.matches
                        ]
                    )
222
+
223
+
224
@app.command()
@click.pass_context
@click.argument("base_transform_request_file_path", type=click.Path(exists=True, path_type=Path))
@click.argument("entity_file_path", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file_path", type=click.Path(path_type=Path, dir_okay=False))
@click.option("--entity-id-column", type=str, default="id", help="column name in entity CSV file containing ID")
def transform(
    ctx: click.Context,
    base_transform_request_file_path: Path,
    entity_file_path: Path,
    output_file_path: Path,
    entity_id_column: str,
):
    """
    Perform pre-processing on a CSV file with entities.

    BASE_TRANSFORM_REQUEST_FILE_PATH is the path to a JSON file containing the base transform request.
    ENTITY_FILE_PATH is the path to the CSV file containing entities.
    OUTPUT_FILE_PATH is the path of the CSV file where the pre-processed entities should be written to.
    """
    client = create_client(ctx)
    base_transform_request = parse_json_file_into(ctx, base_transform_request_file_path, BaseTransformRequest)

    # load all entities up front; the input header is reused for the output file
    with read_csv_file(ctx, entity_file_path, mode="r") as reader:
        field_names, entities = read_attribute_value_entity_file(reader, entity_id_column)

    # start offsets of each batch
    batch_size = int(ctx.obj["BATCH_SIZE"])
    batch_starts = list(range(0, len(entities), batch_size))

    with write_csv_file(ctx, output_file_path, field_names, mode="w", write_header=True) as writer:
        with click.progressbar(batch_starts, label="Transforming entities") as pbar:
            for start in pbar:
                # send one batch to the service
                request = base_transform_request.with_entities(entities[start : start + batch_size])
                response = client.transform(request)

                # flatten each returned entity back into a CSV row
                rows = [{entity_id_column: entity.id, **entity.attributes} for entity in response.entities]
                writer.writerows(rows)
266
+
267
+
268
@app.command()
@click.pass_context
@click.argument("base_mask_request_file_path", type=click.Path(exists=True, path_type=Path))
@click.argument("entity_file_path", type=click.Path(exists=True, path_type=Path))
@click.argument("output_file_path", type=click.Path(dir_okay=False, file_okay=True, path_type=Path))
@click.option("--entity-id-column", type=str, default="id", help="column name in entity CSV file containing ID")
@click.option(
    "--entity-value-column", type=str, default="value", help="column name in output CSV file containing vector value"
)
def mask(
    ctx: click.Context,
    base_mask_request_file_path: Path,
    entity_file_path: Path,
    output_file_path: Path,
    entity_id_column: str,
    entity_value_column: str,
):
    """
    Mask a CSV file with entities.

    BASE_MASK_REQUEST_FILE_PATH is the path to a JSON file containing the base mask request.
    ENTITY_FILE_PATH is the path to the CSV file containing entities.
    OUTPUT_FILE_PATH is the path of the CSV file where the masked entities should be written to.
    """
    client = create_client(ctx)
    base_mask_request = parse_json_file_into(ctx, base_mask_request_file_path, BaseMaskRequest)

    # only the entities are needed here; the input header is not reused for the output
    with read_csv_file(ctx, entity_file_path, mode="r") as reader:
        _, entities = read_attribute_value_entity_file(reader, entity_id_column)

    # start offsets of each batch
    batch_size = int(ctx.obj["BATCH_SIZE"])
    batch_starts = list(range(0, len(entities), batch_size))
    output_columns = [entity_id_column, entity_value_column]

    with write_csv_file(ctx, output_file_path, output_columns, mode="w", write_header=True) as writer:
        with click.progressbar(batch_starts, label="Masking entities") as pbar:
            for start in pbar:
                # send one batch to the service
                request = base_mask_request.with_entities(entities[start : start + batch_size])
                response = client.mask(request)

                # one output row per masked entity
                rows = [
                    {entity_id_column: entity.id, entity_value_column: entity.value} for entity in response.entities
                ]
                writer.writerows(rows)
317
+
318
+
319
@app.group()
def estimate():
    """Estimate attribute weights based on randomly generated data."""
    # command group only; the subcommands registered on it do the actual work
323
+
324
+
325
def common_estimate_options(fn):
    """Decorate a click command with the options shared by all estimation subcommands."""
    option_decorators = (
        click.option(
            "--base-transform-request-file-path",
            type=click.Path(exists=True, path_type=Path),
            help="path to file containing attribute-level and global transformer definitions",
        ),
        click.option(
            "-q",
            "--token-size",
            type=click.IntRange(min=2),
            default=2,
            help="size of tokens to split each attribute value into",
        ),
        click.option(
            "-p", "--padding", type=str, default="_", help="padding to use when splitting attribute values into tokens"
        ),
    )

    # apply in declaration order so the options are registered in the same sequence as before
    for decorate in option_decorators:
        fn = decorate(fn)

    return fn
343
+
344
+
345
@estimate.command()
@click.pass_context
@click.argument("GENERATOR_CONFIG_FILE_PATH", type=click.Path(exists=True, path_type=Path))
@click.argument("ATTRIBUTE_CONFIG_OUTPUT_FILE_PATH", type=click.Path(path_type=Path))
@common_estimate_options
def faker(
    ctx: click.Context,
    generator_config_file_path: Path,
    attribute_config_output_file_path: Path,
    base_transform_request_file_path: Path | None,
    token_size: int,
    padding: str,
):
    """
    Estimate attribute weights based on data generated by Faker.

    GENERATOR_CONFIG_FILE_PATH is the file which defines the Faker providers to use.
    ATTRIBUTE_CONFIG_OUTPUT_FILE_PATH is the path to the file where the attribute weights will be written to.
    """
    # Faker is imported lazily so that a missing installation results in a readable message
    try:
        from faker import Faker
    except ImportError:
        click.echo("Faker not found, install it with `pip install fable_client[faker]`", err=True)
        raise click.exceptions.Exit(1)

    client = create_client(ctx)
    generator_config = parse_json_file_into(ctx, generator_config_file_path, FakerGeneratorConfig)
    batch_size = int(ctx.obj["BATCH_SIZE"])

    # use the supplied base transform request, or fall back to a default one
    if base_transform_request_file_path is not None:
        base_transform_request = parse_json_file_into(ctx, base_transform_request_file_path, BaseTransformRequest)
    else:
        base_transform_request = BaseTransformRequest(
            config=TransformConfig(empty_value=EmptyValueHandling.skip),
            global_transformers=GlobalTransformerConfig(before=[NormalizationTransformer()]),
        )

    # seeded Faker instance for the configured locales
    fake = Faker(generator_config.locale)
    fake.seed_instance(generator_config.seed)

    def _bind_generator(spec: FakerGeneratorSpec):
        """Resolve a generator spec into a zero-argument callable producing string values."""
        provider = getattr(fake, spec.function_name, None)

        # reject names that do not resolve to something callable on the Faker instance
        if not callable(provider):
            click.echo(f"Invalid faker function: {spec.function_name}", err=True)
            raise click.exceptions.Exit(1)

        return lambda: str(provider(**spec.args))

    # attribute name -> generator fn; all specs are resolved before any data is generated
    generator_by_attribute = {}

    for spec in generator_config.generators:
        generator_by_attribute[spec.attribute_name] = _bind_generator(spec)

    # generate the requested amount of entities, using the running index as ID
    entities = []

    for i in range(generator_config.count):
        attributes = {name: generate() for name, generate in generator_by_attribute.items()}
        entities.append(AttributeValueEntity(id=str(i), attributes=attributes))

    # compute stats for each attribute
    stats_by_attribute = compute_attribute_stats(
        client, entities, base_transform_request, token_size, padding, batch_size
    )

    # turn the stats into weighted attribute configs
    attribute_configs = []

    for attribute_name, attribute_stats in stats_by_attribute.items():
        attribute_configs.append(
            WeightedAttributeConfig(
                attribute_name=attribute_name,
                weight=attribute_stats["ngram_entropy"],
                average_token_count=attribute_stats["average_tokens"],
            )
        )

    # export them
    serialized_configs = [cfg.model_dump(mode="json", exclude_none=True) for cfg in attribute_configs]

    with open(attribute_config_output_file_path, mode="w", encoding="utf-8") as f:
        json.dump(serialized_configs, f, indent=2)
@@ -0,0 +1,91 @@
1
+ from json import JSONDecodeError
2
+ from typing import TypeVar
3
+
4
+ import httpx
5
+ from fable_model import (
6
+ VectorMatchRequest,
7
+ VectorMatchResponse,
8
+ EntityTransformRequest,
9
+ EntityTransformResponse,
10
+ EntityMaskRequest,
11
+ EntityMaskResponse,
12
+ )
13
+ from pydantic import BaseModel, ValidationError
14
+
15
+ _MI = TypeVar("_MI", bound=BaseModel)
16
+ _MO = TypeVar("_MO", bound=BaseModel)
17
+
18
+
19
class GenericErrorResponse(BaseModel):
    """Error body that `new_error_from_response` tries to parse for every non-422 error response."""

    # message that gets appended to the error message of the resulting FableError
    detail: str
21
+
22
+
23
class ValidationErrorDetail(BaseModel):
    """Single entry of the `detail` list in a validation error response."""

    # path to the offending value; list positions are reported as integers by FastAPI,
    # so both strings and integers must be accepted for the response to parse
    loc: list[str | int]
    # error message for this entry
    msg: str
    # error type identifier for this entry
    type: str
27
+
28
+
29
class ValidationErrorResponse(BaseModel):
    """Error body that `new_error_from_response` tries to parse for 422 (unprocessable entity) responses."""

    # one entry per reported validation problem
    detail: list[ValidationErrorDetail]
31
+
32
+
33
class FableError(httpx.HTTPError):
    """
    Error raised when the service answers with an unexpected status code.

    Attributes:
        error_response: parsed error body, or None if there is none
        error_type: "validation" or "default" depending on the kind of parsed error body, otherwise "unknown"
    """

    def __init__(
        self, message: str, request: httpx.Request, error: GenericErrorResponse | ValidationErrorResponse = None
    ):
        super().__init__(message)
        self._request = request
        self.error_response = error

        # classify the error by the model its body was parsed into
        if isinstance(error, ValidationErrorResponse):
            kind = "validation"
        elif isinstance(error, GenericErrorResponse):
            kind = "default"
        else:
            kind = "unknown"

        self.error_type = kind
48
+
49
+
50
def new_error_from_response(r: httpx.Response):
    """
    Create a FableError from an HTTP response with an unexpected status code.

    The response body is parsed on a best-effort basis. If it cannot be parsed into the
    expected error model, the returned error only carries the status code.

    Args:
        r: response received from the service

    Returns:
        FableError describing the response
    """
    error_response = None
    error_message = f"received status code {r.status_code}"

    # validation error (422 by default with FastAPI)
    is_validation_error = r.status_code == httpx.codes.UNPROCESSABLE_ENTITY.value
    error_model = ValidationErrorResponse if is_validation_error else GenericErrorResponse

    try:
        # model_validate reports bodies that are valid JSON but not an object (e.g. a bare string
        # or list) as a ValidationError, whereas unpacking them with ** would raise a TypeError
        error_response = error_model.model_validate(r.json())
    except (ValidationError, JSONDecodeError, UnicodeDecodeError):
        # body is missing, not JSON or of an unexpected shape, so stick with the status code only
        pass
    else:
        if is_validation_error:
            error_message += ": invalid request"
        else:
            error_message += f": {error_response.detail}"

    return FableError(error_message, r.request, error_response)
69
+
70
+
71
class FableClient(object):
    """Client that sends match, transform and mask requests to the service via HTTP POST."""

    def __init__(self, client: httpx.Client | None = None, base_url: str | None = None):
        """
        Create a new client.

        Args:
            client: pre-configured httpx client to use; takes precedence over `base_url`
            base_url: base URL of the service, used to create an httpx client if `client` is not given

        Raises:
            ValueError: if neither `client` nor `base_url` is given
        """
        if client is None:
            # httpx rejects a base URL of None with an unrelated-looking TypeError, so fail early and clearly
            if base_url is None:
                raise ValueError("either `client` or `base_url` must be provided")

            client = httpx.Client(base_url=base_url)

        self._client = client

    def _request(self, path: str, model_in: _MI, model_out: type[_MO]) -> _MO:
        """
        POST a request model as JSON to `path` and parse the response body into `model_out`.

        Raises:
            FableError: if the response status code is not 200
        """
        r = self._client.post(path, json=model_in.model_dump(mode="json"))

        # we generally expect a 200 here
        if r.status_code != httpx.codes.OK.value:
            raise new_error_from_response(r)

        return model_out(**r.json())

    def match(self, request: VectorMatchRequest):
        """Send a match request to the `match/` endpoint and return the parsed response."""
        return self._request("match/", request, VectorMatchResponse)

    def transform(self, request: EntityTransformRequest):
        """Send a transform request to the `transform/` endpoint and return the parsed response."""
        return self._request("transform/", request, EntityTransformResponse)

    def mask(self, request: EntityMaskRequest):
        """Send a mask request to the `mask/` endpoint and return the parsed response."""
        return self._request("mask/", request, EntityMaskResponse)
@@ -0,0 +1,98 @@
1
+ __all__ = [
2
+ "split_into_wordlist",
3
+ "tokenize_wordlist",
4
+ "compute_average_tokens_for_token_list",
5
+ "count_tokens_in_token_list",
6
+ "compute_ngram_entropy",
7
+ "compute_attribute_stats",
8
+ "AttributeStats",
9
+ ]
10
+
11
+ import math
12
+ from collections import defaultdict, Counter
13
+ from typing import TypedDict
14
+
15
+ import fable_core
16
+ from fable_model import AttributeValueEntity, BaseTransformRequest
17
+
18
+ from ._client import FableClient
19
+
20
+
21
class AttributeStats(TypedDict):
    """Per-attribute statistics as returned by `compute_attribute_stats`."""

    # total amount of tokens divided by the amount of values of the attribute
    average_tokens: float
    # entropy (base 2) of the token frequency distribution of the attribute
    ngram_entropy: float
24
+
25
+
26
def split_into_wordlist(entities: list[AttributeValueEntity]) -> dict[str, list[str]]:
    """Group the attribute values of all entities by attribute name, keeping entity order."""
    wordlists: dict[str, list[str]] = defaultdict(list)

    for attributes in (entity.attributes for entity in entities):
        for name in attributes:
            wordlists[name].append(attributes[name])

    return wordlists
35
+
36
+
37
def tokenize_wordlist(wordlist: list[str], token_size=2, padding="_") -> list[set[str]]:
    """Tokenize every word of the list with `fable_core.common.tokenize`, keeping word order."""
    token_sets = []

    for word in wordlist:
        token_sets.append(fable_core.common.tokenize(word, q=token_size, padding=padding))

    return token_sets
39
+
40
+
41
def compute_average_tokens_for_token_list(token_list: list[set[str]]) -> float:
    """Return the average amount of tokens per entry, or 0 if there are no tokens at all."""
    token_total = 0

    for tokens in token_list:
        token_total += len(tokens)

    # a total of zero also covers the empty list, so the division below is always safe
    return token_total / len(token_list) if token_total != 0 else 0
48
+
49
+
50
def count_tokens_in_token_list(token_list: list[set[str]]) -> dict[str, int]:
    """Count in how many entries of the token list each token occurs."""
    occurrences: dict[str, int] = Counter()

    for tokens in token_list:
        # every token of an entry counts once
        occurrences.update(tokens)

    return occurrences
58
+
59
+
60
def compute_ngram_entropy(token_counts: dict[str, int]) -> float:
    """
    Compute the entropy (base 2) of a token frequency distribution.

    Args:
        token_counts: mapping of token to the amount of times it occurs

    Returns:
        entropy of the distribution, or 0.0 if there are no tokens
    """
    total_ngram_count = sum(token_counts.values())
    entropy = 0.0

    for count in token_counts.values():
        # tokens that never occur contribute nothing, and log2(0) is undefined
        if count == 0:
            continue

        p = count / total_ngram_count
        # subtracting each term avoids returning -0.0 when there is only one distinct token
        entropy -= p * math.log2(p)

    return entropy
69
+
70
+
71
def compute_attribute_stats(
    client: FableClient,
    entities: list[AttributeValueEntity],
    base_transform_request: BaseTransformRequest,
    token_size: int = 2,
    padding: str = "_",
    batch_size: int = 100,
):
    """
    Compute token statistics for every attribute of the given entities.

    The entities are first transformed by the service in batches. The transformed values are
    then tokenized to determine the average token count and token entropy of each attribute.

    Args:
        client: client used for sending the transform requests
        entities: entities to compute statistics for
        base_transform_request: transform request to attach each batch of entities to
        token_size: size of the tokens to split values into
        padding: padding to use when tokenizing values
        batch_size: amount of entities to transform per request

    Returns:
        dictionary mapping attribute names to their statistics
    """
    transformed: list[AttributeValueEntity] = []

    for start in range(0, len(entities), batch_size):
        batch = entities[start : start + batch_size]
        response = client.transform(base_transform_request.with_entities(batch))
        transformed.extend(response.entities)

    stats_by_attribute: dict[str, AttributeStats] = {}

    for attr_name, wordlist in split_into_wordlist(transformed).items():
        token_list = tokenize_wordlist(wordlist, token_size=token_size, padding=padding)
        average_tokens = compute_average_tokens_for_token_list(token_list)
        ngram_entropy = compute_ngram_entropy(count_tokens_in_token_list(token_list))

        stats_by_attribute[attr_name] = {"average_tokens": average_tokens, "ngram_entropy": ngram_entropy}

    return stats_by_attribute
fable_client/_model.py ADDED
@@ -0,0 +1,20 @@
1
+ from typing import Any
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+
6
class FakerGeneratorSpec(BaseModel):
    """Description of a single Faker function that generates values for one attribute."""

    # name of the function to look up on the Faker instance
    function_name: str
    # name of the entity attribute that the generated values are assigned to
    attribute_name: str
    # keyword arguments passed to the Faker function on every call
    args: dict[str, Any] = Field(default_factory=dict)
10
+
11
+
12
+ def _default_faker_locale():
13
+ return ["en_US"]
14
+
15
+
16
class FakerGeneratorConfig(BaseModel):
    """Configuration for generating entities with Faker."""

    # seed for the Faker instance
    seed: int
    # amount of entities to generate, must not be negative
    count: int = Field(ge=0)
    # locales passed to the Faker instance, defaults to ["en_US"]
    locale: list[str] = Field(default_factory=_default_faker_locale)
    # one generator specification per attribute to generate
    generators: list[FakerGeneratorSpec]
fable_client/main.py ADDED
@@ -0,0 +1,9 @@
1
+ from ._cli import app
2
+
3
+
4
def run_cli():
    """Run the command line application with a maximum content width of 120."""
    app(max_content_width=120)
6
+
7
+
8
+ if __name__ == "__main__":
9
+ run_cli()
fable_client/types.py ADDED
@@ -0,0 +1,4 @@
1
+ __all__ = ["AttributeStats", "FakerGeneratorConfig", "FakerGeneratorSpec"]
2
+
3
+ from ._estimate import AttributeStats
4
+ from ._model import FakerGeneratorConfig, FakerGeneratorSpec
@@ -0,0 +1,489 @@
1
+ Metadata-Version: 2.4
2
+ Name: fable-client
3
+ Version: 0.4.1
4
+ Summary: HTTP-based client for interacting with the FABLE service for privacy-preserving record linkage with Bloom filters.
5
+ License-Expression: MIT
6
+ License-File: LICENSE
7
+ Keywords: record linkage,privacy,bloom filter,bitarray,cryptography,service,client,cli
8
+ Author: Maximilian Jugl
9
+ Requires-Python: >=3.10,<4
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Environment :: Console
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Education
14
+ Classifier: Intended Audience :: End Users/Desktop
15
+ Classifier: Intended Audience :: Information Technology
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Topic :: Scientific/Engineering
18
+ Classifier: Topic :: Security :: Cryptography
19
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
26
+ Provides-Extra: faker
27
+ Requires-Dist: click (>=8.0.0,<9.0.0)
28
+ Requires-Dist: fable-core (>=0.1.5,<0.2.0)
29
+ Requires-Dist: fable-model (>=0.1.7,<0.2.0)
30
+ Requires-Dist: faker (>=26.0.0) ; extra == "faker"
31
+ Requires-Dist: httpx (>=0.28.0,<0.29.0)
32
+ Project-URL: Repository, https://github.com/ul-mds/fable-client
33
+ Description-Content-Type: text/markdown
34
+
35
+ [![PyPI](https://img.shields.io/pypi/v/fable-client?cacheSeconds=0)](https://pypi.org/project/fable-client/)
36
+ [![Python Versions](https://img.shields.io/pypi/pyversions/fable-client?cacheSeconds=0)](https://pypi.org/project/fable-client/)
37
+ ![Code Coverage](https://img.shields.io/badge/Coverage-92%25-green.svg)
38
+ [![License](https://img.shields.io/pypi/l/fable-client?cacheSeconds=0)](https://pypi.org/project/fable-client/)
39
+
40
+ # FABLE Client
41
+
42
+ This package contains an HTTP-based client for working with the server provided by
43
+ the [PPRL service](https://github.com/ul-mds/fable-pprl-service) which is part of the FABLE
44
+ (**F**ederated **A**nonymized **B**loom filter **L**inkage **E**ngine) ecosystem.
45
+ It also contains a command-line application which uses the library to process CSV files.
46
+
47
+ ## Installation
48
+
49
+ ```bash
50
+ pip install fable-client
51
+ ```
52
+
53
+ Weight estimation requires additional packages which are not shipped by default.
54
+ To add them, install this package using the following command.
55
+
56
+ ```bash
57
+ pip install fable-client[faker]
58
+ ```
59
+
60
+ ## Library methods
61
+
62
+ The library exposes functions for entity pre-processing, masking and bit vector matching.
63
+ They follow the data model that is also used by the FABLE PPRL service, which is exposed through
64
+ the [FABLE model package](https://github.com/ul-mds/fable-model).
65
+
66
+ ### Entity transformation
67
+
68
+ ```python
69
+ import fable_client
70
+ from fable_model import (
71
+ EntityTransformRequest,
72
+ TransformConfig,
73
+ EmptyValueHandling,
74
+ AttributeValueEntity,
75
+ GlobalTransformerConfig,
76
+ NormalizationTransformer,
77
+ )
78
+
79
+ client = fable_client.FableClient(base_url="http://localhost:8080")
80
+
81
+ response = client.transform(
82
+ EntityTransformRequest(
83
+ config=TransformConfig(empty_value=EmptyValueHandling.error),
84
+ entities=[AttributeValueEntity(id="001", attributes={"first_name": "Müller", "last_name": "Ludenscheidt"})],
85
+ global_transformers=GlobalTransformerConfig(before=[NormalizationTransformer()]),
86
+ )
87
+ )
88
+
89
+ print(response.entities)
90
+ # => [AttributeValueEntity(id='001', attributes={'first_name': 'muller', 'last_name': 'ludenscheidt'})]
91
+ ```
92
+
93
+ ### Entity masking
94
+
95
+ ```python
96
+ import fable_client
97
+ from fable_model import (
98
+ EntityMaskRequest,
99
+ MaskConfig,
100
+ HashConfig,
101
+ HashFunction,
102
+ HashAlgorithm,
103
+ RandomHash,
104
+ CLKFilter,
105
+ AttributeValueEntity,
106
+ )
107
+
108
+ client = fable_client.FableClient(base_url="http://localhost:8080")
109
+
110
+ response = client.mask(
111
+ EntityMaskRequest(
112
+ config=MaskConfig(
113
+ token_size=2,
114
+ hash=HashConfig(
115
+ function=HashFunction(algorithms=[HashAlgorithm.sha1], key="s3cr3t_k3y"), strategy=RandomHash()
116
+ ),
117
+ filter=CLKFilter(hash_values=5, filter_size=256),
118
+ ),
119
+ entities=[AttributeValueEntity(id="001", attributes={"first_name": "muller", "last_name": "ludenscheidt"})],
120
+ )
121
+ )
122
+
123
+ print(response.entities)
124
+ # => [BitVectorEntity(id='001', value='SKkgqBHBCJJCANICEKSpWMAUBYCQEMLuZgEQGBKRC8A=')]
125
+ ```
126
+
127
+ ### Bit vector matching
128
+
129
+ ```python
130
+ import fable_client
131
+ from fable_model import VectorMatchRequest, MatchConfig, SimilarityMeasure, BitVectorEntity
132
+
133
+ client = fable_client.FableClient(base_url="http://localhost:8080")
134
+
135
+ response = client.match(
136
+ VectorMatchRequest(
137
+ config=MatchConfig(measure=SimilarityMeasure.jaccard, threshold=0.8),
138
+ domain=[BitVectorEntity(id="001", value="SKkgqBHBCJJCANICEKSpWMAUBYCQEMLuZgEQGBKRC8A=")],
139
+ range=[
140
+ BitVectorEntity(id="100", value="UKkgqBHBDJJCANICELSpWMAUBYCMEMLrZgEQGBKRC7A="),
141
+ BitVectorEntity(id="101", value="H5DN45iUeEjrjbHZrzHb3AyQk9O4IgxcpENKKzEKRLE="),
142
+ ],
143
+ )
144
+ )
145
+
146
+ print(response.matches)
147
+ # => [Match(domain=BitVectorEntity(id='001', value='SKkgqBHBCJJCANICEKSpWMAUBYCQEMLuZgEQGBKRC8A='), range=BitVectorEntity(id='100', value='UKkgqBHBDJJCANICELSpWMAUBYCMEMLrZgEQGBKRC7A='), similarity=0.8536585365853658)]
148
+ ```
149
+
150
+ ### Attribute weight estimation
151
+
152
+ ```python
153
+ import fable_client
154
+ from fable_model import (
155
+ AttributeValueEntity,
156
+ BaseTransformRequest,
157
+ TransformConfig,
158
+ EmptyValueHandling,
159
+ GlobalTransformerConfig,
160
+ NormalizationTransformer,
161
+ )
162
+
163
+ client = fable_client.FableClient(base_url="http://localhost:8080")
164
+
165
+ stats = fable_client.estimate.compute_attribute_stats(
166
+ client,
167
+ [
168
+ AttributeValueEntity(id="001", attributes={"given_name": "Max", "last_name": "Mustermann", "gender": "m"}),
169
+ AttributeValueEntity(id="002", attributes={"given_name": "Maria", "last_name": "Musterfrau", "gender": "f"}),
170
+ ],
171
+ BaseTransformRequest(
172
+ config=TransformConfig(empty_value=EmptyValueHandling.skip),
173
+ global_transformers=GlobalTransformerConfig(before=[NormalizationTransformer()]),
174
+ ),
175
+ )
176
+
177
+ print(stats)
178
+ # => {'given_name': {'average_tokens': 5.0, 'ngram_entropy': 2.9219280948873623}, 'last_name': {'average_tokens': 11.0, 'ngram_entropy': 3.913977073182751}, 'gender': {'average_tokens': 2.0, 'ngram_entropy': 2.0}}
179
+ ```
180
+
181
+ ## Command line interface
182
+
183
+ The `fable` command exposes all the library's functions and adapts them to work with CSV files.
184
+ Running `fable --help` provides an overview of the command options.
185
+
186
+ ```
187
+ $ fable --help
188
+ Usage: fable [OPTIONS] COMMAND [ARGS]...
189
+
190
+ HTTP client for performing PPRL based on Bloom filters.
191
+
192
+ Options:
193
+ --base-url TEXT base URL to HTTP-based PPRL service
194
+ -b, --batch-size INTEGER RANGE amount of bit vectors to match at a time [x>=1]
195
+ --timeout-secs INTEGER RANGE seconds until a request times out [x>=1]
196
+ --delimiter TEXT column delimiter for CSV files
197
+ --encoding TEXT character encoding for files
198
+ --help Show this message and exit.
199
+
200
+ Commands:
201
+ estimate Estimate attribute weights based on randomly generated data.
202
+ mask Mask a CSV file with entities.
203
+ match Match bit vectors from CSV files against each other.
204
+ transform Perform pre-processing on a CSV file with entities
205
+ ```
206
+
207
+ The `fable` command works on two basic types of CSV files that follow a simple structure.
208
+ Entity files are CSV files that contain a column with a unique identifier and arbitrary additional columns which
209
+ contain values for certain attributes that identify an entity.
210
+ Each row is representative of a single entity.
211
+
212
+ ```csv
213
+ id,first_name,last_name,date_of_birth,gender
214
+ 001,Natalie,Sampson,1956-12-16,female
215
+ 002,Eric,Lynch,1910-01-11,female
216
+ 003,Pam,Vaughn,1983-10-05,male
217
+ 004,David,Jackson,2006-01-27,male
218
+ 005,Rachel,Dyer,1904-02-02,female
219
+ ```
220
+
221
+ Bit vector files contain an ID column and a value column which contains a representative bit vector.
222
+ These bit vectors are generally generated by masking a record from an entity file.
223
+
224
+ ```csv
225
+ id,value
226
+ 001,0Dr8t+kE5ltI+xdM85fwx0QLrTIgvFN35/0YvODNdOE0AaUHPphikXYy4LlArE4UqfjPs+wKtT233R7lBzSp5mwkCjTzA1tl0N7s+sFeKyIrOiGk0gNIYvA=
227
+ 002,QMEIkE9TN1Quv0K0QAIk1RZD3qF7nQh0IyOYqVDf8IQkyaLGcFjiLHsEgBpU8CRSCuATbWpjEwGi3dilizySQy4miGiJolilYmwKysjseq+IFsAU3T1IRjA=
228
+ 003,BqFoNZhrAVBq9SV1wBK0dUZLHDM9hCBoO4XdKCzvasSUELQeAB8+DV5tAhDl5KCSJfDCB6JG4WSoCFbozXqBYSUMqEQJE0JwhpRK6oLOcRRoGwGESDBMZwA=
229
+ 004,8C9KItMTwtz4oXQvo8G0t1bTnwspnghmJwyqqcL2RIHASb4XJHAqybMCXQBm5mq6h/kdxGbblxBjhy79jRUcI60haqZhNsst0n7OUAxM/UoZVumIilRIbCA=
230
+ 005,CFk4I0sKwnRoiTEOQASy1QZfHCGB1GBgYQDcZwDDtIkGGLOmLRhrQyOSlQDUDoYTbvaBRVqbkRnqmYQbDTEGlG+2y60FMmBEKtxsr0I4I00oMpuoXAsDWmA=
231
+ ```
232
+
233
+ Pre-processing is done with the `fable transform` command.
234
+ It requires a base transform request file, an entity file and an output file to write the pre-processed entities to.
235
+ Attribute and global transformer configurations can be provided, but at least one must be specified.
236
+
237
+ This example defines a global normalization transformer that is executed before all attribute-specific
238
+ transformers.
239
+ Date-time reformatting is applied to the `date_of_birth` column in the input file.
240
+
241
+ _request.json_
242
+
243
+ ```json
244
+ {
245
+ "config": {
246
+ "empty_value": "skip"
247
+ },
248
+ "attribute_transformers": [
249
+ {
250
+ "attribute_name": "date_of_birth",
251
+ "transformers": [
252
+ {
253
+ "name": "date_time",
254
+ "input_format": "%Y-%m-%d",
255
+ "output_format": "%Y%m%d"
256
+ }
257
+ ]
258
+ }
259
+ ],
260
+ "global_transformers": {
261
+ "before": [
262
+ {
263
+ "name": "normalization"
264
+ }
265
+ ]
266
+ }
267
+ }
268
+ ```
269
+
270
+ ```
271
+ $ fable transform ./request.json ./input.csv ./output.csv
272
+ Transforming entities [####################################] 100%
273
+ ```
274
+
275
+ _output.csv_
276
+
277
+ ```csv
278
+ id,first_name,last_name,date_of_birth,gender
279
+ 001,natalie,sampson,19561216,female
280
+ 002,eric,lynch,19100111,female
281
+ 003,pam,vaughn,19831005,male
282
+ 004,david,jackson,20060127,male
283
+ 005,rachel,dyer,19040202,female
284
+ ```
285
+
286
+ Masking is done with `fable mask` and its subcommands.
287
+ It requires a base mask request file, an entity file and an output file to write the masked entities to.
288
+
289
+ _request.json_
290
+
291
+ ```json
292
+ {
293
+ "config": {
294
+ "token_size": 2,
295
+ "hash": {
296
+ "function": {
297
+ "algorithms": ["sha256"],
298
+ "key": "s3cr3t_k3y",
299
+ "strategy": {
300
+ "name": "random_hash"
301
+ }
302
+ }
303
+ },
304
+ "prepend_attribute_name": true,
305
+ "filter": {
306
+ "type": "clk",
307
+ "filter_size": 512,
308
+ "hash_values": 5,
309
+ "padding": "_",
310
+ "hardeners": [
311
+ {
312
+ "name": "permute",
313
+ "seed": 727
314
+ },
315
+ {
316
+ "name": "rehash",
317
+ "window_size": 16,
318
+ "window_step": 8,
319
+ "samples": 2
320
+ }
321
+ ]
322
+ }
323
+ }
324
+ }
325
+ ```
326
+
327
+ _input.csv_
328
+
329
+ ```csv
330
+ id,first_name,last_name,date_of_birth,gender
331
+ 001,natalie,sampson,19561216,female
332
+ 002,eric,lynch,19100111,female
333
+ 003,pam,vaughn,19831005,male
334
+ 004,david,jackson,20060127,male
335
+ 005,rachel,dyer,19040202,female
336
+ ```
337
+
338
+ ```
339
+ $ fable mask ./request.json ./input.csv ./output.csv
340
+ Masking entities [####################################] 100%
341
+ ```
342
+
343
+ _output.csv_
344
+
345
+ ```csv
346
+ id,value
347
+ 001,wAWgITvQ1/VACpRYC2EKrfCkWziyEhmyKwi5sMsFrAQVoIBygTQScPRoIIAto0AwS0ihlcAIFAcQRwccY5IOmQ==
348
+ 002,cFCwQIABQ+TgSSdlGM/z54BEUgmYhA1GKtCxQAKAXFIWiPAFIQYaFArgM61pUAAeATwBlBEOEw4Oowe0rbcMGw==
349
+ 003,IgK16AAISCRoCuVAb1UBZYBBhGgxSEkKeMkTUCKAx4IAsNGJBS4ShgBAGIapBIQWJLiBFEEKAIWAGYS8ZZGMKw==
350
+ 004,ZlBkyoYIEWmeaxbPDNng5JjHACkCAJwjlBCJQBJ4ZBSyOAukACUahOAFQ20oNwTQEDRA005+VUUfsUQcKCGNxg==
351
+ 005,cUekQFQkI7TpTcRwmcNDoodRRBshlSEiAUjBQiMlxBLTmODMJICmDmxgUqYKonQEMFD58QsogRQFIgYUwJDOHA==
352
+ ```
353
+
354
+ Matching is done with the `fable match` command.
355
+ It allows the matching of multiple bit vector input files at once.
356
+ If more than two files are provided, the command will pick out pairs of files and match their contents against one
357
+ another.
358
+
359
+ In this example, the bit vectors of two files are matched against each other.
360
+ The Jaccard index is used as a similarity measure and a match threshold of 70% is applied.
361
+
362
+ _request.json_
363
+
364
+ ```json
365
+ {
366
+ "config": {
367
+ "measure": "jaccard",
368
+ "threshold": 0.7
369
+ }
370
+ }
371
+ ```
372
+
373
+ _domain.csv_
374
+
375
+ ```csv
376
+ id,value
377
+ 001,wAWgITvQ1/VACpRYC2EKrfCkWziyEhmyKwi5sMsFrAQVoIBygTQScPRoIIAto0AwS0ihlcAIFAcQRwccY5IOmQ==
378
+ 002,cFCwQIABQ+TgSSdlGM/z54BEUgmYhA1GKtCxQAKAXFIWiPAFIQYaFArgM61pUAAeATwBlBEOEw4Oowe0rbcMGw==
379
+ 003,IgK16AAISCRoCuVAb1UBZYBBhGgxSEkKeMkTUCKAx4IAsNGJBS4ShgBAGIapBIQWJLiBFEEKAIWAGYS8ZZGMKw==
380
+ 004,ZlBkyoYIEWmeaxbPDNng5JjHACkCAJwjlBCJQBJ4ZBSyOAukACUahOAFQ20oNwTQEDRA005+VUUfsUQcKCGNxg==
381
+ 005,cUekQFQkI7TpTcRwmcNDoodRRBshlSEiAUjBQiMlxBLTmODMJICmDmxgUqYKonQEMFD58QsogRQFIgYUwJDOHA==
382
+ ```
383
+
384
+ _range.csv_
385
+
386
+ ```csv
387
+ id,value
388
+ 101,kUSyxIgtIDSAB7ZYDkFQRZpFoMkCjCCCbDTWAUJTRAAEBpspBX4PNUZKi1AIVCABAjg6EAoKuwVleeUYgRBYoQ==
389
+ 102,IAA0YE4MGexIiYdEjwNzoOKmIA4CEHEiKQASYFPhxQTQlPAAgYW3AWBYmQJ8YMoaAj0ZkoOrFyUmFo52TDcIKw==
390
+ 103,BFAwREkkQbTdzddgDHFWgMRJMyxAMW+jq2ASICMBtIEr+YDCBRUgxEDIsQpciO4mAK3h2cIbXFQCMlaVpJPZIQ==
391
+ 104,wBWgITvQ2/VACpRYC2EKrfCkWxiyEhmyKwi5sMsFrBQVoIBygTQScPRoIIAto0AwS0ihldAIFAcQRwccY5IOmQ==
392
+ 105,QCCwIKQAED5AjaZYmodDcZAEBKkIxgAiDfEUoDKEdgEAEJAMAwcfQEbQkaQ4ANAABqiUscAKPQZEMJxRhTGIGQ==
393
+ ```
394
+
395
+ ```
396
+ $ fable match request.json domain.csv range.csv output.csv
397
+ Matching bit vectors from domain.csv and range.csv [####################################] 100%
398
+ ```
399
+
400
+ _output.csv_
401
+
402
+ ```csv
403
+ domain_id,domain_file,range_id,range_file,similarity
404
+ 001,domain.csv,104,range.csv,0.9690721649484536
405
+ ```
406
+
407
+ Weight estimation is done with the `fable estimate` command.
408
+ It generates random data based on a user-provided specification and computes estimates for attribute weights.
409
+ Data can be generated using [Faker](https://faker.readthedocs.io/).
410
+
411
+ *faker.json*
412
+
413
+ ```json
414
+ {
415
+ "seed": 727,
416
+ "count": 5000,
417
+ "locale": ["de_DE"],
418
+ "generators": [
419
+ {"function_name": "first_name_nonbinary", "attribute_name": "given_name"},
420
+ {"function_name": "last_name", "attribute_name": "last_name"},
421
+ {"function_name": "random_element", "attribute_name": "gender", "args": {"elements": ["m", "f"]}},
422
+ {"function_name": "street_name", "attribute_name": "street_name"},
423
+ {"function_name": "city", "attribute_name": "municipality"},
424
+ {"function_name": "postcode", "attribute_name": "postcode"}
425
+ ]
426
+ }
427
+ ```
428
+
429
+ ```
430
+ $ fable estimate faker faker.json faker-output.json
431
+ ```
432
+
433
+ *faker-output.json*
434
+
435
+ ```json
436
+ [
437
+ {
438
+ "attribute_name": "given_name",
439
+ "weight": 7.657958943890718,
440
+ "average_token_count": 7.5686
441
+ },
442
+ {
443
+ "attribute_name": "last_name",
444
+ "weight": 7.444573503220938,
445
+ "average_token_count": 7.5204
446
+ },
447
+ {
448
+ "attribute_name": "gender",
449
+ "weight": 1.9999971146079947,
450
+ "average_token_count": 2.0
451
+ },
452
+ {
453
+ "attribute_name": "street_name",
454
+ "weight": 7.605565770282046,
455
+ "average_token_count": 16.2188
456
+ },
457
+ {
458
+ "attribute_name": "municipality",
459
+ "weight": 7.659422921807241,
460
+ "average_token_count": 9.952
461
+ },
462
+ {
463
+ "attribute_name": "postcode",
464
+ "weight": 6.7812429085107,
465
+ "average_token_count": 5.9464
466
+ }
467
+ ]
468
+ ```
469
+
470
+ ## Configuring pytest
471
+
472
+ In order to run integration tests, the FABLE PPRL service is needed.
473
+ The first option is to spin up the service independently and direct pytest to it.
474
+ Alternatively, pytest can start a Docker test container for the duration of the test run.
475
+ The following table shows all available configuration options.
476
+ These variables can be defined in `.env` or `.env.test`.
477
+
478
+ | **Environment variable** | **Description** | **Default** |
479
+ |-----------------------------------|-----------------------------------------------------------------------------|-------------|
480
+ | PYTEST_PPRL_BASE_URL<sup>1)</sup> | Base URL for the FABLE PPRL service | |
481
+ | PYTEST_PPRL_SERVICE_VERSION | Tag of the FABLE PPRL service image that will run inside the test container | latest |
482
+ | PYTEST_PPRL_SERVICE_PORT          | Port that will be exposed by the test container                             | 8080        |
483
+
484
+ <sup>1)</sup> If defined, pytest will not spin up a test container.
485
+
486
+ ## License
487
+
488
+ MIT.
489
+
@@ -0,0 +1,12 @@
1
+ fable_client/__init__.py,sha256=Y3swMGPFlHZLsTJyKiBDsRRigHmwildneq-imdBBAf8,163
2
+ fable_client/_cli.py,sha256=_WXKcIhkw0EtR7ma7cX24O0-jCTTjJ134OHRr1mViLM,17095
3
+ fable_client/_client.py,sha256=HfsYRAq8ZBiELu8euJPG-x_xfiEZp1-zscWRmCm1FTc,2711
4
+ fable_client/_estimate.py,sha256=GrMIOxzm9lxsILhhqW78zEOXITr6FQbMtYL0IhvkdqM,3085
5
+ fable_client/_model.py,sha256=SiuTYVBbryJvVI1ophhF4YifgkuAN_EwhwRd6B4LPEU,449
6
+ fable_client/main.py,sha256=TcxUSffggdRNGZhlt_iSvQ7hQzJ3WVfZuuQ4fHDXmGE,113
7
+ fable_client/types.py,sha256=eiYF458YKlLU_QUocVP7t_bmSkXG6TYDnYVH0vk1Peo,175
8
+ fable_client-0.4.1.dist-info/METADATA,sha256=4_9Vgxye_tZOZyWbkZ5_RCrv19xOvIHU8gRybaxAA5c,16211
9
+ fable_client-0.4.1.dist-info/WHEEL,sha256=EGEvSphFYqXKs23-kQBeyNoJP1nrT8ZJKQoi5p5DYL8,88
10
+ fable_client-0.4.1.dist-info/entry_points.txt,sha256=7W7ZrPoF3jcq4IEAcIzs1IdMKP0NcJOHje7yO2Zlxao,51
11
+ fable_client-0.4.1.dist-info/licenses/LICENSE,sha256=Q-Ktj_VJi4SeAGoslkUojX0DD9cZFxaw36u4FR5f61o,1117
12
+ fable_client-0.4.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: poetry-core 2.4.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ fable=fable_client.main:run_cli
3
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 University Medical Center Leipzig, Dept. Medical Data Science
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.