onekit 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onekit
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: One package for utility functions.
5
5
  Home-page: https://github.com/estripling/onekit
6
6
  License: BSD 3-Clause
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "onekit"
3
- version = "0.5.0"
3
+ version = "0.7.0"
4
4
  description = "One package for utility functions."
5
5
  authors = ["onekit developers"]
6
6
  license = "BSD 3-Clause"
@@ -0,0 +1,773 @@
1
+ """Python toolz."""
2
+
3
+ import datetime as dt
4
+ import functools
5
+ import inspect
6
+ import itertools
7
+ import math
8
+ import os
9
+ import random
10
+ import re
11
+ import string
12
+ from typing import (
13
+ Any,
14
+ Callable,
15
+ Generator,
16
+ Iterable,
17
+ Iterator,
18
+ Optional,
19
+ Sequence,
20
+ Tuple,
21
+ Union,
22
+ )
23
+
24
+ import toolz
25
+ from toolz.curried import (
26
+ map,
27
+ reduce,
28
+ )
29
+
30
+ __all__ = (
31
+ "are_predicates_true",
32
+ "check_random_state",
33
+ "coinflip",
34
+ "collatz",
35
+ "concat_strings",
36
+ "contrast_sets",
37
+ "create_path",
38
+ "date_to_str",
39
+ "extend_range",
40
+ "fibonacci",
41
+ "flatten",
42
+ "filter_regex",
43
+ "func_name",
44
+ "headline",
45
+ "highlight_string_differences",
46
+ "isdivisibleby",
47
+ "iseven",
48
+ "isodd",
49
+ "map_regex",
50
+ "num_to_str",
51
+ "reduce_sets",
52
+ "remove_punctuation",
53
+ "signif",
54
+ "source_code",
55
+ )
56
+
57
+
58
+ Pair = Tuple[float, float]
59
+ Predicate = Callable[[Any], bool]
60
+ Seed = Optional[Union[int, random.Random]]
61
+
62
+
63
+ def are_predicates_true(
64
+ func: Callable[..., bool],
65
+ *predicates: Iterable[Predicate],
66
+ ) -> Predicate:
67
+ """Evaluate if predicates are true.
68
+
69
+ A predicate is of the form :math:`P\\colon X \\rightarrow \\{False, True\\}`
70
+
71
+ Examples
72
+ --------
73
+ >>> from onekit import pytlz
74
+ >>> pytlz.are_predicates_true(all, lambda x: x % 2 == 0, lambda x: x % 5 == 0)(10)
75
+ True
76
+
77
+ >>> pytlz.are_predicates_true(all, lambda x: x % 2 == 0, lambda x: x % 5 == 0)(12)
78
+ False
79
+
80
+ >>> pytlz.are_predicates_true(any, lambda x: x % 2 == 0, lambda x: x % 5 == 0)(12)
81
+ True
82
+
83
+ >>> pytlz.are_predicates_true(any, lambda x: x % 2 == 0, lambda x: x % 5 == 0)(13)
84
+ False
85
+
86
+ >>> is_divisible_by_3_and_5 = pytlz.are_predicates_true(
87
+ ... all,
88
+ ... pytlz.isdivisibleby(3),
89
+ ... pytlz.isdivisibleby(5),
90
+ ... )
91
+ >>> type(is_divisible_by_3_and_5)
92
+ <class 'function'>
93
+ >>> is_divisible_by_3_and_5(60)
94
+ True
95
+ >>> is_divisible_by_3_and_5(9)
96
+ False
97
+
98
+ >>> is_divisible_by_3_or_5 = pytlz.are_predicates_true(
99
+ ... any,
100
+ ... pytlz.isdivisibleby(3),
101
+ ... pytlz.isdivisibleby(5),
102
+ ... )
103
+ >>> type(is_divisible_by_3_or_5)
104
+ <class 'function'>
105
+ >>> is_divisible_by_3_or_5(60)
106
+ True
107
+ >>> is_divisible_by_3_or_5(9)
108
+ True
109
+ >>> is_divisible_by_3_or_5(13)
110
+ False
111
+ """
112
+
113
+ def inner(x: Any, /) -> bool:
114
+ """Evaluate all specified predicates :math:`P_i` for value :math:`x \\in X`."""
115
+ return func(predicate(x) for predicate in flatten(predicates))
116
+
117
+ return inner
118
+
119
+
120
+ def check_random_state(seed: Seed = None, /) -> random.Random:
121
+ """Turn seed into random.Random instance.
122
+
123
+ Examples
124
+ --------
125
+ >>> import random
126
+ >>> from onekit import pytlz
127
+ >>> rng = pytlz.check_random_state()
128
+ >>> isinstance(rng, random.Random)
129
+ True
130
+ """
131
+ singleton_instance = getattr(random, "_inst")
132
+
133
+ if seed is None or seed is singleton_instance:
134
+ return singleton_instance
135
+
136
+ elif isinstance(seed, int):
137
+ return random.Random(seed)
138
+
139
+ elif isinstance(seed, random.Random):
140
+ return seed
141
+
142
+ else:
143
+ raise ValueError(f"{seed=} - cannot be used to seed Random instance")
144
+
145
+
146
+ def coinflip(bias: float, /, *, seed: Seed = None) -> bool:
147
+ """Flip coin with adjustable bias.
148
+
149
+ Examples
150
+ --------
151
+ >>> from functools import partial
152
+ >>> from onekit import pytlz
153
+ >>> {pytlz.coinflip(0.5) for _ in range(30)} == {True, False}
154
+ True
155
+
156
+ >>> fair_coin = partial(pytlz.coinflip, 0.5)
157
+ >>> type(fair_coin)
158
+ <class 'functools.partial'>
159
+ >>> # fix coinflip outcome
160
+ >>> fair_coin(seed=1) # doctest: +SKIP
161
+ True
162
+ >>> # fix sequence of coinflip outcomes
163
+ >>> rng = pytlz.check_random_state(2)
164
+ >>> [fair_coin(seed=rng) for _ in range(6)] # doctest: +SKIP
165
+ [False, False, True, True, False, False]
166
+
167
+ >>> biased_coin = partial(pytlz.coinflip, 0.6, seed=pytlz.check_random_state(3))
168
+ >>> type(biased_coin)
169
+ <class 'functools.partial'>
170
+ >>> [biased_coin() for _ in range(6)] # doctest: +SKIP
171
+ [True, True, True, False, False, True]
172
+ """
173
+ if not (0 <= bias <= 1):
174
+ raise ValueError(f"{bias=} - must be a float in [0, 1]")
175
+
176
+ rng = check_random_state(seed)
177
+
178
+ return rng.random() < bias
179
+
180
+
181
+ def collatz(n: int, /) -> Generator:
182
+ """Generate a Collatz sequence.
183
+
184
+ The famous 3n + 1 conjecture [c1]_ [c2]_. Given a positive integer :math:`n > 0`,
185
+ the next term in the Collatz sequence is half of :math:`n`
186
+ if :math:`n` is even; otherwise, if :math:`n` is odd,
187
+ the next term is 3 times :math:`n` plus 1.
188
+ Symbolically,
189
+
190
+ .. math::
191
+
192
+ f(n) =
193
+ \\begin{cases}
194
+ n / 2 & \\text{ if } n \\equiv 0 \\text{ (mod 2) } \\\\[6pt]
195
+ 3n + 1 & \\text{ if } n \\equiv 1 \\text{ (mod 2) }
196
+ \\end{cases}
197
+
198
+ The Collatz conjecture is that the sequence always reaches 1
199
+ for any positive integer :math:`n`.
200
+
201
+ Parameters
202
+ ----------
203
+ n : int
204
+ A positive integer seeding the Collatz sequence.
205
+
206
+ Yields
207
+ ------
208
+ int
209
+ A generator of Collatz numbers that breaks when 1 is reached.
210
+
211
+ Raises
212
+ ------
213
+ ValueError
214
+ If ``n`` is not a positive integer.
215
+
216
+ References
217
+ ----------
218
+ .. [c1] "Collatz", The On-Line Encyclopedia of Integer Sequences®,
219
+ https://oeis.org/A006370
220
+ .. [c2] "Collatz conjecture", Wikipedia,
221
+ https://en.wikipedia.org/wiki/Collatz_conjecture
222
+
223
+ Examples
224
+ --------
225
+ >>> import toolz
226
+ >>> from onekit import pytlz
227
+ >>> n = 12
228
+ >>> list(pytlz.collatz(n))
229
+ [12, 6, 3, 10, 5, 16, 8, 4, 2, 1]
230
+ >>> toolz.count(pytlz.collatz(n))
231
+ 10
232
+ """
233
+ if not isinstance(n, int) or n < 1:
234
+ raise ValueError(f"{n=} - must be a positive integer")
235
+
236
+ while True:
237
+ yield n
238
+
239
+ if n == 1:
240
+ break
241
+
242
+ # update
243
+ n = n // 2 if iseven(n) else 3 * n + 1
244
+
245
+
246
+ def concat_strings(sep: str, /, *strings: Iterable[str]) -> str:
247
+ """Concatenate strings.
248
+
249
+ Examples
250
+ --------
251
+ >>> from functools import partial
252
+ >>> from onekit import pytlz
253
+ >>> pytlz.concat_strings(" ", "Hello", "World")
254
+ 'Hello World'
255
+ >>> pytlz.concat_strings(" ", ["Hello", "World"])
256
+ 'Hello World'
257
+
258
+ >>> plus_concat = partial(pytlz.concat_strings, " + ")
259
+ >>> plus_concat("Hello", "World")
260
+ 'Hello + World'
261
+ >>> plus_concat(["Hello", "World"])
262
+ 'Hello + World'
263
+ """
264
+ return sep.join(toolz.pipe(strings, flatten, map(str)))
265
+
266
+
267
+ def contrast_sets(x: set, y: set, /, *, n: int = 3) -> dict:
268
+ """Contrast sets.
269
+
270
+ Examples
271
+ --------
272
+ >>> from onekit import pytlz
273
+ >>> a = {"a", "c", "b", "g", "h", "i", "j", "k"}
274
+ >>> b = {"c", "d", "e", "f", "g", "p", "q"}
275
+ >>> summary = pytlz.contrast_sets(a, b)
276
+ >>> isinstance(summary, dict)
277
+ True
278
+ >>> summary["x"] == a
279
+ True
280
+ >>> summary["y"] == b
281
+ True
282
+ >>> summary["x | y"] == a.union(b)
283
+ True
284
+ >>> summary["x & y"] == a.intersection(b)
285
+ True
286
+ >>> summary["x - y"] == a.difference(b)
287
+ True
288
+ >>> summary["y - x"] == b.difference(a)
289
+ True
290
+ >>> summary["x ^ y"] == a.symmetric_difference(b)
291
+ True
292
+ >>> print(summary["report"])
293
+ x (n= 8): {'a', 'b', 'c', ...}
294
+ y (n= 7): {'c', 'd', 'e', ...}
295
+ x | y (n=13): {'a', 'b', 'c', ...}
296
+ x & y (n= 2): {'c', 'g'}
297
+ x - y (n= 6): {'a', 'b', 'h', ...}
298
+ y - x (n= 5): {'d', 'e', 'f', ...}
299
+ x ^ y (n=11): {'a', 'b', 'd', ...}
300
+ jaccard = 0.153846
301
+ overlap = 0.285714
302
+ dice = 0.266667
303
+ disjoint?: False
304
+ x == y: False
305
+ x <= y: False
306
+ x < y: False
307
+ y <= x: False
308
+ y < x: False
309
+ """
310
+ x, y = set(x), set(y)
311
+ union = x.union(y)
312
+ intersection = x.intersection(y)
313
+ in_x_but_not_y = x.difference(y)
314
+ in_y_but_not_x = y.difference(x)
315
+ symmetric_diff = x ^ y
316
+ jaccard = len(intersection) / len(union)
317
+ overlap = len(intersection) / min(len(x), len(y))
318
+ dice = 2 * len(intersection) / (len(x) + len(y))
319
+
320
+ output = {
321
+ "x": x,
322
+ "y": y,
323
+ "x | y": union,
324
+ "x & y": intersection,
325
+ "x - y": in_x_but_not_y,
326
+ "y - x": in_y_but_not_x,
327
+ "x ^ y": symmetric_diff,
328
+ "jaccard": jaccard,
329
+ "overlap": overlap,
330
+ "dice": dice,
331
+ }
332
+
333
+ max_set_size = max(
334
+ len(num_to_str(len(v))) for v in output.values() if isinstance(v, set)
335
+ )
336
+
337
+ lines = []
338
+ for k, v in output.items():
339
+ if isinstance(v, set):
340
+ elements = f"{sorted(v)[:n]}".replace("[", "{")
341
+ elements = (
342
+ elements.replace("]", ", ...}")
343
+ if len(v) > n
344
+ else elements.replace("]", "}")
345
+ )
346
+ elements = elements.replace(",", "") if len(v) == 1 else elements
347
+
348
+ set_size = num_to_str(len(v)).rjust(max_set_size)
349
+ desc = f"{k} (n={set_size})"
350
+
351
+ if k in ["x", "y"]:
352
+ desc = f" {desc}"
353
+ msg = f"{desc}: {elements}"
354
+ lines.append(msg)
355
+
356
+ else:
357
+ lines.append(f"{k} = {v:g}")
358
+
359
+ tmp = {
360
+ "disjoint?": x.isdisjoint(y),
361
+ "x == y": x == y,
362
+ "x <= y": x <= y,
363
+ "x < y": x < y,
364
+ "y <= x": y <= x,
365
+ "y < x": y < x,
366
+ }
367
+
368
+ for k, v in tmp.items():
369
+ lines.append(f"{k}: {v}")
370
+
371
+ output.update(tmp)
372
+ output["report"] = "\n".join(lines)
373
+
374
+ return output
375
+
376
+
377
+ def create_path(*strings: Iterable[str]) -> str:
378
+ """Create path by concatenating strings.
379
+
380
+ Examples
381
+ --------
382
+ >>> from onekit import pytlz
383
+ >>> pytlz.create_path("path", "to", "file")
384
+ 'path/to/file'
385
+
386
+ >>> pytlz.create_path(["hdfs://", "path", "to", "file"])
387
+ 'hdfs://path/to/file'
388
+ """
389
+ return functools.reduce(os.path.join, flatten(strings))
390
+
391
+
392
+ def date_to_str(d: dt.date, /) -> str:
393
+ """Cast date to string in ISO format: YYYY-MM-DD.
394
+
395
+ Examples
396
+ --------
397
+ >>> import datetime as dt
398
+ >>> from onekit import pytlz
399
+ >>> pytlz.date_to_str(dt.date(2022, 1, 1))
400
+ '2022-01-01'
401
+ """
402
+ return d.isoformat()
403
+
404
+
405
+ def extend_range(xmin: float, xmax: float, /, *, factor: float = 0.05) -> Pair:
406
+ """Extend value range ``xmax - xmin`` by factor.
407
+
408
+ Examples
409
+ --------
410
+ >>> from onekit import pytlz
411
+ >>> pytlz.extend_range(0.0, 1.0)
412
+ (-0.05, 1.05)
413
+
414
+ >>> pytlz.extend_range(0.0, 1.0, factor=0.1)
415
+ (-0.1, 1.1)
416
+ """
417
+ if not isinstance(factor, float) or factor < 0:
418
+ raise ValueError(f"{factor=} - must be a non-negative float")
419
+
420
+ xmin, xmax = sorted([xmin, xmax])
421
+ value_range = xmax - xmin
422
+
423
+ new_xmin = xmin - factor * value_range
424
+ new_xmax = xmax + factor * value_range
425
+
426
+ return new_xmin, new_xmax
427
+
428
+
429
+ def fibonacci() -> Generator:
430
+ """Generate the Fibonacci sequence.
431
+
432
+ For :math:`n > 1`, Fibonacci numbers may be defined by [f1]_ [f2]_:
433
+
434
+ .. math::
435
+
436
+ F(n) = F(n-1) + F(n-2) \\text{ with } F(0) = 0 \\text{ and } F(1) = 1.
437
+
438
+ As such, the sequence starts as follows:
439
+
440
+ .. math::
441
+
442
+ 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, \\dots
443
+
444
+ Yields
445
+ ------
446
+ int
447
+ A generator of consecutive Fibonacci numbers.
448
+
449
+ References
450
+ ----------
451
+ .. [f1] "Fibonacci numbers", The On-Line Encyclopedia of Integer Sequences®,
452
+ https://oeis.org/A000045
453
+ .. [f2] "Fibonacci number", Wikipedia,
454
+ https://en.wikipedia.org/wiki/Fibonacci_number
455
+
456
+ Examples
457
+ --------
458
+ >>> import toolz
459
+ >>> from onekit import pytlz
460
+ >>> list(toolz.take(13, pytlz.fibonacci()))
461
+ [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144]
462
+ """
463
+ lag2, lag1 = 0, 1
464
+ yield lag2
465
+ yield lag1
466
+
467
+ while True:
468
+ lag0 = lag2 + lag1
469
+ yield lag0
470
+ lag2, lag1 = lag1, lag0
471
+
472
+
473
+ def filter_regex(
474
+ pattern: str,
475
+ /,
476
+ *strings: Iterable[str],
477
+ flags=re.IGNORECASE,
478
+ ) -> Generator:
479
+ """Filter iterable of strings with regex.
480
+
481
+ Examples
482
+ --------
483
+ >>> from functools import partial
484
+ >>> from onekit import pytlz
485
+ >>> list(pytlz.filter_regex("hello", "Hello, World!", "Hi, there!", "Hello!"))
486
+ ['Hello, World!', 'Hello!']
487
+
488
+ >>> strings = [
489
+ ... "Guiding principles for Python's design: The Zen of Python",
490
+ ... "Beautiful is better than ugly.",
491
+ ... "Explicit is better than implicit.",
492
+ ... "Simple is better than complex.",
493
+ ... ]
494
+ >>> list(pytlz.filter_regex("python", strings))
495
+ ["Guiding principles for Python's design: The Zen of Python"]
496
+
497
+ >>> filter_regex__hi = partial(pytlz.filter_regex, "hi")
498
+ >>> list(filter_regex__hi("Hello, World!", "Hi, there!", "Hello!"))
499
+ ['Hi, there!']
500
+ """
501
+ return filter(functools.partial(re.findall, pattern, flags=flags), flatten(strings))
502
+
503
+
504
+ def flatten(*items: Iterable[Any]) -> Generator:
505
+ """Flatten iterable of items.
506
+
507
+ Examples
508
+ --------
509
+ >>> from onekit import pytlz
510
+ >>> list(pytlz.flatten([[1, 2], *[3, 4], [5]]))
511
+ [1, 2, 3, 4, 5]
512
+
513
+ >>> list(pytlz.flatten([1, (2, 3)], 4, [], [[[5]], 6]))
514
+ [1, 2, 3, 4, 5, 6]
515
+
516
+ >>> list(pytlz.flatten(["one", 2], 3, [(4, "five")], [[["six"]]], "seven", []))
517
+ ['one', 2, 3, 4, 'five', 'six', 'seven']
518
+ """
519
+
520
+ def _flatten(items):
521
+ for item in items:
522
+ if isinstance(item, (Iterator, Sequence)) and not isinstance(item, str):
523
+ yield from _flatten(item)
524
+ else:
525
+ yield item
526
+
527
+ return _flatten(items)
528
+
529
+
530
+ def func_name() -> str:
531
+ """Get name of called function.
532
+
533
+ Examples
534
+ --------
535
+ >>> from onekit import pytlz
536
+ >>> def foobar():
537
+ ... return pytlz.func_name()
538
+ ...
539
+ >>> foobar()
540
+ 'foobar'
541
+ """
542
+ return inspect.stack()[1].function
543
+
544
+
545
+ def headline(text: str, /, *, n: int = 88, fillchar: str = "-") -> str:
546
+ """Create headline string.
547
+
548
+ Examples
549
+ --------
550
+ >>> from onekit import pytlz
551
+ >>> pytlz.headline("Hello, World!", n=30)
552
+ '------- Hello, World! --------'
553
+ """
554
+ return f" {text} ".center(n, fillchar)
555
+
556
+
557
+ def highlight_string_differences(lft_str: str, rgt_str: str, /) -> str:
558
+ """Highlight differences between two strings.
559
+
560
+ Examples
561
+ --------
562
+ >>> from onekit import pytlz
563
+ >>> print(pytlz.highlight_string_differences("hello", "hall"))
564
+ hello
565
+ | |
566
+ hall
567
+
568
+ >>> # no differences when there is no '|' character
569
+ >>> print(pytlz.highlight_string_differences("hello", "hello"))
570
+ hello
571
+ <BLANKLINE>
572
+ hello
573
+ """
574
+ return concat_strings(
575
+ os.linesep,
576
+ lft_str,
577
+ concat_strings(
578
+ "",
579
+ (
580
+ " " if x == y else "|"
581
+ for x, y in itertools.zip_longest(lft_str, rgt_str, fillvalue="")
582
+ ),
583
+ ),
584
+ rgt_str,
585
+ )
586
+
587
+
588
+ @toolz.curry
589
+ def isdivisibleby(n: int, x: Union[int, float], /) -> bool:
590
+ """Evaluate if :math:`x` is evenly divisible by :math:`n`.
591
+
592
+ Examples
593
+ --------
594
+ >>> from onekit import pytlz
595
+ >>> pytlz.isdivisibleby(7, 49)
596
+ True
597
+
598
+ >>> # function is curried
599
+ >>> pytlz.isdivisibleby(5)(10)
600
+ True
601
+ >>> is_divisible_by_5 = pytlz.isdivisibleby(5)
602
+ >>> is_divisible_by_5(10)
603
+ True
604
+ >>> is_divisible_by_5(11.0)
605
+ False
606
+ """
607
+ return x % n == 0
608
+
609
+
610
+ def iseven(x: Union[int, float], /) -> bool:
611
+ """Evaluate if :math:`x` is even.
612
+
613
+ Examples
614
+ --------
615
+ >>> from onekit import pytlz
616
+ >>> pytlz.iseven(0)
617
+ True
618
+
619
+ >>> pytlz.iseven(1)
620
+ False
621
+
622
+ >>> pytlz.iseven(2)
623
+ True
624
+ """
625
+ return isdivisibleby(2)(x)
626
+
627
+
628
+ def isodd(x: Union[int, float], /) -> bool:
629
+ """Evaluate if :math:`x` is odd.
630
+
631
+ Examples
632
+ --------
633
+ >>> from onekit import pytlz
634
+ >>> pytlz.isodd(0)
635
+ False
636
+
637
+ >>> pytlz.isodd(1)
638
+ True
639
+
640
+ >>> pytlz.isodd(2)
641
+ False
642
+ """
643
+ return toolz.complement(iseven)(x)
644
+
645
+
646
+ def map_regex(
647
+ pattern: str,
648
+ /,
649
+ *strings: Iterable[str],
650
+ flags=re.IGNORECASE,
651
+ ) -> Generator:
652
+ """Match regex to iterable of strings.
653
+
654
+ Examples
655
+ --------
656
+ >>> from functools import partial
657
+ >>> from onekit import pytlz
658
+ >>> list(pytlz.map_regex("hello", "Hello, World!", "Hi, there!", "Hello!"))
659
+ [['Hello'], [], ['Hello']]
660
+
661
+ >>> strings = [
662
+ ... "Guiding principles for Python's design: The Zen of Python",
663
+ ... "Beautiful is better than ugly.",
664
+ ... "Explicit is better than implicit.",
665
+ ... "Simple is better than complex.",
666
+ ... ]
667
+ >>> list(pytlz.map_regex("python", strings))
668
+ [['Python', 'Python'], [], [], []]
669
+
670
+ >>> map_regex__hi = partial(pytlz.map_regex, "hi")
671
+ >>> list(map_regex__hi("Hello, World!", "Hi, there!", "Hello!"))
672
+ [[], ['Hi'], []]
673
+ """
674
+ return map(functools.partial(re.findall, pattern, flags=flags), flatten(strings))
675
+
676
+
677
+ def num_to_str(x: Union[int, float], /) -> str:
678
+ """Cast number to string with underscores as thousands separator.
679
+
680
+ Examples
681
+ --------
682
+ >>> from onekit import pytlz
683
+ >>> pytlz.num_to_str(1000000)
684
+ '1_000_000'
685
+
686
+ >>> pytlz.num_to_str(100000.0)
687
+ '100_000.0'
688
+ """
689
+ return f"{x:_}"
690
+
691
+
692
+ @toolz.curry
693
+ def reduce_sets(func: Callable[[set, set], set], /, *sets: Iterable[set]) -> set:
694
+ """Apply function of two set arguments to reduce iterable of sets.
695
+
696
+ Examples
697
+ --------
698
+ >>> from onekit import pytlz
699
+ >>> x = {0, 1, 2, 3}
700
+ >>> y = {2, 4, 6}
701
+ >>> z = {2, 6, 8}
702
+ >>> pytlz.reduce_sets(set.intersection, x, y, z)
703
+ {2}
704
+ >>> sets = [x, y, z]
705
+ >>> pytlz.reduce_sets(set.symmetric_difference, sets)
706
+ {0, 1, 2, 3, 4, 8}
707
+ >>> pytlz.reduce_sets(set.difference, *sets)
708
+ {0, 1, 3}
709
+
710
+ >>> # function is curried
711
+ >>> pytlz.reduce_sets(set.union)(*sets)
712
+ {0, 1, 2, 3, 4, 6, 8}
713
+ >>> pytlz.reduce_sets(set.union)(sets)
714
+ {0, 1, 2, 3, 4, 6, 8}
715
+ >>> union_sets = pytlz.reduce_sets(set.union)
716
+ >>> union_sets(x, y, z)
717
+ {0, 1, 2, 3, 4, 6, 8}
718
+ """
719
+ return toolz.pipe(sets, flatten, map(set), reduce(func))
720
+
721
+
722
+ def remove_punctuation(text: str, /) -> str:
723
+ """Remove punctuation from text string.
724
+
725
+ Examples
726
+ --------
727
+ >>> from onekit import pytlz
728
+ >>> pytlz.remove_punctuation("I think, therefore I am. --Descartes")
729
+ 'I think therefore I am Descartes'
730
+ """
731
+ return text.translate(str.maketrans("", "", string.punctuation))
732
+
733
+
734
+ def signif(x: Union[int, float], /, *, n: int = 3) -> Union[int, float]:
735
+ """Round :math:`x` to its :math:`n` significant digits.
736
+
737
+ Examples
738
+ --------
739
+ >>> from onekit import pytlz
740
+ >>> pytlz.signif(987654321)
741
+ 988000000
742
+
743
+ >>> pytlz.signif(14393237.76, n=2)
744
+ 14000000.0
745
+
746
+ >>> pytlz.signif(14393237.76, n=3)
747
+ 14400000.0
748
+ """
749
+ if not isinstance(n, int) or n < 1:
750
+ raise ValueError(f"{n=} - must be a positive integer")
751
+
752
+ if not math.isfinite(x) or math.isclose(x, 0.0):
753
+ return x
754
+
755
+ n -= math.ceil(math.log10(abs(x)))
756
+ return round(x, n)
757
+
758
+
759
+ def source_code(x: object, /) -> str:
760
+ """Get source code of an object :math:`x`.
761
+
762
+ Examples
763
+ --------
764
+ >>> from onekit import pytlz
765
+ >>> def greet():
766
+ ... return "Hello, World!"
767
+ ...
768
+ >>> print(pytlz.source_code(greet))
769
+ def greet():
770
+ return "Hello, World!"
771
+ <BLANKLINE>
772
+ """
773
+ return inspect.getsource(x)
@@ -1,7 +1,7 @@
1
1
  """PySpark toolz."""
2
2
 
3
3
  import functools
4
- from typing import Sequence
4
+ from typing import Iterable
5
5
 
6
6
  from pyspark.sql import DataFrame as SparkDF
7
7
 
@@ -10,8 +10,8 @@ from onekit import pytlz
10
10
  __all__ = ("union",)
11
11
 
12
12
 
13
- def union(*dataframes: Sequence[SparkDF]) -> SparkDF:
14
- """Union sequence of Spark dataframes by name.
13
+ def union(*dataframes: Iterable[SparkDF]) -> SparkDF:
14
+ """Union iterable of Spark dataframes by name.
15
15
 
16
16
  Examples
17
17
  --------
@@ -1,442 +0,0 @@
1
- """Python toolz."""
2
-
3
- import datetime as dt
4
- import inspect
5
- import math
6
- from typing import (
7
- Any,
8
- Callable,
9
- Generator,
10
- Iterator,
11
- Sequence,
12
- Tuple,
13
- Union,
14
- )
15
-
16
- import toolz
17
- from toolz.curried import (
18
- map,
19
- reduce,
20
- )
21
-
22
- __all__ = (
23
- "all_predicate_true",
24
- "any_predicate_true",
25
- "contrast_sets",
26
- "date_to_str",
27
- "extend_range",
28
- "flatten",
29
- "func_name",
30
- "isdivisibleby",
31
- "iseven",
32
- "isodd",
33
- "num_to_str",
34
- "reduce_sets",
35
- "signif",
36
- "source_code",
37
- )
38
-
39
-
40
- Pair = Tuple[float, float]
41
- Predicate = Callable[[Any], bool]
42
-
43
-
44
- def all_predicate_true(*predicates: Sequence[Predicate]) -> Predicate:
45
- """Check every predicate :math:`P\\colon X \\rightarrow \\{False, True\\}` is true.
46
-
47
- Examples
48
- --------
49
- >>> from onekit import pytlz
50
- >>> pytlz.all_predicate_true(lambda x: x % 2 == 0, lambda x: x % 5 == 0)(10)
51
- True
52
-
53
- >>> is_divisible_by_3_and_5 = pytlz.all_predicate_true(
54
- ... pytlz.isdivisibleby(3),
55
- ... pytlz.isdivisibleby(5),
56
- ... )
57
- >>> type(is_divisible_by_3_and_5)
58
- <class 'function'>
59
- >>> is_divisible_by_3_and_5(60)
60
- True
61
- >>> is_divisible_by_3_and_5(9)
62
- False
63
- """
64
-
65
- def inner(x: Any, /) -> bool:
66
- """Evaluate all specified predicates :math:`P_i` for value :math:`x \\in X`."""
67
- return all(predicate(x) for predicate in flatten(predicates))
68
-
69
- return inner
70
-
71
-
72
- def any_predicate_true(*predicates: Sequence[Predicate]) -> Predicate:
73
- """Check any predicate :math:`P\\colon X \\rightarrow \\{False, True\\}` is true.
74
-
75
- Examples
76
- --------
77
- >>> from onekit import pytlz
78
- >>> pytlz.any_predicate_true(lambda x: x % 2 == 0, lambda x: x % 5 == 0)(10)
79
- True
80
-
81
- >>> is_divisible_by_3_or_5 = pytlz.any_predicate_true(
82
- ... pytlz.isdivisibleby(3),
83
- ... pytlz.isdivisibleby(5),
84
- ... )
85
- >>> type(is_divisible_by_3_or_5)
86
- <class 'function'>
87
- >>> is_divisible_by_3_or_5(60)
88
- True
89
- >>> is_divisible_by_3_or_5(9)
90
- True
91
- >>> is_divisible_by_3_or_5(13)
92
- False
93
- """
94
-
95
- def inner(x: Any, /) -> bool:
96
- """Evaluate all specified predicates :math:`P_i` for value :math:`x \\in X`."""
97
- return any(predicate(x) for predicate in flatten(predicates))
98
-
99
- return inner
100
-
101
-
102
- def contrast_sets(x: set, y: set, /, *, n: int = 3) -> dict:
103
- """Contrast sets.
104
-
105
- Examples
106
- --------
107
- >>> from onekit import pytlz
108
- >>> a = {"a", "c", "b", "g", "h", "i", "j", "k"}
109
- >>> b = {"c", "d", "e", "f", "g", "p", "q"}
110
- >>> summary = pytlz.contrast_sets(a, b)
111
- >>> isinstance(summary, dict)
112
- True
113
- >>> summary["x"] == a
114
- True
115
- >>> summary["y"] == b
116
- True
117
- >>> summary["x | y"] == a.union(b)
118
- True
119
- >>> summary["x & y"] == a.intersection(b)
120
- True
121
- >>> summary["x - y"] == a.difference(b)
122
- True
123
- >>> summary["y - x"] == b.difference(a)
124
- True
125
- >>> summary["x ^ y"] == a.symmetric_difference(b)
126
- True
127
- >>> print(summary["report"])
128
- x (n= 8): {'a', 'b', 'c', ...}
129
- y (n= 7): {'c', 'd', 'e', ...}
130
- x | y (n=13): {'a', 'b', 'c', ...}
131
- x & y (n= 2): {'c', 'g'}
132
- x - y (n= 6): {'a', 'b', 'h', ...}
133
- y - x (n= 5): {'d', 'e', 'f', ...}
134
- x ^ y (n=11): {'a', 'b', 'd', ...}
135
- jaccard = 0.153846
136
- overlap = 0.285714
137
- dice = 0.266667
138
- disjoint?: False
139
- x == y: False
140
- x <= y: False
141
- x < y: False
142
- y <= x: False
143
- y < x: False
144
- """
145
- x, y = set(x), set(y)
146
- union = x.union(y)
147
- intersection = x.intersection(y)
148
- in_x_but_not_y = x.difference(y)
149
- in_y_but_not_x = y.difference(x)
150
- symmetric_diff = x ^ y
151
- jaccard = len(intersection) / len(union)
152
- overlap = len(intersection) / min(len(x), len(y))
153
- dice = 2 * len(intersection) / (len(x) + len(y))
154
-
155
- output = {
156
- "x": x,
157
- "y": y,
158
- "x | y": union,
159
- "x & y": intersection,
160
- "x - y": in_x_but_not_y,
161
- "y - x": in_y_but_not_x,
162
- "x ^ y": symmetric_diff,
163
- "jaccard": jaccard,
164
- "overlap": overlap,
165
- "dice": dice,
166
- }
167
-
168
- max_set_size = max(
169
- len(num_to_str(len(v))) for v in output.values() if isinstance(v, set)
170
- )
171
-
172
- lines = []
173
- for k, v in output.items():
174
- if isinstance(v, set):
175
- elements = f"{sorted(v)[:n]}".replace("[", "{")
176
- elements = (
177
- elements.replace("]", ", ...}")
178
- if len(v) > n
179
- else elements.replace("]", "}")
180
- )
181
- elements = elements.replace(",", "") if len(v) == 1 else elements
182
-
183
- set_size = num_to_str(len(v)).rjust(max_set_size)
184
- desc = f"{k} (n={set_size})"
185
-
186
- if k in ["x", "y"]:
187
- desc = f" {desc}"
188
- msg = f"{desc}: {elements}"
189
- lines.append(msg)
190
-
191
- else:
192
- lines.append(f"{k} = {v:g}")
193
-
194
- tmp = {
195
- "disjoint?": x.isdisjoint(y),
196
- "x == y": x == y,
197
- "x <= y": x <= y,
198
- "x < y": x < y,
199
- "y <= x": y <= x,
200
- "y < x": y < x,
201
- }
202
-
203
- for k, v in tmp.items():
204
- lines.append(f"{k}: {v}")
205
-
206
- output.update(tmp)
207
- output["report"] = "\n".join(lines)
208
-
209
- return output
210
-
211
-
212
- def date_to_str(d: dt.date, /) -> str:
213
- """Cast date to string in ISO format: YYYY-MM-DD.
214
-
215
- Examples
216
- --------
217
- >>> import datetime as dt
218
- >>> from onekit import pytlz
219
- >>> pytlz.date_to_str(dt.date(2022, 1, 1))
220
- '2022-01-01'
221
- """
222
- return d.isoformat()
223
-
224
-
225
- @toolz.curry
226
- def extend_range(xmin: float, xmax: float, /, *, factor: float = 0.05) -> Pair:
227
- """Extend value range ``xmax - xmin`` by factor.
228
-
229
- Examples
230
- --------
231
- >>> from onekit import pytlz
232
- >>> pytlz.extend_range(0.0, 1.0)
233
- (-0.05, 1.05)
234
-
235
- >>> pytlz.extend_range(0.0, 1.0, factor=0.1)
236
- (-0.1, 1.1)
237
-
238
- >>> extend_range = pytlz.extend_range(factor=0.2)
239
- >>> type(extend_range)
240
- <class 'toolz.functoolz.curry'>
241
- >>> extend_range(0.0, 1.0)
242
- (-0.2, 1.2)
243
- """
244
- if not isinstance(factor, float) or factor < 0:
245
- raise ValueError(f"{factor=} - must be a non-negative float")
246
-
247
- xmin, xmax = sorted([xmin, xmax])
248
- value_range = xmax - xmin
249
-
250
- new_xmin = xmin - factor * value_range
251
- new_xmax = xmax + factor * value_range
252
-
253
- return new_xmin, new_xmax
254
-
255
-
256
- def flatten(*items: Sequence[Any]) -> Generator:
257
- """Flatten sequence of items.
258
-
259
- Examples
260
- --------
261
- >>> from onekit import pytlz
262
- >>> list(pytlz.flatten([[1, 2], *[3, 4], [5]]))
263
- [1, 2, 3, 4, 5]
264
-
265
- >>> list(pytlz.flatten([1, (2, 3)], 4, [], [[[5]], 6]))
266
- [1, 2, 3, 4, 5, 6]
267
-
268
- >>> list(pytlz.flatten(["one", 2], 3, [(4, "five")], [[["six"]]], "seven", []))
269
- ['one', 2, 3, 4, 'five', 'six', 'seven']
270
- """
271
-
272
- def _flatten(items):
273
- for item in items:
274
- if isinstance(item, (Iterator, Sequence)) and not isinstance(item, str):
275
- yield from _flatten(item)
276
- else:
277
- yield item
278
-
279
- return _flatten(items)
280
-
281
-
282
- def func_name() -> str:
283
- """Get name of called function.
284
-
285
- Examples
286
- --------
287
- >>> from onekit import pytlz
288
- >>> def foobar():
289
- ... return pytlz.func_name()
290
- ...
291
- >>> foobar()
292
- 'foobar'
293
- """
294
- return inspect.stack()[1].function
295
-
296
-
297
- @toolz.curry
298
- def isdivisibleby(n: int, x: Union[int, float], /) -> bool:
299
- """Check if :math:`x` is evenly divisible by :math:`n`.
300
-
301
- Examples
302
- --------
303
- >>> from onekit import pytlz
304
- >>> pytlz.isdivisibleby(7, 49)
305
- True
306
-
307
- >>> is_divisible_by_5 = pytlz.isdivisibleby(5)
308
- >>> type(is_divisible_by_5)
309
- <class 'toolz.functoolz.curry'>
310
- >>> is_divisible_by_5(10)
311
- True
312
- >>> is_divisible_by_5(11.0)
313
- False
314
- """
315
- return x % n == 0
316
-
317
-
318
- def iseven(x: Union[int, float], /) -> bool:
319
- """Check if :math:`x` is even.
320
-
321
- Examples
322
- --------
323
- >>> from onekit import pytlz
324
- >>> pytlz.iseven(0)
325
- True
326
-
327
- >>> pytlz.iseven(1)
328
- False
329
-
330
- >>> pytlz.iseven(2)
331
- True
332
- """
333
- return isdivisibleby(2)(x)
334
-
335
-
336
- def isodd(x: Union[int, float], /) -> bool:
337
- """Check if :math:`x` is odd.
338
-
339
- Examples
340
- --------
341
- >>> from onekit import pytlz
342
- >>> pytlz.isodd(0)
343
- False
344
-
345
- >>> pytlz.isodd(1)
346
- True
347
-
348
- >>> pytlz.isodd(2)
349
- False
350
- """
351
- return toolz.complement(iseven)(x)
352
-
353
-
354
- def num_to_str(x: Union[int, float], /) -> str:
355
- """Cast number to string with underscores as thousands separator.
356
-
357
- Examples
358
- --------
359
- >>> from onekit import pytlz
360
- >>> pytlz.num_to_str(1000000)
361
- '1_000_000'
362
-
363
- >>> pytlz.num_to_str(100000.0)
364
- '100_000.0'
365
- """
366
- return f"{x:_}"
367
-
368
-
369
- @toolz.curry
370
- def reduce_sets(func: Callable[[set, set], set], /, *sets: Sequence[set]) -> set:
371
- """Apply function of two set arguments to reduce a sequence of sets.
372
-
373
- Examples
374
- --------
375
- >>> from onekit import pytlz
376
- >>> x = {0, 1, 2, 3}
377
- >>> y = {2, 4, 6}
378
- >>> z = {2, 6, 8}
379
- >>> sets = [x, y, z]
380
- >>> pytlz.reduce_sets(set.intersection, sets)
381
- {2}
382
- >>> pytlz.reduce_sets(set.difference, *sets)
383
- {0, 1, 3}
384
- >>> pytlz.reduce_sets(set.symmetric_difference, sets)
385
- {0, 1, 2, 3, 4, 8}
386
-
387
- >>> set_union = pytlz.reduce_sets(set.union)
388
- >>> type(set_union)
389
- <class 'toolz.functoolz.curry'>
390
- >>> set_union(x, y, z)
391
- {0, 1, 2, 3, 4, 6, 8}
392
- """
393
- return toolz.pipe(sets, flatten, map(set), reduce(func))
394
-
395
-
396
- @toolz.curry
397
- def signif(x: Union[int, float], /, *, n: int = 3) -> Union[int, float]:
398
- """Round :math:`x` to its :math:`n` significant digits.
399
-
400
- Examples
401
- --------
402
- >>> from onekit import pytlz
403
- >>> pytlz.signif(987654321)
404
- 988000000
405
-
406
- >>> pytlz.signif(14393237.76, n=2)
407
- 14000000.0
408
-
409
- >>> pytlz.signif(14393237.76, n=3)
410
- 14400000.0
411
-
412
- >>> signif3 = pytlz.signif(n=3)
413
- >>> type(signif3)
414
- <class 'toolz.functoolz.curry'>
415
- >>> signif3(14393237.76)
416
- 14400000.0
417
- """
418
- if not isinstance(n, int) or n < 1:
419
- raise ValueError(f"{n=} - must be a positive integer")
420
-
421
- if not math.isfinite(x) or math.isclose(x, 0.0):
422
- return x
423
-
424
- n -= math.ceil(math.log10(abs(x)))
425
- return round(x, n)
426
-
427
-
428
- def source_code(x: object, /) -> str:
429
- """Get source code of an object.
430
-
431
- Examples
432
- --------
433
- >>> from onekit import pytlz
434
- >>> def greet():
435
- ... return "Hello, World!"
436
- ...
437
- >>> print(pytlz.source_code(greet))
438
- def greet():
439
- return "Hello, World!"
440
- <BLANKLINE>
441
- """
442
- return inspect.getsource(x)
File without changes
File without changes
File without changes