lawkit-python 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lawkit/lawkit.py ADDED
@@ -0,0 +1,629 @@
1
+ """
2
+ Main lawkit wrapper implementation
3
+ """
4
+
5
import json
import os
import platform
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
13
+
14
+
15
# Literal aliases for the string values used by this wrapper.

# Type of LawkitOptions.format (forwarded as ``--format``).
Format = Literal[
    "text",
    "json",
    "csv",
    "yaml",
    "toml",
    "xml",
]

# Type of LawkitOptions.output (forwarded as ``--output``).
OutputFormat = Literal[
    "text",
    "json",
    "csv",
    "yaml",
    "toml",
    "xml",
]

# Law identifiers accepted by generate_data() and analyze_string().
LawType = Literal[
    "benf",
    "pareto",
    "zipf",
    "normal",
    "poisson",
]
19
+
20
+
21
@dataclass
class LawkitOptions:
    """Optional settings that the analysis functions turn into CLI flags.

    A field left at its default (``None`` or ``False``) contributes nothing
    to the command line that is built.
    """

    # --- Appended for every analysis command by _add_common_options ---
    # Value for ``--format``.
    format: Optional[Format] = None
    # Value for ``--output``; "json" makes the analyze_* functions return
    # a LawkitResult instead of raw text.
    output: Optional[OutputFormat] = None
    # Value for ``--min-count``.
    min_count: Optional[int] = None
    # Value for ``--threshold``.
    threshold: Optional[float] = None
    # Value for ``--confidence``.
    confidence: Optional[float] = None
    # Switches ``--verbose`` / ``--optimize`` / ``--international``.
    verbose: bool = False
    optimize: bool = False
    international: bool = False

    # --- Law-specific options (read only by analyze_pareto) ---
    # Switch ``--gini-coefficient``.
    gini_coefficient: bool = False
    # Value for ``--percentiles``.
    percentiles: Optional[str] = None
    # Switch ``--business-analysis``.
    business_analysis: bool = False

    # --- Statistical options ---
    # Value for ``--test-type`` (read only by analyze_normal).
    test_type: Optional[str] = None
    # Value for ``--alpha`` (appended for every analysis command).
    alpha: Optional[float] = None

    # --- Advanced options ---
    # Switch ``--outlier-detection`` (read only by analyze_normal).
    outlier_detection: bool = False
    # Switches ``--time-series`` / ``--parallel`` / ``--memory-efficient``
    # (appended for every analysis command).
    time_series: bool = False
    parallel: bool = False
    memory_efficient: bool = False
44
+
45
+
46
class LawkitResult:
    """Parsed JSON output of a lawkit command, tagged with a law name.

    Each property is a convenience lookup of a single key in ``data`` and
    evaluates to ``None`` when that key is missing.
    """

    def __init__(self, data: Dict[str, Any], law_type: str):
        self.law_type = law_type
        self.data = data

    def _lookup(self, key: str) -> Any:
        """Return ``data[key]``, or ``None`` when the key is absent."""
        return self.data.get(key)

    @property
    def risk_level(self) -> Optional[str]:
        """Risk level (``risk_level`` key), or ``None``."""
        return self._lookup("risk_level")

    @property
    def p_value(self) -> Optional[float]:
        """P-value (``p_value`` key), or ``None``."""
        return self._lookup("p_value")

    @property
    def chi_square(self) -> Optional[float]:
        """Chi-square statistic (``chi_square`` key), or ``None``."""
        return self._lookup("chi_square")

    @property
    def mad(self) -> Optional[float]:
        """Mean Absolute Deviation (``mad`` key), or ``None``."""
        return self._lookup("mad")

    @property
    def gini_coefficient(self) -> Optional[float]:
        """Gini coefficient (``gini_coefficient`` key), or ``None``."""
        return self._lookup("gini_coefficient")

    @property
    def concentration_80_20(self) -> Optional[float]:
        """80/20 concentration (``concentration_80_20`` key), or ``None``."""
        return self._lookup("concentration_80_20")

    @property
    def exponent(self) -> Optional[float]:
        """Zipf exponent (``exponent`` key), or ``None``."""
        return self._lookup("exponent")

    @property
    def lambda_estimate(self) -> Optional[float]:
        """Poisson lambda estimate (stored under the ``lambda`` key), or ``None``."""
        return self._lookup("lambda")

    @property
    def mean(self) -> Optional[float]:
        """Mean (``mean`` key), or ``None``."""
        return self._lookup("mean")

    @property
    def std_dev(self) -> Optional[float]:
        """Standard deviation (``std_dev`` key), or ``None``."""
        return self._lookup("std_dev")

    @property
    def outliers(self) -> Optional[List[Any]]:
        """Outliers (``outliers`` key), or ``None``."""
        return self._lookup("outliers")

    @property
    def anomalies(self) -> Optional[List[Any]]:
        """Anomalies (``anomalies`` key), or ``None``."""
        return self._lookup("anomalies")

    def __repr__(self) -> str:
        return "LawkitResult(law_type='{}', data={})".format(
            self.law_type, self.data
        )
114
+
115
+
116
class LawkitError(Exception):
    """Raised when a lawkit command fails or its result cannot be used.

    Carries the process exit code and captured stderr alongside the message.
    Within this module an ``exit_code`` of ``-1`` is used for failures that
    did not come from a process exit status.
    """

    def __init__(self, message: str, exit_code: int, stderr: str):
        # Record the process details first, then let Exception store the message.
        self.stderr = stderr
        self.exit_code = exit_code
        super().__init__(message)
122
+
123
+
124
def _get_lawkit_binary_path() -> str:
    """Pick the lawkit executable to run.

    Looks for ``bin/lawkit`` (``bin/lawkit.exe`` on Windows) under the third
    ancestor directory of this module's file path. When that file does not
    exist, the bare command name ``"lawkit"`` is returned so the executable
    is resolved through the system PATH.
    """
    executable = "lawkit"
    if platform.system() == "Windows":
        executable = "lawkit.exe"

    # Climb three levels up from this file's path.
    base_dir = Path(__file__)
    for _ in range(3):
        base_dir = base_dir.parent

    bundled = base_dir / "bin" / executable
    return str(bundled) if bundled.exists() else "lawkit"
136
+
137
+
138
def _execute_lawkit(args: List[str]) -> Tuple[str, str]:
    """Run the lawkit executable with ``args`` and capture its output.

    Args:
        args: Command-line arguments placed after the executable path.

    Returns:
        A ``(stdout, stderr)`` pair with the text written by the process.

    Raises:
        LawkitError: If the process exits with a non-zero status (``exit_code``
            and ``stderr`` are taken from the process), or if the executable
            cannot be found or started (``exit_code`` is ``-1``).
    """
    lawkit_path = _get_lawkit_binary_path()

    try:
        result = subprocess.run(
            [lawkit_path, *args],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as e:
        raise LawkitError(
            f"lawkit exited with code {e.returncode}",
            e.returncode,
            e.stderr or "",
        ) from e
    except FileNotFoundError as e:
        raise LawkitError(
            "lawkit command not found. Please install lawkit CLI tool.",
            -1,
            "",
        ) from e
    except OSError as e:
        # Any other launch failure (for example a binary that exists but is
        # not executable) is reported as LawkitError too, so callers that
        # only catch LawkitError never see a raw OSError.
        raise LawkitError(f"Failed to execute lawkit: {e}", -1, "") from e

    return result.stdout, result.stderr
162
+
163
+
164
def analyze_benford(
    input_data: str,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Analyze data using Benford's Law

    Runs the ``benf`` sub-command on ``input_data`` with the common options.

    Args:
        input_data: Path to input file or '-' for stdin
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        LawkitResult wrapping the parsed JSON when ``options.output`` is
        "json", otherwise the command's raw stdout as a string

    Raises:
        LawkitError: If the command fails, or if JSON output was requested
            but stdout is not valid JSON (``exit_code`` is -1 and ``stderr``
            holds whatever the command wrote to stderr)

    Examples:
        >>> result = analyze_benford('financial_data.csv')
        >>> print(result)

        >>> json_result = analyze_benford('accounting.json',
        ...                              LawkitOptions(format='json', output='json'))
        >>> print(f"Risk level: {json_result.risk_level}")
        >>> print(f"P-value: {json_result.p_value}")
    """
    if options is None:
        options = LawkitOptions()

    args = ["benf", input_data]
    _add_common_options(args, options)

    stdout, stderr = _execute_lawkit(args)

    # Anything other than JSON output is handed back verbatim.
    if options.output != "json":
        return stdout

    try:
        json_data = json.loads(stdout)
    except json.JSONDecodeError as e:
        # Keep the captured stderr on the error so the caller can see what
        # the command reported, and chain the decode error explicitly.
        raise LawkitError(f"Failed to parse JSON output: {e}", -1, stderr) from e

    # NOTE: results are tagged "benford" although the sub-command is "benf";
    # kept as-is because callers may compare against this value.
    return LawkitResult(json_data, "benford")
207
+
208
+
209
def analyze_pareto(
    input_data: str,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Analyze data using Pareto principle (80/20 rule)

    Runs the ``pareto`` sub-command on ``input_data`` with the common options
    plus ``--gini-coefficient``, ``--percentiles`` and ``--business-analysis``
    when the corresponding option fields are set.

    Args:
        input_data: Path to input file or '-' for stdin
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        LawkitResult wrapping the parsed JSON when ``options.output`` is
        "json", otherwise the command's raw stdout as a string

    Raises:
        LawkitError: If the command fails, or if JSON output was requested
            but stdout is not valid JSON (``exit_code`` is -1 and ``stderr``
            holds whatever the command wrote to stderr)

    Examples:
        >>> result = analyze_pareto('sales_data.csv')
        >>> print(result)

        >>> json_result = analyze_pareto('revenue.json',
        ...                             LawkitOptions(output='json', gini_coefficient=True))
        >>> print(f"Gini coefficient: {json_result.gini_coefficient}")
        >>> print(f"80/20 concentration: {json_result.concentration_80_20}")
    """
    if options is None:
        options = LawkitOptions()

    args = ["pareto", input_data]
    _add_common_options(args, options)

    # Pareto-specific flags follow the common ones.
    if options.gini_coefficient:
        args.append("--gini-coefficient")
    if options.percentiles:
        args.extend(["--percentiles", options.percentiles])
    if options.business_analysis:
        args.append("--business-analysis")

    stdout, stderr = _execute_lawkit(args)

    # Anything other than JSON output is handed back verbatim.
    if options.output != "json":
        return stdout

    try:
        json_data = json.loads(stdout)
    except json.JSONDecodeError as e:
        # Keep the captured stderr on the error so the caller can see what
        # the command reported, and chain the decode error explicitly.
        raise LawkitError(f"Failed to parse JSON output: {e}", -1, stderr) from e

    return LawkitResult(json_data, "pareto")
262
+
263
+
264
def analyze_zipf(
    input_data: str,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Analyze data using Zipf's Law

    Runs the ``zipf`` sub-command on ``input_data`` with the common options.

    Args:
        input_data: Path to input file or '-' for stdin
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        LawkitResult wrapping the parsed JSON when ``options.output`` is
        "json", otherwise the command's raw stdout as a string

    Raises:
        LawkitError: If the command fails, or if JSON output was requested
            but stdout is not valid JSON (``exit_code`` is -1 and ``stderr``
            holds whatever the command wrote to stderr)

    Examples:
        >>> result = analyze_zipf('text_data.txt')
        >>> print(result)

        >>> json_result = analyze_zipf('word_frequencies.json',
        ...                           LawkitOptions(output='json'))
        >>> print(f"Zipf exponent: {json_result.exponent}")
    """
    if options is None:
        options = LawkitOptions()

    args = ["zipf", input_data]
    _add_common_options(args, options)

    stdout, stderr = _execute_lawkit(args)

    # Anything other than JSON output is handed back verbatim.
    if options.output != "json":
        return stdout

    try:
        json_data = json.loads(stdout)
    except json.JSONDecodeError as e:
        # Keep the captured stderr on the error so the caller can see what
        # the command reported, and chain the decode error explicitly.
        raise LawkitError(f"Failed to parse JSON output: {e}", -1, stderr) from e

    return LawkitResult(json_data, "zipf")
306
+
307
+
308
def analyze_normal(
    input_data: str,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Analyze data for normal distribution

    Runs the ``normal`` sub-command on ``input_data`` with the common options
    plus ``--outlier-detection`` and ``--test-type`` when the corresponding
    option fields are set.

    Args:
        input_data: Path to input file or '-' for stdin
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        LawkitResult wrapping the parsed JSON when ``options.output`` is
        "json", otherwise the command's raw stdout as a string

    Raises:
        LawkitError: If the command fails, or if JSON output was requested
            but stdout is not valid JSON (``exit_code`` is -1 and ``stderr``
            holds whatever the command wrote to stderr)

    Examples:
        >>> result = analyze_normal('measurements.csv')
        >>> print(result)

        >>> json_result = analyze_normal('quality_data.json',
        ...                             LawkitOptions(output='json', outlier_detection=True))
        >>> print(f"Mean: {json_result.mean}")
        >>> print(f"Standard deviation: {json_result.std_dev}")
        >>> print(f"Outliers: {json_result.outliers}")
    """
    if options is None:
        options = LawkitOptions()

    args = ["normal", input_data]
    _add_common_options(args, options)

    # Normal-specific flags follow the common ones.
    if options.outlier_detection:
        args.append("--outlier-detection")
    if options.test_type:
        args.extend(["--test-type", options.test_type])

    stdout, stderr = _execute_lawkit(args)

    # Anything other than JSON output is handed back verbatim.
    if options.output != "json":
        return stdout

    try:
        json_data = json.loads(stdout)
    except json.JSONDecodeError as e:
        # Keep the captured stderr on the error so the caller can see what
        # the command reported, and chain the decode error explicitly.
        raise LawkitError(f"Failed to parse JSON output: {e}", -1, stderr) from e

    return LawkitResult(json_data, "normal")
359
+
360
+
361
def analyze_poisson(
    input_data: str,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Analyze data using Poisson distribution

    Runs the ``poisson`` sub-command on ``input_data`` with the common options.

    Args:
        input_data: Path to input file or '-' for stdin
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        LawkitResult wrapping the parsed JSON when ``options.output`` is
        "json", otherwise the command's raw stdout as a string

    Raises:
        LawkitError: If the command fails, or if JSON output was requested
            but stdout is not valid JSON (``exit_code`` is -1 and ``stderr``
            holds whatever the command wrote to stderr)

    Examples:
        >>> result = analyze_poisson('event_counts.csv')
        >>> print(result)

        >>> json_result = analyze_poisson('incidents.json',
        ...                              LawkitOptions(output='json'))
        >>> print(f"Lambda estimate: {json_result.lambda_estimate}")
    """
    if options is None:
        options = LawkitOptions()

    args = ["poisson", input_data]
    _add_common_options(args, options)

    stdout, stderr = _execute_lawkit(args)

    # Anything other than JSON output is handed back verbatim.
    if options.output != "json":
        return stdout

    try:
        json_data = json.loads(stdout)
    except json.JSONDecodeError as e:
        # Keep the captured stderr on the error so the caller can see what
        # the command reported, and chain the decode error explicitly.
        raise LawkitError(f"Failed to parse JSON output: {e}", -1, stderr) from e

    return LawkitResult(json_data, "poisson")
403
+
404
+
405
def compare_laws(
    input_data: str,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Compare multiple statistical laws on the same data

    Runs the ``compare`` sub-command on ``input_data`` with the common options.

    Args:
        input_data: Path to input file or '-' for stdin
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        LawkitResult wrapping the parsed JSON when ``options.output`` is
        "json", otherwise the command's raw stdout as a string

    Raises:
        LawkitError: If the command fails, or if JSON output was requested
            but stdout is not valid JSON (``exit_code`` is -1 and ``stderr``
            holds whatever the command wrote to stderr)

    Examples:
        >>> result = compare_laws('dataset.csv')
        >>> print(result)

        >>> json_result = compare_laws('complex_data.json',
        ...                           LawkitOptions(output='json'))
        >>> print(f"Risk level: {json_result.risk_level}")
    """
    if options is None:
        options = LawkitOptions()

    args = ["compare", input_data]
    _add_common_options(args, options)

    stdout, stderr = _execute_lawkit(args)

    # Anything other than JSON output is handed back verbatim.
    if options.output != "json":
        return stdout

    try:
        json_data = json.loads(stdout)
    except json.JSONDecodeError as e:
        # Keep the captured stderr on the error so the caller can see what
        # the command reported, and chain the decode error explicitly.
        raise LawkitError(f"Failed to parse JSON output: {e}", -1, stderr) from e

    return LawkitResult(json_data, "compare")
447
+
448
+
449
def generate_data(
    law_type: LawType,
    samples: int = 1000,
    seed: Optional[int] = None,
    **kwargs
) -> str:
    """
    Generate sample data following a specific statistical law

    Runs the ``generate`` sub-command for ``law_type``. Each extra keyword
    argument becomes a ``--name`` option, with underscores in the name
    replaced by hyphens.

    Args:
        law_type: Type of statistical law to use
        samples: Number of samples to generate
        seed: Random seed for reproducibility; omitted from the command when None
        **kwargs: Law-specific parameters. A value of ``None`` or ``False``
            is skipped, ``True`` adds the bare flag, and any other value is
            passed as ``--name <str(value)>``

    Returns:
        Generated data as string

    Raises:
        LawkitError: If the command fails

    Examples:
        >>> data = generate_data('benf', samples=1000, seed=42)
        >>> print(data)

        >>> normal_data = generate_data('normal', samples=500, mean=100, stddev=15)
        >>> pareto_data = generate_data('pareto', samples=1000, concentration=0.8)
    """
    args = ["generate", law_type, "--samples", str(samples)]

    if seed is not None:
        args.extend(["--seed", str(seed)])

    # Add law-specific parameters
    for key, value in kwargs.items():
        if value is None or value is False:
            # An unset or disabled parameter must not reach the CLI as the
            # literal text "None" / "False".
            continue
        flag = "--" + key.replace("_", "-")
        if value is True:
            args.append(flag)
        else:
            args.extend([flag, str(value)])

    stdout, _ = _execute_lawkit(args)
    return stdout
486
+
487
+
488
def analyze_string(
    content: str,
    law_type: LawType,
    options: Optional[LawkitOptions] = None
) -> Union[str, LawkitResult]:
    """
    Analyze string data directly (writes to temporary file)

    The content is written as UTF-8 to a temporary ``.csv`` file, the
    analysis function matching ``law_type`` is run on that file, and the file
    is removed afterwards whether or not the analysis succeeds.

    Args:
        content: Data content as string
        law_type: Type of statistical law to use
        options: Analysis options; ``LawkitOptions()`` is used when omitted

    Returns:
        String output for text format, or LawkitResult for JSON format

    Raises:
        LawkitError: If ``law_type`` is not one of "benf", "pareto", "zipf",
            "normal" or "poisson" (``exit_code`` is -1), or if the underlying
            analysis fails

    Examples:
        >>> csv_data = "amount\\n123\\n456\\n789"
        >>> result = analyze_string(csv_data, 'benf',
        ...                        LawkitOptions(format='csv', output='json'))
        >>> print(result.risk_level)
    """
    if options is None:
        options = LawkitOptions()

    analyzers = {
        "benf": analyze_benford,
        "pareto": analyze_pareto,
        "zipf": analyze_zipf,
        "normal": analyze_normal,
        "poisson": analyze_poisson,
    }
    # Reject an unknown law before touching the filesystem. The isinstance
    # guard keeps unhashable values on the LawkitError path as well.
    analyzer = analyzers.get(law_type) if isinstance(law_type, str) else None
    if analyzer is None:
        raise LawkitError(f"Unknown law type: {law_type}", -1, "")

    # delete=False because the file must outlive the handle: it is closed
    # before lawkit opens it by path. UTF-8 avoids depending on the platform
    # default encoding.
    tmp_file = tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False, suffix=".csv"
    )
    tmp_file_path = tmp_file.name
    try:
        with tmp_file:
            tmp_file.write(content)
        return analyzer(tmp_file_path, options)
    finally:
        # Also runs when the write itself fails, so the file never leaks.
        os.unlink(tmp_file_path)
535
+
536
+
537
def _add_common_options(args: List[str], options: LawkitOptions) -> None:
    """Append the flags shared by all analysis commands to ``args`` in place.

    Flags are added in a fixed order: ``--format``, ``--output``,
    ``--min-count``, ``--threshold``, ``--confidence``, ``--alpha``, then the
    switches ``--verbose``, ``--optimize``, ``--international``,
    ``--time-series``, ``--parallel`` and ``--memory-efficient``.
    """
    # String-valued options: skipped when unset or empty.
    for flag, text in (
        ("--format", options.format),
        ("--output", options.output),
    ):
        if text:
            args.extend([flag, text])

    # Numeric options: only None is skipped, so an explicit 0 is forwarded.
    for flag, number in (
        ("--min-count", options.min_count),
        ("--threshold", options.threshold),
        ("--confidence", options.confidence),
        ("--alpha", options.alpha),
    ):
        if number is not None:
            args.extend([flag, str(number)])

    # Boolean switches: present only when enabled.
    for flag, enabled in (
        ("--verbose", options.verbose),
        ("--optimize", options.optimize),
        ("--international", options.international),
        ("--time-series", options.time_series),
        ("--parallel", options.parallel),
        ("--memory-efficient", options.memory_efficient),
    ):
        if enabled:
            args.append(flag)
574
+
575
+
576
def is_lawkit_available() -> bool:
    """
    Report whether the lawkit command can be run

    Availability is probed by running lawkit with ``--version``.

    Returns:
        True if that invocation succeeds, False if it raises LawkitError

    Examples:
        >>> if not is_lawkit_available():
        ...     print("Please install lawkit CLI tool")
        ...     exit(1)
    """
    try:
        _execute_lawkit(["--version"])
    except LawkitError:
        return False
    return True
593
+
594
+
595
def get_version() -> str:
    """
    Return the version reported by the lawkit CLI tool

    Returns:
        The stripped stdout of ``lawkit --version``, or "Unknown" if that
        invocation raises LawkitError

    Examples:
        >>> version = get_version()
        >>> print(f"Using lawkit version: {version}")
    """
    try:
        version_text, _ = _execute_lawkit(["--version"])
    except LawkitError:
        return "Unknown"
    return version_text.strip()
611
+
612
+
613
def selftest() -> bool:
    """
    Run the lawkit ``selftest`` sub-command to verify the installation

    Returns:
        True if the command succeeds, False if it raises LawkitError

    Examples:
        >>> if not selftest():
        ...     print("lawkit self-test failed")
        ...     exit(1)
    """
    try:
        _execute_lawkit(["selftest"])
    except LawkitError:
        return False
    return True