lawkit-python 2.1.0__tar.gz → 2.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/.gitignore +4 -1
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/PKG-INFO +37 -20
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/README.md +36 -19
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/pyproject.toml +1 -1
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/src/lawkit/__init__.py +6 -0
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/src/lawkit/lawkit.py +216 -48
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/src/lawkit/compat.py +0 -0
- {lawkit_python-2.1.0 → lawkit_python-2.1.2}/src/lawkit/installer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lawkit-python
|
|
3
|
-
Version: 2.1.0
|
|
3
|
+
Version: 2.1.2
|
|
4
4
|
Summary: Python wrapper for lawkit - Statistical law analysis toolkit for fraud detection and data quality assessment
|
|
5
5
|
Project-URL: Homepage, https://github.com/kako-jun/lawkit
|
|
6
6
|
Project-URL: Repository, https://github.com/kako-jun/lawkit
|
|
@@ -55,14 +55,14 @@ This will automatically download the appropriate `lawkit` binary for your system
|
|
|
55
55
|
```python
|
|
56
56
|
import lawkit
|
|
57
57
|
|
|
58
|
-
# Analyze financial data with Benford
|
|
58
|
+
# Analyze financial data with Benford Law
|
|
59
59
|
result = lawkit.analyze_benford('financial_data.csv')
|
|
60
60
|
print(result)
|
|
61
61
|
|
|
62
62
|
# Get structured JSON output
|
|
63
63
|
json_result = lawkit.analyze_benford(
|
|
64
64
|
'accounting.csv',
|
|
65
|
-
lawkit.LawkitOptions(format='
|
|
65
|
+
lawkit.LawkitOptions(format='json')
|
|
66
66
|
)
|
|
67
67
|
print(f"Risk level: {json_result.risk_level}")
|
|
68
68
|
print(f"P-value: {json_result.p_value}")
|
|
@@ -70,7 +70,7 @@ print(f"P-value: {json_result.p_value}")
|
|
|
70
70
|
# Check if data follows Pareto principle (80/20 rule)
|
|
71
71
|
pareto_result = lawkit.analyze_pareto(
|
|
72
72
|
'sales_data.csv',
|
|
73
|
-
lawkit.LawkitOptions(
|
|
73
|
+
lawkit.LawkitOptions(format='json', gini_coefficient=True)
|
|
74
74
|
)
|
|
75
75
|
print(f"Gini coefficient: {pareto_result.gini_coefficient}")
|
|
76
76
|
print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
@@ -80,9 +80,9 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
|
80
80
|
|
|
81
81
|
### Statistical Laws Supported
|
|
82
82
|
|
|
83
|
-
- **Benford
|
|
83
|
+
- **Benford Law**: Detect fraud and anomalies in numerical data
|
|
84
84
|
- **Pareto Principle**: Analyze 80/20 distributions and concentration
|
|
85
|
-
- **Zipf
|
|
85
|
+
- **Zipf Law**: Analyze word frequencies and power-law distributions
|
|
86
86
|
- **Normal Distribution**: Test for normality and detect outliers
|
|
87
87
|
- **Poisson Distribution**: Analyze rare events and count data
|
|
88
88
|
|
|
@@ -109,7 +109,7 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
|
109
109
|
```python
|
|
110
110
|
import lawkit
|
|
111
111
|
|
|
112
|
-
# Analyze with Benford
|
|
112
|
+
# Analyze with Benford Law
|
|
113
113
|
result = lawkit.analyze_benford('invoice_data.csv')
|
|
114
114
|
print(result)
|
|
115
115
|
|
|
@@ -159,13 +159,27 @@ if normal_result.p_value < 0.05:
|
|
|
159
159
|
if normal_result.outliers:
|
|
160
160
|
print(f"Found {len(normal_result.outliers)} outliers")
|
|
161
161
|
|
|
162
|
-
# Multi-law
|
|
163
|
-
|
|
162
|
+
# Multi-law analysis
|
|
163
|
+
analysis = lawkit.analyze_laws(
|
|
164
164
|
'complex_dataset.csv',
|
|
165
|
-
lawkit.LawkitOptions(
|
|
165
|
+
lawkit.LawkitOptions(format='json', laws='benf,pareto,zipf')
|
|
166
166
|
)
|
|
167
|
-
print(f"
|
|
168
|
-
print(f"Overall risk level: {comparison.risk_level}")
|
|
167
|
+
print(f"Analysis results: {analysis.data}")
|
|
168
|
+
print(f"Overall risk level: {analysis.risk_level}")
|
|
169
|
+
|
|
170
|
+
# Data validation
|
|
171
|
+
validation = lawkit.validate_laws(
|
|
172
|
+
'complex_dataset.csv',
|
|
173
|
+
lawkit.LawkitOptions(format='json', consistency_check=True)
|
|
174
|
+
)
|
|
175
|
+
print(f"Validation status: {validation.data}")
|
|
176
|
+
|
|
177
|
+
# Conflict diagnosis
|
|
178
|
+
diagnosis = lawkit.diagnose_laws(
|
|
179
|
+
'complex_dataset.csv',
|
|
180
|
+
lawkit.LawkitOptions(format='json', report='detailed')
|
|
181
|
+
)
|
|
182
|
+
print(f"Diagnosis: {diagnosis.data}")
|
|
169
183
|
```
|
|
170
184
|
|
|
171
185
|
### Generate Sample Data
|
|
@@ -173,7 +187,7 @@ print(f"Overall risk level: {comparison.risk_level}")
|
|
|
173
187
|
```python
|
|
174
188
|
import lawkit
|
|
175
189
|
|
|
176
|
-
# Generate Benford
|
|
190
|
+
# Generate Benford Law compliant data
|
|
177
191
|
benford_data = lawkit.generate_data('benf', samples=1000, seed=42)
|
|
178
192
|
print(benford_data)
|
|
179
193
|
|
|
@@ -205,7 +219,7 @@ csv_data = """amount
|
|
|
205
219
|
result = lawkit.analyze_string(
|
|
206
220
|
csv_data,
|
|
207
221
|
'benf',
|
|
208
|
-
lawkit.LawkitOptions(format='
|
|
222
|
+
lawkit.LawkitOptions(format='json')
|
|
209
223
|
)
|
|
210
224
|
print(f"Risk assessment: {result.risk_level}")
|
|
211
225
|
|
|
@@ -214,7 +228,7 @@ json_data = '{"values": [12, 23, 34, 45, 56, 67, 78, 89]}'
|
|
|
214
228
|
result = lawkit.analyze_string(
|
|
215
229
|
json_data,
|
|
216
230
|
'normal',
|
|
217
|
-
lawkit.LawkitOptions(format='json'
|
|
231
|
+
lawkit.LawkitOptions(format='json')
|
|
218
232
|
)
|
|
219
233
|
print(f"Is normal: {result.p_value > 0.05}")
|
|
220
234
|
```
|
|
@@ -338,7 +352,7 @@ result = lawkit.analyze_benford('invoices.csv',
|
|
|
338
352
|
if result.risk_level in ['High', 'Critical']:
|
|
339
353
|
print("🚨 Potential fraud detected in invoice data")
|
|
340
354
|
print(f"Statistical significance: p={result.p_value:.6f}")
|
|
341
|
-
print(f"Deviation from Benford
|
|
355
|
+
print(f"Deviation from Benford Law: {result.mad:.2f}%")
|
|
342
356
|
```
|
|
343
357
|
|
|
344
358
|
### Business Intelligence
|
|
@@ -386,7 +400,7 @@ import lawkit
|
|
|
386
400
|
result = lawkit.analyze_zipf('document.txt',
|
|
387
401
|
lawkit.LawkitOptions(output='json'))
|
|
388
402
|
|
|
389
|
-
print(f"Text follows Zipf
|
|
403
|
+
print(f"Text follows Zipf Law: {result.p_value > 0.05}")
|
|
390
404
|
print(f"Power law exponent: {result.exponent:.3f}")
|
|
391
405
|
```
|
|
392
406
|
|
|
@@ -394,12 +408,15 @@ print(f"Power law exponent: {result.exponent:.3f}")
|
|
|
394
408
|
|
|
395
409
|
### Main Functions
|
|
396
410
|
|
|
397
|
-
- `analyze_benford(input_data, options)` - Benford
|
|
411
|
+
- `analyze_benford(input_data, options)` - Benford Law analysis
|
|
398
412
|
- `analyze_pareto(input_data, options)` - Pareto principle analysis
|
|
399
|
-
- `analyze_zipf(input_data, options)` - Zipf
|
|
413
|
+
- `analyze_zipf(input_data, options)` - Zipf Law analysis
|
|
400
414
|
- `analyze_normal(input_data, options)` - Normal distribution analysis
|
|
401
415
|
- `analyze_poisson(input_data, options)` - Poisson distribution analysis
|
|
402
|
-
- `
|
|
416
|
+
- `analyze_laws(input_data, options)` - Multi-law analysis
|
|
417
|
+
- `validate_laws(input_data, options)` - Data validation and consistency check
|
|
418
|
+
- `diagnose_laws(input_data, options)` - Conflict diagnosis and detailed reporting
|
|
419
|
+
- `compare_laws(input_data, options)` - Alias for analyze_laws (backward compatibility)
|
|
403
420
|
- `generate_data(law_type, samples, **kwargs)` - Generate sample data
|
|
404
421
|
- `analyze_string(content, law_type, options)` - Analyze string data directly
|
|
405
422
|
|
|
@@ -15,14 +15,14 @@ This will automatically download the appropriate `lawkit` binary for your system
|
|
|
15
15
|
```python
|
|
16
16
|
import lawkit
|
|
17
17
|
|
|
18
|
-
# Analyze financial data with Benford
|
|
18
|
+
# Analyze financial data with Benford Law
|
|
19
19
|
result = lawkit.analyze_benford('financial_data.csv')
|
|
20
20
|
print(result)
|
|
21
21
|
|
|
22
22
|
# Get structured JSON output
|
|
23
23
|
json_result = lawkit.analyze_benford(
|
|
24
24
|
'accounting.csv',
|
|
25
|
-
lawkit.LawkitOptions(format='
|
|
25
|
+
lawkit.LawkitOptions(format='json')
|
|
26
26
|
)
|
|
27
27
|
print(f"Risk level: {json_result.risk_level}")
|
|
28
28
|
print(f"P-value: {json_result.p_value}")
|
|
@@ -30,7 +30,7 @@ print(f"P-value: {json_result.p_value}")
|
|
|
30
30
|
# Check if data follows Pareto principle (80/20 rule)
|
|
31
31
|
pareto_result = lawkit.analyze_pareto(
|
|
32
32
|
'sales_data.csv',
|
|
33
|
-
lawkit.LawkitOptions(
|
|
33
|
+
lawkit.LawkitOptions(format='json', gini_coefficient=True)
|
|
34
34
|
)
|
|
35
35
|
print(f"Gini coefficient: {pareto_result.gini_coefficient}")
|
|
36
36
|
print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
@@ -40,9 +40,9 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
|
40
40
|
|
|
41
41
|
### Statistical Laws Supported
|
|
42
42
|
|
|
43
|
-
- **Benford
|
|
43
|
+
- **Benford Law**: Detect fraud and anomalies in numerical data
|
|
44
44
|
- **Pareto Principle**: Analyze 80/20 distributions and concentration
|
|
45
|
-
- **Zipf
|
|
45
|
+
- **Zipf Law**: Analyze word frequencies and power-law distributions
|
|
46
46
|
- **Normal Distribution**: Test for normality and detect outliers
|
|
47
47
|
- **Poisson Distribution**: Analyze rare events and count data
|
|
48
48
|
|
|
@@ -69,7 +69,7 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
|
69
69
|
```python
|
|
70
70
|
import lawkit
|
|
71
71
|
|
|
72
|
-
# Analyze with Benford
|
|
72
|
+
# Analyze with Benford Law
|
|
73
73
|
result = lawkit.analyze_benford('invoice_data.csv')
|
|
74
74
|
print(result)
|
|
75
75
|
|
|
@@ -119,13 +119,27 @@ if normal_result.p_value < 0.05:
|
|
|
119
119
|
if normal_result.outliers:
|
|
120
120
|
print(f"Found {len(normal_result.outliers)} outliers")
|
|
121
121
|
|
|
122
|
-
# Multi-law
|
|
123
|
-
|
|
122
|
+
# Multi-law analysis
|
|
123
|
+
analysis = lawkit.analyze_laws(
|
|
124
124
|
'complex_dataset.csv',
|
|
125
|
-
lawkit.LawkitOptions(
|
|
125
|
+
lawkit.LawkitOptions(format='json', laws='benf,pareto,zipf')
|
|
126
126
|
)
|
|
127
|
-
print(f"
|
|
128
|
-
print(f"Overall risk level: {comparison.risk_level}")
|
|
127
|
+
print(f"Analysis results: {analysis.data}")
|
|
128
|
+
print(f"Overall risk level: {analysis.risk_level}")
|
|
129
|
+
|
|
130
|
+
# Data validation
|
|
131
|
+
validation = lawkit.validate_laws(
|
|
132
|
+
'complex_dataset.csv',
|
|
133
|
+
lawkit.LawkitOptions(format='json', consistency_check=True)
|
|
134
|
+
)
|
|
135
|
+
print(f"Validation status: {validation.data}")
|
|
136
|
+
|
|
137
|
+
# Conflict diagnosis
|
|
138
|
+
diagnosis = lawkit.diagnose_laws(
|
|
139
|
+
'complex_dataset.csv',
|
|
140
|
+
lawkit.LawkitOptions(format='json', report='detailed')
|
|
141
|
+
)
|
|
142
|
+
print(f"Diagnosis: {diagnosis.data}")
|
|
129
143
|
```
|
|
130
144
|
|
|
131
145
|
### Generate Sample Data
|
|
@@ -133,7 +147,7 @@ print(f"Overall risk level: {comparison.risk_level}")
|
|
|
133
147
|
```python
|
|
134
148
|
import lawkit
|
|
135
149
|
|
|
136
|
-
# Generate Benford
|
|
150
|
+
# Generate Benford Law compliant data
|
|
137
151
|
benford_data = lawkit.generate_data('benf', samples=1000, seed=42)
|
|
138
152
|
print(benford_data)
|
|
139
153
|
|
|
@@ -165,7 +179,7 @@ csv_data = """amount
|
|
|
165
179
|
result = lawkit.analyze_string(
|
|
166
180
|
csv_data,
|
|
167
181
|
'benf',
|
|
168
|
-
lawkit.LawkitOptions(format='
|
|
182
|
+
lawkit.LawkitOptions(format='json')
|
|
169
183
|
)
|
|
170
184
|
print(f"Risk assessment: {result.risk_level}")
|
|
171
185
|
|
|
@@ -174,7 +188,7 @@ json_data = '{"values": [12, 23, 34, 45, 56, 67, 78, 89]}'
|
|
|
174
188
|
result = lawkit.analyze_string(
|
|
175
189
|
json_data,
|
|
176
190
|
'normal',
|
|
177
|
-
lawkit.LawkitOptions(format='json'
|
|
191
|
+
lawkit.LawkitOptions(format='json')
|
|
178
192
|
)
|
|
179
193
|
print(f"Is normal: {result.p_value > 0.05}")
|
|
180
194
|
```
|
|
@@ -298,7 +312,7 @@ result = lawkit.analyze_benford('invoices.csv',
|
|
|
298
312
|
if result.risk_level in ['High', 'Critical']:
|
|
299
313
|
print("🚨 Potential fraud detected in invoice data")
|
|
300
314
|
print(f"Statistical significance: p={result.p_value:.6f}")
|
|
301
|
-
print(f"Deviation from Benford
|
|
315
|
+
print(f"Deviation from Benford Law: {result.mad:.2f}%")
|
|
302
316
|
```
|
|
303
317
|
|
|
304
318
|
### Business Intelligence
|
|
@@ -346,7 +360,7 @@ import lawkit
|
|
|
346
360
|
result = lawkit.analyze_zipf('document.txt',
|
|
347
361
|
lawkit.LawkitOptions(output='json'))
|
|
348
362
|
|
|
349
|
-
print(f"Text follows Zipf
|
|
363
|
+
print(f"Text follows Zipf Law: {result.p_value > 0.05}")
|
|
350
364
|
print(f"Power law exponent: {result.exponent:.3f}")
|
|
351
365
|
```
|
|
352
366
|
|
|
@@ -354,12 +368,15 @@ print(f"Power law exponent: {result.exponent:.3f}")
|
|
|
354
368
|
|
|
355
369
|
### Main Functions
|
|
356
370
|
|
|
357
|
-
- `analyze_benford(input_data, options)` - Benford
|
|
371
|
+
- `analyze_benford(input_data, options)` - Benford Law analysis
|
|
358
372
|
- `analyze_pareto(input_data, options)` - Pareto principle analysis
|
|
359
|
-
- `analyze_zipf(input_data, options)` - Zipf
|
|
373
|
+
- `analyze_zipf(input_data, options)` - Zipf Law analysis
|
|
360
374
|
- `analyze_normal(input_data, options)` - Normal distribution analysis
|
|
361
375
|
- `analyze_poisson(input_data, options)` - Poisson distribution analysis
|
|
362
|
-
- `
|
|
376
|
+
- `analyze_laws(input_data, options)` - Multi-law analysis
|
|
377
|
+
- `validate_laws(input_data, options)` - Data validation and consistency check
|
|
378
|
+
- `diagnose_laws(input_data, options)` - Conflict diagnosis and detailed reporting
|
|
379
|
+
- `compare_laws(input_data, options)` - Alias for analyze_laws (backward compatibility)
|
|
363
380
|
- `generate_data(law_type, samples, **kwargs)` - Generate sample data
|
|
364
381
|
- `analyze_string(content, law_type, options)` - Analyze string data directly
|
|
365
382
|
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lawkit-python"
|
|
7
|
-
version = "2.1.0"
|
|
7
|
+
version = "2.1.2"
|
|
8
8
|
description = "Python wrapper for lawkit - Statistical law analysis toolkit for fraud detection and data quality assessment"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -13,6 +13,9 @@ from .lawkit import (
|
|
|
13
13
|
analyze_zipf,
|
|
14
14
|
analyze_normal,
|
|
15
15
|
analyze_poisson,
|
|
16
|
+
analyze_laws,
|
|
17
|
+
validate_laws,
|
|
18
|
+
diagnose_laws,
|
|
16
19
|
compare_laws,
|
|
17
20
|
generate_data,
|
|
18
21
|
analyze_string,
|
|
@@ -38,6 +41,9 @@ __all__ = [
|
|
|
38
41
|
"analyze_zipf",
|
|
39
42
|
"analyze_normal",
|
|
40
43
|
"analyze_poisson",
|
|
44
|
+
"analyze_laws",
|
|
45
|
+
"validate_laws",
|
|
46
|
+
"diagnose_laws",
|
|
41
47
|
"compare_laws",
|
|
42
48
|
|
|
43
49
|
# Utility functions
|
|
@@ -22,20 +22,44 @@ LawType = Literal["benf", "pareto", "zipf", "normal", "poisson"]
|
|
|
22
22
|
class LawkitOptions:
|
|
23
23
|
"""Options for lawkit operations"""
|
|
24
24
|
format: Optional[Format] = None
|
|
25
|
-
|
|
26
|
-
min_count: Optional[int] = None
|
|
27
|
-
threshold: Optional[float] = None
|
|
28
|
-
confidence: Optional[float] = None
|
|
25
|
+
quiet: bool = False
|
|
29
26
|
verbose: bool = False
|
|
27
|
+
filter: Optional[str] = None
|
|
28
|
+
min_count: Optional[int] = None
|
|
30
29
|
optimize: bool = False
|
|
31
|
-
|
|
30
|
+
|
|
31
|
+
# Integration-specific options
|
|
32
|
+
laws: Optional[str] = None # "benf,pareto,zipf,normal,poisson"
|
|
33
|
+
focus: Optional[str] = None # "quality", "concentration", "distribution", "anomaly"
|
|
34
|
+
threshold: Optional[float] = None # Conflict detection threshold
|
|
35
|
+
recommend: bool = False
|
|
36
|
+
report: Optional[str] = None # "summary", "detailed", "conflicting"
|
|
37
|
+
consistency_check: bool = False
|
|
38
|
+
cross_validation: bool = False
|
|
39
|
+
confidence_level: Optional[float] = None
|
|
40
|
+
purpose: Optional[str] = None # "quality", "fraud", "concentration", "anomaly", "distribution", "general"
|
|
41
|
+
|
|
32
42
|
# Law-specific options
|
|
33
43
|
gini_coefficient: bool = False
|
|
34
44
|
percentiles: Optional[str] = None
|
|
35
45
|
business_analysis: bool = False
|
|
46
|
+
concentration: Optional[float] = None
|
|
47
|
+
|
|
48
|
+
# Benford-specific options
|
|
49
|
+
threshold_level: Optional[str] = None # "low", "medium", "high", "critical", "auto"
|
|
50
|
+
|
|
51
|
+
# Generate-specific options
|
|
52
|
+
samples: Optional[int] = None
|
|
53
|
+
seed: Optional[int] = None
|
|
54
|
+
output_file: Optional[str] = None
|
|
55
|
+
fraud_rate: Optional[float] = None
|
|
56
|
+
range: Optional[str] = None # "1,100000"
|
|
57
|
+
scale: Optional[float] = None
|
|
58
|
+
|
|
36
59
|
# Statistical options
|
|
37
60
|
test_type: Optional[str] = None
|
|
38
61
|
alpha: Optional[float] = None
|
|
62
|
+
|
|
39
63
|
# Advanced options
|
|
40
64
|
outlier_detection: bool = False
|
|
41
65
|
time_series: bool = False
|
|
@@ -144,14 +168,17 @@ def _execute_lawkit(args: List[str]) -> tuple[str, str]:
|
|
|
144
168
|
[lawkit_path] + args,
|
|
145
169
|
capture_output=True,
|
|
146
170
|
text=True,
|
|
147
|
-
check=
|
|
171
|
+
check=False # Don't raise exception on non-zero exit
|
|
148
172
|
)
|
|
149
|
-
|
|
150
|
-
|
|
173
|
+
|
|
174
|
+
# Exit codes 10-19 are typically warnings, not fatal errors
|
|
175
|
+
if result.returncode == 0 or (result.returncode >= 10 and result.returncode <= 19):
|
|
176
|
+
return result.stdout, result.stderr
|
|
177
|
+
|
|
151
178
|
raise LawkitError(
|
|
152
|
-
f"lawkit exited with code {
|
|
153
|
-
|
|
154
|
-
|
|
179
|
+
f"lawkit exited with code {result.returncode}",
|
|
180
|
+
result.returncode,
|
|
181
|
+
result.stderr or ""
|
|
155
182
|
)
|
|
156
183
|
except FileNotFoundError:
|
|
157
184
|
raise LawkitError(
|
|
@@ -192,10 +219,14 @@ def analyze_benford(
|
|
|
192
219
|
# Add common options
|
|
193
220
|
_add_common_options(args, options)
|
|
194
221
|
|
|
222
|
+
# Add Benford-specific options
|
|
223
|
+
if options.threshold_level:
|
|
224
|
+
args.extend(["--threshold", options.threshold_level])
|
|
225
|
+
|
|
195
226
|
stdout, stderr = _execute_lawkit(args)
|
|
196
227
|
|
|
197
228
|
# If output format is JSON, parse the result
|
|
198
|
-
if options.
|
|
229
|
+
if options.format == "json":
|
|
199
230
|
try:
|
|
200
231
|
json_data = json.loads(stdout)
|
|
201
232
|
return LawkitResult(json_data, "benford")
|
|
@@ -238,6 +269,9 @@ def analyze_pareto(
|
|
|
238
269
|
_add_common_options(args, options)
|
|
239
270
|
|
|
240
271
|
# Add Pareto-specific options
|
|
272
|
+
if options.concentration is not None:
|
|
273
|
+
args.extend(["--concentration", str(options.concentration)])
|
|
274
|
+
|
|
241
275
|
if options.gini_coefficient:
|
|
242
276
|
args.append("--gini-coefficient")
|
|
243
277
|
|
|
@@ -250,7 +284,7 @@ def analyze_pareto(
|
|
|
250
284
|
stdout, stderr = _execute_lawkit(args)
|
|
251
285
|
|
|
252
286
|
# If output format is JSON, parse the result
|
|
253
|
-
if options.
|
|
287
|
+
if options.format == "json":
|
|
254
288
|
try:
|
|
255
289
|
json_data = json.loads(stdout)
|
|
256
290
|
return LawkitResult(json_data, "pareto")
|
|
@@ -294,7 +328,7 @@ def analyze_zipf(
|
|
|
294
328
|
stdout, stderr = _execute_lawkit(args)
|
|
295
329
|
|
|
296
330
|
# If output format is JSON, parse the result
|
|
297
|
-
if options.
|
|
331
|
+
if options.format == "json":
|
|
298
332
|
try:
|
|
299
333
|
json_data = json.loads(stdout)
|
|
300
334
|
return LawkitResult(json_data, "zipf")
|
|
@@ -347,7 +381,7 @@ def analyze_normal(
|
|
|
347
381
|
stdout, stderr = _execute_lawkit(args)
|
|
348
382
|
|
|
349
383
|
# If output format is JSON, parse the result
|
|
350
|
-
if options.
|
|
384
|
+
if options.format == "json":
|
|
351
385
|
try:
|
|
352
386
|
json_data = json.loads(stdout)
|
|
353
387
|
return LawkitResult(json_data, "normal")
|
|
@@ -391,7 +425,7 @@ def analyze_poisson(
|
|
|
391
425
|
stdout, stderr = _execute_lawkit(args)
|
|
392
426
|
|
|
393
427
|
# If output format is JSON, parse the result
|
|
394
|
-
if options.
|
|
428
|
+
if options.format == "json":
|
|
395
429
|
try:
|
|
396
430
|
json_data = json.loads(stdout)
|
|
397
431
|
return LawkitResult(json_data, "poisson")
|
|
@@ -402,12 +436,12 @@ def analyze_poisson(
|
|
|
402
436
|
return stdout
|
|
403
437
|
|
|
404
438
|
|
|
405
|
-
def compare_laws(
|
|
439
|
+
def analyze_laws(
|
|
406
440
|
input_data: str,
|
|
407
441
|
options: Optional[LawkitOptions] = None
|
|
408
442
|
) -> Union[str, LawkitResult]:
|
|
409
443
|
"""
|
|
410
|
-
|
|
444
|
+
Analyze data using multiple statistical laws (basic analysis)
|
|
411
445
|
|
|
412
446
|
Args:
|
|
413
447
|
input_data: Path to input file or '-' for stdin
|
|
@@ -417,17 +451,105 @@ def compare_laws(
|
|
|
417
451
|
String output for text format, or LawkitResult for JSON format
|
|
418
452
|
|
|
419
453
|
Examples:
|
|
420
|
-
>>> result =
|
|
454
|
+
>>> result = analyze_laws('dataset.csv')
|
|
421
455
|
>>> print(result)
|
|
422
456
|
|
|
423
|
-
>>> json_result =
|
|
424
|
-
... LawkitOptions(
|
|
457
|
+
>>> json_result = analyze_laws('complex_data.json',
|
|
458
|
+
... LawkitOptions(format='json'))
|
|
425
459
|
>>> print(f"Risk level: {json_result.risk_level}")
|
|
426
460
|
"""
|
|
427
461
|
if options is None:
|
|
428
462
|
options = LawkitOptions()
|
|
429
463
|
|
|
430
|
-
args = ["
|
|
464
|
+
args = ["analyze", input_data]
|
|
465
|
+
|
|
466
|
+
# Add common options
|
|
467
|
+
_add_common_options(args, options)
|
|
468
|
+
|
|
469
|
+
stdout, stderr = _execute_lawkit(args)
|
|
470
|
+
|
|
471
|
+
# If output format is JSON, parse the result
|
|
472
|
+
if options.format == "json":
|
|
473
|
+
try:
|
|
474
|
+
json_data = json.loads(stdout)
|
|
475
|
+
return LawkitResult(json_data, "analyze")
|
|
476
|
+
except json.JSONDecodeError as e:
|
|
477
|
+
raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
|
|
478
|
+
|
|
479
|
+
# Return raw output for other formats
|
|
480
|
+
return stdout
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def validate_laws(
|
|
484
|
+
input_data: str,
|
|
485
|
+
options: Optional[LawkitOptions] = None
|
|
486
|
+
) -> Union[str, LawkitResult]:
|
|
487
|
+
"""
|
|
488
|
+
Validate data consistency using multiple statistical laws
|
|
489
|
+
|
|
490
|
+
Args:
|
|
491
|
+
input_data: Path to input file or '-' for stdin
|
|
492
|
+
options: Analysis options
|
|
493
|
+
|
|
494
|
+
Returns:
|
|
495
|
+
String output for text format, or LawkitResult for JSON format
|
|
496
|
+
|
|
497
|
+
Examples:
|
|
498
|
+
>>> result = validate_laws('dataset.csv')
|
|
499
|
+
>>> print(result)
|
|
500
|
+
|
|
501
|
+
>>> json_result = validate_laws('complex_data.json',
|
|
502
|
+
... LawkitOptions(format='json'))
|
|
503
|
+
>>> print(f"Validation result: {json_result.data}")
|
|
504
|
+
"""
|
|
505
|
+
if options is None:
|
|
506
|
+
options = LawkitOptions()
|
|
507
|
+
|
|
508
|
+
args = ["validate", input_data]
|
|
509
|
+
|
|
510
|
+
# Add common options
|
|
511
|
+
_add_common_options(args, options)
|
|
512
|
+
|
|
513
|
+
stdout, stderr = _execute_lawkit(args)
|
|
514
|
+
|
|
515
|
+
# If output format is JSON, parse the result
|
|
516
|
+
if options.format == "json":
|
|
517
|
+
try:
|
|
518
|
+
json_data = json.loads(stdout)
|
|
519
|
+
return LawkitResult(json_data, "validate")
|
|
520
|
+
except json.JSONDecodeError as e:
|
|
521
|
+
raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
|
|
522
|
+
|
|
523
|
+
# Return raw output for other formats
|
|
524
|
+
return stdout
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def diagnose_laws(
|
|
528
|
+
input_data: str,
|
|
529
|
+
options: Optional[LawkitOptions] = None
|
|
530
|
+
) -> Union[str, LawkitResult]:
|
|
531
|
+
"""
|
|
532
|
+
Diagnose conflicts and generate detailed analysis report
|
|
533
|
+
|
|
534
|
+
Args:
|
|
535
|
+
input_data: Path to input file or '-' for stdin
|
|
536
|
+
options: Analysis options
|
|
537
|
+
|
|
538
|
+
Returns:
|
|
539
|
+
String output for text format, or LawkitResult for JSON format
|
|
540
|
+
|
|
541
|
+
Examples:
|
|
542
|
+
>>> result = diagnose_laws('dataset.csv')
|
|
543
|
+
>>> print(result)
|
|
544
|
+
|
|
545
|
+
>>> json_result = diagnose_laws('complex_data.json',
|
|
546
|
+
... LawkitOptions(format='json'))
|
|
547
|
+
>>> print(f"Diagnosis: {json_result.data}")
|
|
548
|
+
"""
|
|
549
|
+
if options is None:
|
|
550
|
+
options = LawkitOptions()
|
|
551
|
+
|
|
552
|
+
args = ["diagnose", input_data]
|
|
431
553
|
|
|
432
554
|
# Add common options
|
|
433
555
|
_add_common_options(args, options)
|
|
@@ -435,10 +557,10 @@ def compare_laws(
|
|
|
435
557
|
stdout, stderr = _execute_lawkit(args)
|
|
436
558
|
|
|
437
559
|
# If output format is JSON, parse the result
|
|
438
|
-
if options.
|
|
560
|
+
if options.format == "json":
|
|
439
561
|
try:
|
|
440
562
|
json_data = json.loads(stdout)
|
|
441
|
-
return LawkitResult(json_data, "
|
|
563
|
+
return LawkitResult(json_data, "diagnose")
|
|
442
564
|
except json.JSONDecodeError as e:
|
|
443
565
|
raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
|
|
444
566
|
|
|
@@ -446,10 +568,13 @@ def compare_laws(
|
|
|
446
568
|
return stdout
|
|
447
569
|
|
|
448
570
|
|
|
571
|
+
# Backward compatibility alias
|
|
572
|
+
compare_laws = analyze_laws
|
|
573
|
+
|
|
574
|
+
|
|
449
575
|
def generate_data(
|
|
450
576
|
law_type: LawType,
|
|
451
|
-
|
|
452
|
-
seed: Optional[int] = None,
|
|
577
|
+
options: Optional[LawkitOptions] = None,
|
|
453
578
|
**kwargs
|
|
454
579
|
) -> str:
|
|
455
580
|
"""
|
|
@@ -457,26 +582,46 @@ def generate_data(
|
|
|
457
582
|
|
|
458
583
|
Args:
|
|
459
584
|
law_type: Type of statistical law to use
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
**kwargs: Law-specific parameters
|
|
585
|
+
options: Generation options (samples, seed, etc.)
|
|
586
|
+
**kwargs: Law-specific parameters (deprecated, use options instead)
|
|
463
587
|
|
|
464
588
|
Returns:
|
|
465
589
|
Generated data as string
|
|
466
590
|
|
|
467
591
|
Examples:
|
|
468
|
-
>>> data = generate_data('benf', samples=1000, seed=42)
|
|
592
|
+
>>> data = generate_data('benf', LawkitOptions(samples=1000, seed=42))
|
|
469
593
|
>>> print(data)
|
|
470
594
|
|
|
471
|
-
>>>
|
|
472
|
-
>>>
|
|
595
|
+
>>> options = LawkitOptions(samples=500, fraud_rate=0.1, range="1,10000")
|
|
596
|
+
>>> normal_data = generate_data('normal', options)
|
|
597
|
+
>>> pareto_data = generate_data('pareto', LawkitOptions(concentration=0.8))
|
|
473
598
|
"""
|
|
474
|
-
|
|
599
|
+
if options is None:
|
|
600
|
+
options = LawkitOptions()
|
|
475
601
|
|
|
476
|
-
|
|
477
|
-
args.extend(["--seed", str(seed)])
|
|
602
|
+
args = ["generate", law_type]
|
|
478
603
|
|
|
479
|
-
# Add
|
|
604
|
+
# Add common options
|
|
605
|
+
_add_common_options(args, options)
|
|
606
|
+
|
|
607
|
+
# Add generate-specific options
|
|
608
|
+
if options.samples is not None:
|
|
609
|
+
args.extend(["--samples", str(options.samples)])
|
|
610
|
+
|
|
611
|
+
if options.seed is not None:
|
|
612
|
+
args.extend(["--seed", str(options.seed)])
|
|
613
|
+
|
|
614
|
+
if options.output_file:
|
|
615
|
+
args.extend(["--output-file", options.output_file])
|
|
616
|
+
|
|
617
|
+
if options.fraud_rate is not None:
|
|
618
|
+
args.extend(["--fraud-rate", str(options.fraud_rate)])
|
|
619
|
+
|
|
620
|
+
# Note: --range option not available in current CLI
|
|
621
|
+
|
|
622
|
+
# Note: --scale option may not be available for all law types
|
|
623
|
+
|
|
624
|
+
# Add law-specific parameters (backward compatibility)
|
|
480
625
|
for key, value in kwargs.items():
|
|
481
626
|
key_formatted = key.replace("_", "-")
|
|
482
627
|
args.extend([f"--{key_formatted}", str(value)])
|
|
@@ -539,29 +684,52 @@ def _add_common_options(args: List[str], options: LawkitOptions) -> None:
|
|
|
539
684
|
if options.format:
|
|
540
685
|
args.extend(["--format", options.format])
|
|
541
686
|
|
|
542
|
-
if options.
|
|
543
|
-
args.
|
|
687
|
+
if options.quiet:
|
|
688
|
+
args.append("--quiet")
|
|
689
|
+
|
|
690
|
+
if options.verbose:
|
|
691
|
+
args.append("--verbose")
|
|
692
|
+
|
|
693
|
+
if options.filter:
|
|
694
|
+
args.extend(["--filter", options.filter])
|
|
544
695
|
|
|
545
696
|
if options.min_count is not None:
|
|
546
697
|
args.extend(["--min-count", str(options.min_count)])
|
|
547
698
|
|
|
699
|
+
if options.optimize:
|
|
700
|
+
args.append("--optimize")
|
|
701
|
+
|
|
702
|
+
# Integration-specific options
|
|
703
|
+
if options.laws:
|
|
704
|
+
args.extend(["--laws", options.laws])
|
|
705
|
+
|
|
706
|
+
if options.focus:
|
|
707
|
+
args.extend(["--focus", options.focus])
|
|
708
|
+
|
|
548
709
|
if options.threshold is not None:
|
|
549
710
|
args.extend(["--threshold", str(options.threshold)])
|
|
550
711
|
|
|
551
|
-
if options.
|
|
552
|
-
args.
|
|
712
|
+
if options.recommend:
|
|
713
|
+
args.append("--recommend")
|
|
553
714
|
|
|
554
|
-
if options.
|
|
555
|
-
args.extend(["--
|
|
715
|
+
if options.report:
|
|
716
|
+
args.extend(["--report", options.report])
|
|
556
717
|
|
|
557
|
-
if options.
|
|
558
|
-
args.append("--
|
|
718
|
+
if options.consistency_check:
|
|
719
|
+
args.append("--consistency-check")
|
|
559
720
|
|
|
560
|
-
if options.
|
|
561
|
-
args.append("--
|
|
721
|
+
if options.cross_validation:
|
|
722
|
+
args.append("--cross-validation")
|
|
723
|
+
|
|
724
|
+
if options.confidence_level is not None:
|
|
725
|
+
args.extend(["--confidence-level", str(options.confidence_level)])
|
|
726
|
+
|
|
727
|
+
if options.purpose:
|
|
728
|
+
args.extend(["--purpose", options.purpose])
|
|
562
729
|
|
|
563
|
-
|
|
564
|
-
|
|
730
|
+
# Advanced options
|
|
731
|
+
if options.alpha is not None:
|
|
732
|
+
args.extend(["--alpha", str(options.alpha)])
|
|
565
733
|
|
|
566
734
|
if options.time_series:
|
|
567
735
|
args.append("--time-series")
|
|
File without changes
|
|
File without changes
|