lawkit-python 2.1.1__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/PKG-INFO +29 -14
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/README.md +27 -10
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/pyproject.toml +4 -5
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/src/lawkit/__init__.py +7 -5
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/src/lawkit/lawkit.py +216 -48
- lawkit_python-2.1.1/src/lawkit/compat.py +0 -204
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/.gitignore +0 -0
- {lawkit_python-2.1.1 → lawkit_python-2.3.0}/src/lawkit/installer.py +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: lawkit-python
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: Python wrapper for lawkit - Statistical law analysis toolkit for fraud detection and data quality assessment
|
|
5
5
|
Project-URL: Homepage, https://github.com/kako-jun/lawkit
|
|
6
6
|
Project-URL: Repository, https://github.com/kako-jun/lawkit
|
|
7
7
|
Project-URL: Issues, https://github.com/kako-jun/lawkit/issues
|
|
8
8
|
Project-URL: Documentation, https://github.com/kako-jun/lawkit/tree/main/docs
|
|
9
|
-
Author
|
|
9
|
+
Author: kako-jun
|
|
10
10
|
License-Expression: MIT
|
|
11
11
|
Keywords: anomaly-detection,audit,benford,compliance,data-quality,forensic-accounting,fraud-detection,normal,outlier-detection,pareto,poisson,statistical-analysis,statistics,zipf
|
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
|
@@ -31,11 +31,9 @@ Provides-Extra: dev
|
|
|
31
31
|
Requires-Dist: black; extra == 'dev'
|
|
32
32
|
Requires-Dist: isort; extra == 'dev'
|
|
33
33
|
Requires-Dist: mypy; extra == 'dev'
|
|
34
|
-
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
35
34
|
Requires-Dist: pytest-cov; extra == 'dev'
|
|
36
35
|
Requires-Dist: pytest>=6.0; extra == 'dev'
|
|
37
36
|
Requires-Dist: ruff; extra == 'dev'
|
|
38
|
-
Requires-Dist: types-requests; extra == 'dev'
|
|
39
37
|
Description-Content-Type: text/markdown
|
|
40
38
|
|
|
41
39
|
# lawkit-python
|
|
@@ -62,7 +60,7 @@ print(result)
|
|
|
62
60
|
# Get structured JSON output
|
|
63
61
|
json_result = lawkit.analyze_benford(
|
|
64
62
|
'accounting.csv',
|
|
65
|
-
lawkit.LawkitOptions(format='
|
|
63
|
+
lawkit.LawkitOptions(format='json')
|
|
66
64
|
)
|
|
67
65
|
print(f"Risk level: {json_result.risk_level}")
|
|
68
66
|
print(f"P-value: {json_result.p_value}")
|
|
@@ -70,7 +68,7 @@ print(f"P-value: {json_result.p_value}")
|
|
|
70
68
|
# Check if data follows Pareto principle (80/20 rule)
|
|
71
69
|
pareto_result = lawkit.analyze_pareto(
|
|
72
70
|
'sales_data.csv',
|
|
73
|
-
lawkit.LawkitOptions(
|
|
71
|
+
lawkit.LawkitOptions(format='json', gini_coefficient=True)
|
|
74
72
|
)
|
|
75
73
|
print(f"Gini coefficient: {pareto_result.gini_coefficient}")
|
|
76
74
|
print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
@@ -159,13 +157,27 @@ if normal_result.p_value < 0.05:
|
|
|
159
157
|
if normal_result.outliers:
|
|
160
158
|
print(f"Found {len(normal_result.outliers)} outliers")
|
|
161
159
|
|
|
162
|
-
# Multi-law
|
|
163
|
-
|
|
160
|
+
# Multi-law analysis
|
|
161
|
+
analysis = lawkit.analyze_laws(
|
|
164
162
|
'complex_dataset.csv',
|
|
165
|
-
lawkit.LawkitOptions(
|
|
163
|
+
lawkit.LawkitOptions(format='json', laws='benf,pareto,zipf')
|
|
166
164
|
)
|
|
167
|
-
print(f"
|
|
168
|
-
print(f"Overall risk level: {
|
|
165
|
+
print(f"Analysis results: {analysis.data}")
|
|
166
|
+
print(f"Overall risk level: {analysis.risk_level}")
|
|
167
|
+
|
|
168
|
+
# Data validation
|
|
169
|
+
validation = lawkit.validate_laws(
|
|
170
|
+
'complex_dataset.csv',
|
|
171
|
+
lawkit.LawkitOptions(format='json', consistency_check=True)
|
|
172
|
+
)
|
|
173
|
+
print(f"Validation status: {validation.data}")
|
|
174
|
+
|
|
175
|
+
# Conflict diagnosis
|
|
176
|
+
diagnosis = lawkit.diagnose_laws(
|
|
177
|
+
'complex_dataset.csv',
|
|
178
|
+
lawkit.LawkitOptions(format='json', report='detailed')
|
|
179
|
+
)
|
|
180
|
+
print(f"Diagnosis: {diagnosis.data}")
|
|
169
181
|
```
|
|
170
182
|
|
|
171
183
|
### Generate Sample Data
|
|
@@ -205,7 +217,7 @@ csv_data = """amount
|
|
|
205
217
|
result = lawkit.analyze_string(
|
|
206
218
|
csv_data,
|
|
207
219
|
'benf',
|
|
208
|
-
lawkit.LawkitOptions(format='
|
|
220
|
+
lawkit.LawkitOptions(format='json')
|
|
209
221
|
)
|
|
210
222
|
print(f"Risk assessment: {result.risk_level}")
|
|
211
223
|
|
|
@@ -214,7 +226,7 @@ json_data = '{"values": [12, 23, 34, 45, 56, 67, 78, 89]}'
|
|
|
214
226
|
result = lawkit.analyze_string(
|
|
215
227
|
json_data,
|
|
216
228
|
'normal',
|
|
217
|
-
lawkit.LawkitOptions(format='json'
|
|
229
|
+
lawkit.LawkitOptions(format='json')
|
|
218
230
|
)
|
|
219
231
|
print(f"Is normal: {result.p_value > 0.05}")
|
|
220
232
|
```
|
|
@@ -399,7 +411,10 @@ print(f"Power law exponent: {result.exponent:.3f}")
|
|
|
399
411
|
- `analyze_zipf(input_data, options)` - Zipf Law analysis
|
|
400
412
|
- `analyze_normal(input_data, options)` - Normal distribution analysis
|
|
401
413
|
- `analyze_poisson(input_data, options)` - Poisson distribution analysis
|
|
402
|
-
- `
|
|
414
|
+
- `analyze_laws(input_data, options)` - Multi-law analysis
|
|
415
|
+
- `validate_laws(input_data, options)` - Data validation and consistency check
|
|
416
|
+
- `diagnose_laws(input_data, options)` - Conflict diagnosis and detailed reporting
|
|
417
|
+
- `compare_laws(input_data, options)` - Alias for analyze_laws (backward compatibility)
|
|
403
418
|
- `generate_data(law_type, samples, **kwargs)` - Generate sample data
|
|
404
419
|
- `analyze_string(content, law_type, options)` - Analyze string data directly
|
|
405
420
|
|
|
@@ -22,7 +22,7 @@ print(result)
|
|
|
22
22
|
# Get structured JSON output
|
|
23
23
|
json_result = lawkit.analyze_benford(
|
|
24
24
|
'accounting.csv',
|
|
25
|
-
lawkit.LawkitOptions(format='
|
|
25
|
+
lawkit.LawkitOptions(format='json')
|
|
26
26
|
)
|
|
27
27
|
print(f"Risk level: {json_result.risk_level}")
|
|
28
28
|
print(f"P-value: {json_result.p_value}")
|
|
@@ -30,7 +30,7 @@ print(f"P-value: {json_result.p_value}")
|
|
|
30
30
|
# Check if data follows Pareto principle (80/20 rule)
|
|
31
31
|
pareto_result = lawkit.analyze_pareto(
|
|
32
32
|
'sales_data.csv',
|
|
33
|
-
lawkit.LawkitOptions(
|
|
33
|
+
lawkit.LawkitOptions(format='json', gini_coefficient=True)
|
|
34
34
|
)
|
|
35
35
|
print(f"Gini coefficient: {pareto_result.gini_coefficient}")
|
|
36
36
|
print(f"80/20 concentration: {pareto_result.concentration_80_20}")
|
|
@@ -119,13 +119,27 @@ if normal_result.p_value < 0.05:
|
|
|
119
119
|
if normal_result.outliers:
|
|
120
120
|
print(f"Found {len(normal_result.outliers)} outliers")
|
|
121
121
|
|
|
122
|
-
# Multi-law
|
|
123
|
-
|
|
122
|
+
# Multi-law analysis
|
|
123
|
+
analysis = lawkit.analyze_laws(
|
|
124
124
|
'complex_dataset.csv',
|
|
125
|
-
lawkit.LawkitOptions(
|
|
125
|
+
lawkit.LawkitOptions(format='json', laws='benf,pareto,zipf')
|
|
126
126
|
)
|
|
127
|
-
print(f"
|
|
128
|
-
print(f"Overall risk level: {
|
|
127
|
+
print(f"Analysis results: {analysis.data}")
|
|
128
|
+
print(f"Overall risk level: {analysis.risk_level}")
|
|
129
|
+
|
|
130
|
+
# Data validation
|
|
131
|
+
validation = lawkit.validate_laws(
|
|
132
|
+
'complex_dataset.csv',
|
|
133
|
+
lawkit.LawkitOptions(format='json', consistency_check=True)
|
|
134
|
+
)
|
|
135
|
+
print(f"Validation status: {validation.data}")
|
|
136
|
+
|
|
137
|
+
# Conflict diagnosis
|
|
138
|
+
diagnosis = lawkit.diagnose_laws(
|
|
139
|
+
'complex_dataset.csv',
|
|
140
|
+
lawkit.LawkitOptions(format='json', report='detailed')
|
|
141
|
+
)
|
|
142
|
+
print(f"Diagnosis: {diagnosis.data}")
|
|
129
143
|
```
|
|
130
144
|
|
|
131
145
|
### Generate Sample Data
|
|
@@ -165,7 +179,7 @@ csv_data = """amount
|
|
|
165
179
|
result = lawkit.analyze_string(
|
|
166
180
|
csv_data,
|
|
167
181
|
'benf',
|
|
168
|
-
lawkit.LawkitOptions(format='
|
|
182
|
+
lawkit.LawkitOptions(format='json')
|
|
169
183
|
)
|
|
170
184
|
print(f"Risk assessment: {result.risk_level}")
|
|
171
185
|
|
|
@@ -174,7 +188,7 @@ json_data = '{"values": [12, 23, 34, 45, 56, 67, 78, 89]}'
|
|
|
174
188
|
result = lawkit.analyze_string(
|
|
175
189
|
json_data,
|
|
176
190
|
'normal',
|
|
177
|
-
lawkit.LawkitOptions(format='json'
|
|
191
|
+
lawkit.LawkitOptions(format='json')
|
|
178
192
|
)
|
|
179
193
|
print(f"Is normal: {result.p_value > 0.05}")
|
|
180
194
|
```
|
|
@@ -359,7 +373,10 @@ print(f"Power law exponent: {result.exponent:.3f}")
|
|
|
359
373
|
- `analyze_zipf(input_data, options)` - Zipf Law analysis
|
|
360
374
|
- `analyze_normal(input_data, options)` - Normal distribution analysis
|
|
361
375
|
- `analyze_poisson(input_data, options)` - Poisson distribution analysis
|
|
362
|
-
- `
|
|
376
|
+
- `analyze_laws(input_data, options)` - Multi-law analysis
|
|
377
|
+
- `validate_laws(input_data, options)` - Data validation and consistency check
|
|
378
|
+
- `diagnose_laws(input_data, options)` - Conflict diagnosis and detailed reporting
|
|
379
|
+
- `compare_laws(input_data, options)` - Alias for analyze_laws (backward compatibility)
|
|
363
380
|
- `generate_data(law_type, samples, **kwargs)` - Generate sample data
|
|
364
381
|
- `analyze_string(content, law_type, options)` - Analyze string data directly
|
|
365
382
|
|
|
@@ -4,12 +4,12 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "lawkit-python"
|
|
7
|
-
version = "2.1.1"
|
|
7
|
+
version = "2.3.0"
|
|
8
8
|
description = "Python wrapper for lawkit - Statistical law analysis toolkit for fraud detection and data quality assessment"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
11
11
|
authors = [
|
|
12
|
-
{ name = "kako-jun"
|
|
12
|
+
{ name = "kako-jun" }
|
|
13
13
|
]
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Development Status :: 4 - Beta",
|
|
@@ -59,12 +59,10 @@ Documentation = "https://github.com/kako-jun/lawkit/tree/main/docs"
|
|
|
59
59
|
dev = [
|
|
60
60
|
"pytest >= 6.0",
|
|
61
61
|
"pytest-cov",
|
|
62
|
-
"pytest-asyncio",
|
|
63
62
|
"black",
|
|
64
63
|
"isort",
|
|
65
64
|
"mypy",
|
|
66
|
-
"ruff"
|
|
67
|
-
"types-requests"
|
|
65
|
+
"ruff"
|
|
68
66
|
]
|
|
69
67
|
|
|
70
68
|
[project.scripts]
|
|
@@ -76,6 +74,7 @@ packages = ["src/lawkit"]
|
|
|
76
74
|
[tool.hatch.build.targets.sdist]
|
|
77
75
|
include = [
|
|
78
76
|
"/src",
|
|
77
|
+
"/scripts",
|
|
79
78
|
"/README.md"
|
|
80
79
|
]
|
|
81
80
|
|
|
@@ -13,6 +13,9 @@ from .lawkit import (
|
|
|
13
13
|
analyze_zipf,
|
|
14
14
|
analyze_normal,
|
|
15
15
|
analyze_poisson,
|
|
16
|
+
analyze_laws,
|
|
17
|
+
validate_laws,
|
|
18
|
+
diagnose_laws,
|
|
16
19
|
compare_laws,
|
|
17
20
|
generate_data,
|
|
18
21
|
analyze_string,
|
|
@@ -27,10 +30,8 @@ from .lawkit import (
|
|
|
27
30
|
LawType,
|
|
28
31
|
)
|
|
29
32
|
|
|
30
|
-
# For backward compatibility and convenience
|
|
31
|
-
from .compat import run_lawkit
|
|
32
33
|
|
|
33
|
-
__version__ = "2.
|
|
34
|
+
__version__ = "2.3.0"
|
|
34
35
|
__all__ = [
|
|
35
36
|
# Main analysis functions
|
|
36
37
|
"analyze_benford",
|
|
@@ -38,6 +39,9 @@ __all__ = [
|
|
|
38
39
|
"analyze_zipf",
|
|
39
40
|
"analyze_normal",
|
|
40
41
|
"analyze_poisson",
|
|
42
|
+
"analyze_laws",
|
|
43
|
+
"validate_laws",
|
|
44
|
+
"diagnose_laws",
|
|
41
45
|
"compare_laws",
|
|
42
46
|
|
|
43
47
|
# Utility functions
|
|
@@ -55,6 +59,4 @@ __all__ = [
|
|
|
55
59
|
"OutputFormat",
|
|
56
60
|
"LawType",
|
|
57
61
|
|
|
58
|
-
# Backward compatibility
|
|
59
|
-
"run_lawkit",
|
|
60
62
|
]
|
|
@@ -22,20 +22,44 @@ LawType = Literal["benf", "pareto", "zipf", "normal", "poisson"]
|
|
|
22
22
|
class LawkitOptions:
|
|
23
23
|
"""Options for lawkit operations"""
|
|
24
24
|
format: Optional[Format] = None
|
|
25
|
-
|
|
26
|
-
min_count: Optional[int] = None
|
|
27
|
-
threshold: Optional[float] = None
|
|
28
|
-
confidence: Optional[float] = None
|
|
25
|
+
quiet: bool = False
|
|
29
26
|
verbose: bool = False
|
|
27
|
+
filter: Optional[str] = None
|
|
28
|
+
min_count: Optional[int] = None
|
|
30
29
|
optimize: bool = False
|
|
31
|
-
|
|
30
|
+
|
|
31
|
+
# Integration-specific options
|
|
32
|
+
laws: Optional[str] = None # "benf,pareto,zipf,normal,poisson"
|
|
33
|
+
focus: Optional[str] = None # "quality", "concentration", "distribution", "anomaly"
|
|
34
|
+
threshold: Optional[float] = None # Conflict detection threshold
|
|
35
|
+
recommend: bool = False
|
|
36
|
+
report: Optional[str] = None # "summary", "detailed", "conflicting"
|
|
37
|
+
consistency_check: bool = False
|
|
38
|
+
cross_validation: bool = False
|
|
39
|
+
confidence_level: Optional[float] = None
|
|
40
|
+
purpose: Optional[str] = None # "quality", "fraud", "concentration", "anomaly", "distribution", "general"
|
|
41
|
+
|
|
32
42
|
# Law-specific options
|
|
33
43
|
gini_coefficient: bool = False
|
|
34
44
|
percentiles: Optional[str] = None
|
|
35
45
|
business_analysis: bool = False
|
|
46
|
+
concentration: Optional[float] = None
|
|
47
|
+
|
|
48
|
+
# Benford-specific options
|
|
49
|
+
threshold_level: Optional[str] = None # "low", "medium", "high", "critical", "auto"
|
|
50
|
+
|
|
51
|
+
# Generate-specific options
|
|
52
|
+
samples: Optional[int] = None
|
|
53
|
+
seed: Optional[int] = None
|
|
54
|
+
output_file: Optional[str] = None
|
|
55
|
+
fraud_rate: Optional[float] = None
|
|
56
|
+
range: Optional[str] = None # "1,100000"
|
|
57
|
+
scale: Optional[float] = None
|
|
58
|
+
|
|
36
59
|
# Statistical options
|
|
37
60
|
test_type: Optional[str] = None
|
|
38
61
|
alpha: Optional[float] = None
|
|
62
|
+
|
|
39
63
|
# Advanced options
|
|
40
64
|
outlier_detection: bool = False
|
|
41
65
|
time_series: bool = False
|
|
@@ -144,14 +168,17 @@ def _execute_lawkit(args: List[str]) -> tuple[str, str]:
|
|
|
144
168
|
[lawkit_path] + args,
|
|
145
169
|
capture_output=True,
|
|
146
170
|
text=True,
|
|
147
|
-
check=True
|
|
171
|
+
check=False # Don't raise exception on non-zero exit
|
|
148
172
|
)
|
|
149
|
-
|
|
150
|
-
|
|
173
|
+
|
|
174
|
+
# Exit codes 10-19 are typically warnings, not fatal errors
|
|
175
|
+
if result.returncode == 0 or (result.returncode >= 10 and result.returncode <= 19):
|
|
176
|
+
return result.stdout, result.stderr
|
|
177
|
+
|
|
151
178
|
raise LawkitError(
|
|
152
|
-
f"lawkit exited with code {
|
|
153
|
-
|
|
154
|
-
|
|
179
|
+
f"lawkit exited with code {result.returncode}",
|
|
180
|
+
result.returncode,
|
|
181
|
+
result.stderr or ""
|
|
155
182
|
)
|
|
156
183
|
except FileNotFoundError:
|
|
157
184
|
raise LawkitError(
|
|
@@ -192,10 +219,14 @@ def analyze_benford(
|
|
|
192
219
|
# Add common options
|
|
193
220
|
_add_common_options(args, options)
|
|
194
221
|
|
|
222
|
+
# Add Benford-specific options
|
|
223
|
+
if options.threshold_level:
|
|
224
|
+
args.extend(["--threshold", options.threshold_level])
|
|
225
|
+
|
|
195
226
|
stdout, stderr = _execute_lawkit(args)
|
|
196
227
|
|
|
197
228
|
# If output format is JSON, parse the result
|
|
198
|
-
if options.
|
|
229
|
+
if options.format == "json":
|
|
199
230
|
try:
|
|
200
231
|
json_data = json.loads(stdout)
|
|
201
232
|
return LawkitResult(json_data, "benford")
|
|
@@ -238,6 +269,9 @@ def analyze_pareto(
|
|
|
238
269
|
_add_common_options(args, options)
|
|
239
270
|
|
|
240
271
|
# Add Pareto-specific options
|
|
272
|
+
if options.concentration is not None:
|
|
273
|
+
args.extend(["--concentration", str(options.concentration)])
|
|
274
|
+
|
|
241
275
|
if options.gini_coefficient:
|
|
242
276
|
args.append("--gini-coefficient")
|
|
243
277
|
|
|
@@ -250,7 +284,7 @@ def analyze_pareto(
|
|
|
250
284
|
stdout, stderr = _execute_lawkit(args)
|
|
251
285
|
|
|
252
286
|
# If output format is JSON, parse the result
|
|
253
|
-
if options.
|
|
287
|
+
if options.format == "json":
|
|
254
288
|
try:
|
|
255
289
|
json_data = json.loads(stdout)
|
|
256
290
|
return LawkitResult(json_data, "pareto")
|
|
@@ -294,7 +328,7 @@ def analyze_zipf(
|
|
|
294
328
|
stdout, stderr = _execute_lawkit(args)
|
|
295
329
|
|
|
296
330
|
# If output format is JSON, parse the result
|
|
297
|
-
if options.
|
|
331
|
+
if options.format == "json":
|
|
298
332
|
try:
|
|
299
333
|
json_data = json.loads(stdout)
|
|
300
334
|
return LawkitResult(json_data, "zipf")
|
|
@@ -347,7 +381,7 @@ def analyze_normal(
|
|
|
347
381
|
stdout, stderr = _execute_lawkit(args)
|
|
348
382
|
|
|
349
383
|
# If output format is JSON, parse the result
|
|
350
|
-
if options.
|
|
384
|
+
if options.format == "json":
|
|
351
385
|
try:
|
|
352
386
|
json_data = json.loads(stdout)
|
|
353
387
|
return LawkitResult(json_data, "normal")
|
|
@@ -391,7 +425,7 @@ def analyze_poisson(
|
|
|
391
425
|
stdout, stderr = _execute_lawkit(args)
|
|
392
426
|
|
|
393
427
|
# If output format is JSON, parse the result
|
|
394
|
-
if options.
|
|
428
|
+
if options.format == "json":
|
|
395
429
|
try:
|
|
396
430
|
json_data = json.loads(stdout)
|
|
397
431
|
return LawkitResult(json_data, "poisson")
|
|
@@ -402,12 +436,12 @@ def analyze_poisson(
|
|
|
402
436
|
return stdout
|
|
403
437
|
|
|
404
438
|
|
|
405
|
-
def
|
|
439
|
+
def analyze_laws(
|
|
406
440
|
input_data: str,
|
|
407
441
|
options: Optional[LawkitOptions] = None
|
|
408
442
|
) -> Union[str, LawkitResult]:
|
|
409
443
|
"""
|
|
410
|
-
|
|
444
|
+
Analyze data using multiple statistical laws (basic analysis)
|
|
411
445
|
|
|
412
446
|
Args:
|
|
413
447
|
input_data: Path to input file or '-' for stdin
|
|
@@ -417,17 +451,105 @@ def compare_laws(
|
|
|
417
451
|
String output for text format, or LawkitResult for JSON format
|
|
418
452
|
|
|
419
453
|
Examples:
|
|
420
|
-
>>> result =
|
|
454
|
+
>>> result = analyze_laws('dataset.csv')
|
|
421
455
|
>>> print(result)
|
|
422
456
|
|
|
423
|
-
>>> json_result =
|
|
424
|
-
... LawkitOptions(
|
|
457
|
+
>>> json_result = analyze_laws('complex_data.json',
|
|
458
|
+
... LawkitOptions(format='json'))
|
|
425
459
|
>>> print(f"Risk level: {json_result.risk_level}")
|
|
426
460
|
"""
|
|
427
461
|
if options is None:
|
|
428
462
|
options = LawkitOptions()
|
|
429
463
|
|
|
430
|
-
args = ["
|
|
464
|
+
args = ["analyze", input_data]
|
|
465
|
+
|
|
466
|
+
# Add common options
|
|
467
|
+
_add_common_options(args, options)
|
|
468
|
+
|
|
469
|
+
stdout, stderr = _execute_lawkit(args)
|
|
470
|
+
|
|
471
|
+
# If output format is JSON, parse the result
|
|
472
|
+
if options.format == "json":
|
|
473
|
+
try:
|
|
474
|
+
json_data = json.loads(stdout)
|
|
475
|
+
return LawkitResult(json_data, "analyze")
|
|
476
|
+
except json.JSONDecodeError as e:
|
|
477
|
+
raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
|
|
478
|
+
|
|
479
|
+
# Return raw output for other formats
|
|
480
|
+
return stdout
|
|
481
|
+
|
|
482
|
+
|
|
483
|
+
def validate_laws(
|
|
484
|
+
input_data: str,
|
|
485
|
+
options: Optional[LawkitOptions] = None
|
|
486
|
+
) -> Union[str, LawkitResult]:
|
|
487
|
+
"""
|
|
488
|
+
Validate data consistency using multiple statistical laws
|
|
489
|
+
|
|
490
|
+
Args:
|
|
491
|
+
input_data: Path to input file or '-' for stdin
|
|
492
|
+
options: Analysis options
|
|
493
|
+
|
|
494
|
+
Returns:
|
|
495
|
+
String output for text format, or LawkitResult for JSON format
|
|
496
|
+
|
|
497
|
+
Examples:
|
|
498
|
+
>>> result = validate_laws('dataset.csv')
|
|
499
|
+
>>> print(result)
|
|
500
|
+
|
|
501
|
+
>>> json_result = validate_laws('complex_data.json',
|
|
502
|
+
... LawkitOptions(format='json'))
|
|
503
|
+
>>> print(f"Validation result: {json_result.data}")
|
|
504
|
+
"""
|
|
505
|
+
if options is None:
|
|
506
|
+
options = LawkitOptions()
|
|
507
|
+
|
|
508
|
+
args = ["validate", input_data]
|
|
509
|
+
|
|
510
|
+
# Add common options
|
|
511
|
+
_add_common_options(args, options)
|
|
512
|
+
|
|
513
|
+
stdout, stderr = _execute_lawkit(args)
|
|
514
|
+
|
|
515
|
+
# If output format is JSON, parse the result
|
|
516
|
+
if options.format == "json":
|
|
517
|
+
try:
|
|
518
|
+
json_data = json.loads(stdout)
|
|
519
|
+
return LawkitResult(json_data, "validate")
|
|
520
|
+
except json.JSONDecodeError as e:
|
|
521
|
+
raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
|
|
522
|
+
|
|
523
|
+
# Return raw output for other formats
|
|
524
|
+
return stdout
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def diagnose_laws(
|
|
528
|
+
input_data: str,
|
|
529
|
+
options: Optional[LawkitOptions] = None
|
|
530
|
+
) -> Union[str, LawkitResult]:
|
|
531
|
+
"""
|
|
532
|
+
Diagnose conflicts and generate detailed analysis report
|
|
533
|
+
|
|
534
|
+
Args:
|
|
535
|
+
input_data: Path to input file or '-' for stdin
|
|
536
|
+
options: Analysis options
|
|
537
|
+
|
|
538
|
+
Returns:
|
|
539
|
+
String output for text format, or LawkitResult for JSON format
|
|
540
|
+
|
|
541
|
+
Examples:
|
|
542
|
+
>>> result = diagnose_laws('dataset.csv')
|
|
543
|
+
>>> print(result)
|
|
544
|
+
|
|
545
|
+
>>> json_result = diagnose_laws('complex_data.json',
|
|
546
|
+
... LawkitOptions(format='json'))
|
|
547
|
+
>>> print(f"Diagnosis: {json_result.data}")
|
|
548
|
+
"""
|
|
549
|
+
if options is None:
|
|
550
|
+
options = LawkitOptions()
|
|
551
|
+
|
|
552
|
+
args = ["diagnose", input_data]
|
|
431
553
|
|
|
432
554
|
# Add common options
|
|
433
555
|
_add_common_options(args, options)
|
|
@@ -435,10 +557,10 @@ def compare_laws(
|
|
|
435
557
|
stdout, stderr = _execute_lawkit(args)
|
|
436
558
|
|
|
437
559
|
# If output format is JSON, parse the result
|
|
438
|
-
if options.
|
|
560
|
+
if options.format == "json":
|
|
439
561
|
try:
|
|
440
562
|
json_data = json.loads(stdout)
|
|
441
|
-
return LawkitResult(json_data, "
|
|
563
|
+
return LawkitResult(json_data, "diagnose")
|
|
442
564
|
except json.JSONDecodeError as e:
|
|
443
565
|
raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
|
|
444
566
|
|
|
@@ -446,10 +568,13 @@ def compare_laws(
|
|
|
446
568
|
return stdout
|
|
447
569
|
|
|
448
570
|
|
|
571
|
+
# Backward compatibility alias
|
|
572
|
+
compare_laws = analyze_laws
|
|
573
|
+
|
|
574
|
+
|
|
449
575
|
def generate_data(
|
|
450
576
|
law_type: LawType,
|
|
451
|
-
|
|
452
|
-
seed: Optional[int] = None,
|
|
577
|
+
options: Optional[LawkitOptions] = None,
|
|
453
578
|
**kwargs
|
|
454
579
|
) -> str:
|
|
455
580
|
"""
|
|
@@ -457,26 +582,46 @@ def generate_data(
|
|
|
457
582
|
|
|
458
583
|
Args:
|
|
459
584
|
law_type: Type of statistical law to use
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
**kwargs: Law-specific parameters
|
|
585
|
+
options: Generation options (samples, seed, etc.)
|
|
586
|
+
**kwargs: Law-specific parameters (deprecated, use options instead)
|
|
463
587
|
|
|
464
588
|
Returns:
|
|
465
589
|
Generated data as string
|
|
466
590
|
|
|
467
591
|
Examples:
|
|
468
|
-
>>> data = generate_data('benf', samples=1000, seed=42)
|
|
592
|
+
>>> data = generate_data('benf', LawkitOptions(samples=1000, seed=42))
|
|
469
593
|
>>> print(data)
|
|
470
594
|
|
|
471
|
-
>>>
|
|
472
|
-
>>>
|
|
595
|
+
>>> options = LawkitOptions(samples=500, fraud_rate=0.1, range="1,10000")
|
|
596
|
+
>>> normal_data = generate_data('normal', options)
|
|
597
|
+
>>> pareto_data = generate_data('pareto', LawkitOptions(concentration=0.8))
|
|
473
598
|
"""
|
|
474
|
-
|
|
599
|
+
if options is None:
|
|
600
|
+
options = LawkitOptions()
|
|
475
601
|
|
|
476
|
-
|
|
477
|
-
args.extend(["--seed", str(seed)])
|
|
602
|
+
args = ["generate", law_type]
|
|
478
603
|
|
|
479
|
-
# Add
|
|
604
|
+
# Add common options
|
|
605
|
+
_add_common_options(args, options)
|
|
606
|
+
|
|
607
|
+
# Add generate-specific options
|
|
608
|
+
if options.samples is not None:
|
|
609
|
+
args.extend(["--samples", str(options.samples)])
|
|
610
|
+
|
|
611
|
+
if options.seed is not None:
|
|
612
|
+
args.extend(["--seed", str(options.seed)])
|
|
613
|
+
|
|
614
|
+
if options.output_file:
|
|
615
|
+
args.extend(["--output-file", options.output_file])
|
|
616
|
+
|
|
617
|
+
if options.fraud_rate is not None:
|
|
618
|
+
args.extend(["--fraud-rate", str(options.fraud_rate)])
|
|
619
|
+
|
|
620
|
+
# Note: --range option not available in current CLI
|
|
621
|
+
|
|
622
|
+
# Note: --scale option may not be available for all law types
|
|
623
|
+
|
|
624
|
+
# Add law-specific parameters (backward compatibility)
|
|
480
625
|
for key, value in kwargs.items():
|
|
481
626
|
key_formatted = key.replace("_", "-")
|
|
482
627
|
args.extend([f"--{key_formatted}", str(value)])
|
|
@@ -539,29 +684,52 @@ def _add_common_options(args: List[str], options: LawkitOptions) -> None:
|
|
|
539
684
|
if options.format:
|
|
540
685
|
args.extend(["--format", options.format])
|
|
541
686
|
|
|
542
|
-
if options.
|
|
543
|
-
args.
|
|
687
|
+
if options.quiet:
|
|
688
|
+
args.append("--quiet")
|
|
689
|
+
|
|
690
|
+
if options.verbose:
|
|
691
|
+
args.append("--verbose")
|
|
692
|
+
|
|
693
|
+
if options.filter:
|
|
694
|
+
args.extend(["--filter", options.filter])
|
|
544
695
|
|
|
545
696
|
if options.min_count is not None:
|
|
546
697
|
args.extend(["--min-count", str(options.min_count)])
|
|
547
698
|
|
|
699
|
+
if options.optimize:
|
|
700
|
+
args.append("--optimize")
|
|
701
|
+
|
|
702
|
+
# Integration-specific options
|
|
703
|
+
if options.laws:
|
|
704
|
+
args.extend(["--laws", options.laws])
|
|
705
|
+
|
|
706
|
+
if options.focus:
|
|
707
|
+
args.extend(["--focus", options.focus])
|
|
708
|
+
|
|
548
709
|
if options.threshold is not None:
|
|
549
710
|
args.extend(["--threshold", str(options.threshold)])
|
|
550
711
|
|
|
551
|
-
if options.
|
|
552
|
-
args.
|
|
712
|
+
if options.recommend:
|
|
713
|
+
args.append("--recommend")
|
|
553
714
|
|
|
554
|
-
if options.
|
|
555
|
-
args.extend(["--
|
|
715
|
+
if options.report:
|
|
716
|
+
args.extend(["--report", options.report])
|
|
556
717
|
|
|
557
|
-
if options.
|
|
558
|
-
args.append("--
|
|
718
|
+
if options.consistency_check:
|
|
719
|
+
args.append("--consistency-check")
|
|
559
720
|
|
|
560
|
-
if options.
|
|
561
|
-
args.append("--
|
|
721
|
+
if options.cross_validation:
|
|
722
|
+
args.append("--cross-validation")
|
|
723
|
+
|
|
724
|
+
if options.confidence_level is not None:
|
|
725
|
+
args.extend(["--confidence-level", str(options.confidence_level)])
|
|
726
|
+
|
|
727
|
+
if options.purpose:
|
|
728
|
+
args.extend(["--purpose", options.purpose])
|
|
562
729
|
|
|
563
|
-
|
|
564
|
-
|
|
730
|
+
# Advanced options
|
|
731
|
+
if options.alpha is not None:
|
|
732
|
+
args.extend(["--alpha", str(options.alpha)])
|
|
565
733
|
|
|
566
734
|
if options.time_series:
|
|
567
735
|
args.append("--time-series")
|
|
@@ -1,204 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Backward compatibility module for lawkit-python
|
|
3
|
-
|
|
4
|
-
This module provides compatibility functions for users migrating from
|
|
5
|
-
other statistical analysis tools or expecting different API patterns.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import subprocess
|
|
9
|
-
import platform
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
from typing import List, Union
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class LawkitProcess:
|
|
15
|
-
"""Compatibility class that mimics subprocess.CompletedProcess"""
|
|
16
|
-
def __init__(self, returncode: int, stdout: str, stderr: str):
|
|
17
|
-
self.returncode = returncode
|
|
18
|
-
self.stdout = stdout
|
|
19
|
-
self.stderr = stderr
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def run_lawkit(args: List[str], input_data: Union[str, None] = None) -> LawkitProcess:
|
|
23
|
-
"""
|
|
24
|
-
Run lawkit command with arguments (legacy compatibility function)
|
|
25
|
-
|
|
26
|
-
Args:
|
|
27
|
-
args: Command line arguments (without 'lawkit' prefix)
|
|
28
|
-
input_data: Optional input data to pass via stdin
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
LawkitProcess object with returncode, stdout, stderr
|
|
32
|
-
|
|
33
|
-
Examples:
|
|
34
|
-
>>> result = run_lawkit(["benf", "data.csv"])
|
|
35
|
-
>>> if result.returncode == 0:
|
|
36
|
-
... print("Analysis successful")
|
|
37
|
-
... print(result.stdout)
|
|
38
|
-
... else:
|
|
39
|
-
... print("Analysis failed")
|
|
40
|
-
... print(result.stderr)
|
|
41
|
-
|
|
42
|
-
>>> # With input data
|
|
43
|
-
>>> csv_data = "amount\\n123\\n456\\n789"
|
|
44
|
-
>>> result = run_lawkit(["benf", "-"], input_data=csv_data)
|
|
45
|
-
"""
|
|
46
|
-
# Get the path to the lawkit binary
|
|
47
|
-
package_dir = Path(__file__).parent.parent.parent
|
|
48
|
-
binary_name = "lawkit.exe" if platform.system() == "Windows" else "lawkit"
|
|
49
|
-
local_binary_path = package_dir / "bin" / binary_name
|
|
50
|
-
|
|
51
|
-
if local_binary_path.exists():
|
|
52
|
-
lawkit_path = str(local_binary_path)
|
|
53
|
-
else:
|
|
54
|
-
lawkit_path = "lawkit"
|
|
55
|
-
|
|
56
|
-
try:
|
|
57
|
-
# Run the command
|
|
58
|
-
result = subprocess.run(
|
|
59
|
-
[lawkit_path] + args,
|
|
60
|
-
input=input_data,
|
|
61
|
-
capture_output=True,
|
|
62
|
-
text=True,
|
|
63
|
-
timeout=300 # 5 minute timeout
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
return LawkitProcess(
|
|
67
|
-
returncode=result.returncode,
|
|
68
|
-
stdout=result.stdout,
|
|
69
|
-
stderr=result.stderr
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
except subprocess.TimeoutExpired:
|
|
73
|
-
return LawkitProcess(
|
|
74
|
-
returncode=-1,
|
|
75
|
-
stdout="",
|
|
76
|
-
stderr="Command timed out after 5 minutes"
|
|
77
|
-
)
|
|
78
|
-
except FileNotFoundError:
|
|
79
|
-
return LawkitProcess(
|
|
80
|
-
returncode=-1,
|
|
81
|
-
stdout="",
|
|
82
|
-
stderr="lawkit command not found. Please install lawkit CLI tool."
|
|
83
|
-
)
|
|
84
|
-
except Exception as e:
|
|
85
|
-
return LawkitProcess(
|
|
86
|
-
returncode=-1,
|
|
87
|
-
stdout="",
|
|
88
|
-
stderr=f"Error running lawkit: {e}"
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def run_benford_analysis(file_path: str, **kwargs) -> LawkitProcess:
|
|
93
|
-
"""
|
|
94
|
-
Legacy function for Benford's Law analysis
|
|
95
|
-
|
|
96
|
-
Args:
|
|
97
|
-
file_path: Path to input file
|
|
98
|
-
**kwargs: Additional options (format, output, etc.)
|
|
99
|
-
|
|
100
|
-
Returns:
|
|
101
|
-
LawkitProcess object
|
|
102
|
-
|
|
103
|
-
Examples:
|
|
104
|
-
>>> result = run_benford_analysis("data.csv", format="csv", output="json")
|
|
105
|
-
"""
|
|
106
|
-
args = ["benf", file_path]
|
|
107
|
-
|
|
108
|
-
if "format" in kwargs:
|
|
109
|
-
args.extend(["--format", kwargs["format"]])
|
|
110
|
-
|
|
111
|
-
if "output" in kwargs:
|
|
112
|
-
args.extend(["--output", kwargs["output"]])
|
|
113
|
-
|
|
114
|
-
if "min_count" in kwargs:
|
|
115
|
-
args.extend(["--min-count", str(kwargs["min_count"])])
|
|
116
|
-
|
|
117
|
-
if "threshold" in kwargs:
|
|
118
|
-
args.extend(["--threshold", str(kwargs["threshold"])])
|
|
119
|
-
|
|
120
|
-
if kwargs.get("verbose", False):
|
|
121
|
-
args.append("--verbose")
|
|
122
|
-
|
|
123
|
-
if kwargs.get("optimize", False):
|
|
124
|
-
args.append("--optimize")
|
|
125
|
-
|
|
126
|
-
return run_lawkit(args)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
def run_pareto_analysis(file_path: str, **kwargs) -> LawkitProcess:
|
|
130
|
-
"""
|
|
131
|
-
Legacy function for Pareto principle analysis
|
|
132
|
-
|
|
133
|
-
Args:
|
|
134
|
-
file_path: Path to input file
|
|
135
|
-
**kwargs: Additional options
|
|
136
|
-
|
|
137
|
-
Returns:
|
|
138
|
-
LawkitProcess object
|
|
139
|
-
|
|
140
|
-
Examples:
|
|
141
|
-
>>> result = run_pareto_analysis("sales.csv", gini_coefficient=True)
|
|
142
|
-
"""
|
|
143
|
-
args = ["pareto", file_path]
|
|
144
|
-
|
|
145
|
-
if "format" in kwargs:
|
|
146
|
-
args.extend(["--format", kwargs["format"]])
|
|
147
|
-
|
|
148
|
-
if "output" in kwargs:
|
|
149
|
-
args.extend(["--output", kwargs["output"]])
|
|
150
|
-
|
|
151
|
-
if kwargs.get("gini_coefficient", False):
|
|
152
|
-
args.append("--gini-coefficient")
|
|
153
|
-
|
|
154
|
-
if "percentiles" in kwargs:
|
|
155
|
-
args.extend(["--percentiles", kwargs["percentiles"]])
|
|
156
|
-
|
|
157
|
-
if kwargs.get("business_analysis", False):
|
|
158
|
-
args.append("--business-analysis")
|
|
159
|
-
|
|
160
|
-
if kwargs.get("verbose", False):
|
|
161
|
-
args.append("--verbose")
|
|
162
|
-
|
|
163
|
-
return run_lawkit(args)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def check_lawkit_installation() -> bool:
|
|
167
|
-
"""
|
|
168
|
-
Check if lawkit is properly installed
|
|
169
|
-
|
|
170
|
-
Returns:
|
|
171
|
-
True if lawkit is available, False otherwise
|
|
172
|
-
|
|
173
|
-
Examples:
|
|
174
|
-
>>> if not check_lawkit_installation():
|
|
175
|
-
... print("Please install lawkit first")
|
|
176
|
-
... exit(1)
|
|
177
|
-
"""
|
|
178
|
-
result = run_lawkit(["--version"])
|
|
179
|
-
return result.returncode == 0
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
def get_lawkit_help(subcommand: str = None) -> str:
|
|
183
|
-
"""
|
|
184
|
-
Get help text for lawkit or a specific subcommand
|
|
185
|
-
|
|
186
|
-
Args:
|
|
187
|
-
subcommand: Optional subcommand name
|
|
188
|
-
|
|
189
|
-
Returns:
|
|
190
|
-
Help text as string
|
|
191
|
-
|
|
192
|
-
Examples:
|
|
193
|
-
>>> help_text = get_lawkit_help()
|
|
194
|
-
>>> print(help_text)
|
|
195
|
-
|
|
196
|
-
>>> benf_help = get_lawkit_help("benf")
|
|
197
|
-
>>> print(benf_help)
|
|
198
|
-
"""
|
|
199
|
-
if subcommand:
|
|
200
|
-
result = run_lawkit([subcommand, "--help"])
|
|
201
|
-
else:
|
|
202
|
-
result = run_lawkit(["--help"])
|
|
203
|
-
|
|
204
|
-
return result.stdout if result.returncode == 0 else result.stderr
|
|
File without changes
|
|
File without changes
|