lawkit-python 2.1.0__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -116,4 +116,7 @@ secrets.json
116
116
  # Large test files
117
117
  *.csv.large
118
118
  *.xlsx.large
119
- *.json.large
119
+ *.json.large
120
+
121
+ # Test directories
122
+ test-packages/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: lawkit-python
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: Python wrapper for lawkit - Statistical law analysis toolkit for fraud detection and data quality assessment
5
5
  Project-URL: Homepage, https://github.com/kako-jun/lawkit
6
6
  Project-URL: Repository, https://github.com/kako-jun/lawkit
@@ -55,14 +55,14 @@ This will automatically download the appropriate `lawkit` binary for your system
55
55
  ```python
56
56
  import lawkit
57
57
 
58
- # Analyze financial data with Benford's Law
58
+ # Analyze financial data with Benford Law
59
59
  result = lawkit.analyze_benford('financial_data.csv')
60
60
  print(result)
61
61
 
62
62
  # Get structured JSON output
63
63
  json_result = lawkit.analyze_benford(
64
64
  'accounting.csv',
65
- lawkit.LawkitOptions(format='csv', output='json')
65
+ lawkit.LawkitOptions(format='json')
66
66
  )
67
67
  print(f"Risk level: {json_result.risk_level}")
68
68
  print(f"P-value: {json_result.p_value}")
@@ -70,7 +70,7 @@ print(f"P-value: {json_result.p_value}")
70
70
  # Check if data follows Pareto principle (80/20 rule)
71
71
  pareto_result = lawkit.analyze_pareto(
72
72
  'sales_data.csv',
73
- lawkit.LawkitOptions(output='json', gini_coefficient=True)
73
+ lawkit.LawkitOptions(format='json', gini_coefficient=True)
74
74
  )
75
75
  print(f"Gini coefficient: {pareto_result.gini_coefficient}")
76
76
  print(f"80/20 concentration: {pareto_result.concentration_80_20}")
@@ -80,9 +80,9 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
80
80
 
81
81
  ### Statistical Laws Supported
82
82
 
83
- - **Benford's Law**: Detect fraud and anomalies in numerical data
83
+ - **Benford Law**: Detect fraud and anomalies in numerical data
84
84
  - **Pareto Principle**: Analyze 80/20 distributions and concentration
85
- - **Zipf's Law**: Analyze word frequencies and power-law distributions
85
+ - **Zipf Law**: Analyze word frequencies and power-law distributions
86
86
  - **Normal Distribution**: Test for normality and detect outliers
87
87
  - **Poisson Distribution**: Analyze rare events and count data
88
88
 
@@ -109,7 +109,7 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
109
109
  ```python
110
110
  import lawkit
111
111
 
112
- # Analyze with Benford's Law
112
+ # Analyze with Benford Law
113
113
  result = lawkit.analyze_benford('invoice_data.csv')
114
114
  print(result)
115
115
 
@@ -159,13 +159,27 @@ if normal_result.p_value < 0.05:
159
159
  if normal_result.outliers:
160
160
  print(f"Found {len(normal_result.outliers)} outliers")
161
161
 
162
- # Multi-law comparison
163
- comparison = lawkit.compare_laws(
162
+ # Multi-law analysis
163
+ analysis = lawkit.analyze_laws(
164
164
  'complex_dataset.csv',
165
- lawkit.LawkitOptions(output='json')
165
+ lawkit.LawkitOptions(format='json', laws='benf,pareto,zipf')
166
166
  )
167
- print(f"Best fitting law: {comparison.data.get('best_law')}")
168
- print(f"Overall risk level: {comparison.risk_level}")
167
+ print(f"Analysis results: {analysis.data}")
168
+ print(f"Overall risk level: {analysis.risk_level}")
169
+
170
+ # Data validation
171
+ validation = lawkit.validate_laws(
172
+ 'complex_dataset.csv',
173
+ lawkit.LawkitOptions(format='json', consistency_check=True)
174
+ )
175
+ print(f"Validation status: {validation.data}")
176
+
177
+ # Conflict diagnosis
178
+ diagnosis = lawkit.diagnose_laws(
179
+ 'complex_dataset.csv',
180
+ lawkit.LawkitOptions(format='json', report='detailed')
181
+ )
182
+ print(f"Diagnosis: {diagnosis.data}")
169
183
  ```
170
184
 
171
185
  ### Generate Sample Data
@@ -173,7 +187,7 @@ print(f"Overall risk level: {comparison.risk_level}")
173
187
  ```python
174
188
  import lawkit
175
189
 
176
- # Generate Benford's Law compliant data
190
+ # Generate Benford Law compliant data
177
191
  benford_data = lawkit.generate_data('benf', samples=1000, seed=42)
178
192
  print(benford_data)
179
193
 
@@ -205,7 +219,7 @@ csv_data = """amount
205
219
  result = lawkit.analyze_string(
206
220
  csv_data,
207
221
  'benf',
208
- lawkit.LawkitOptions(format='csv', output='json')
222
+ lawkit.LawkitOptions(format='json')
209
223
  )
210
224
  print(f"Risk assessment: {result.risk_level}")
211
225
 
@@ -214,7 +228,7 @@ json_data = '{"values": [12, 23, 34, 45, 56, 67, 78, 89]}'
214
228
  result = lawkit.analyze_string(
215
229
  json_data,
216
230
  'normal',
217
- lawkit.LawkitOptions(format='json', output='json')
231
+ lawkit.LawkitOptions(format='json')
218
232
  )
219
233
  print(f"Is normal: {result.p_value > 0.05}")
220
234
  ```
@@ -338,7 +352,7 @@ result = lawkit.analyze_benford('invoices.csv',
338
352
  if result.risk_level in ['High', 'Critical']:
339
353
  print("🚨 Potential fraud detected in invoice data")
340
354
  print(f"Statistical significance: p={result.p_value:.6f}")
341
- print(f"Deviation from Benford's Law: {result.mad:.2f}%")
355
+ print(f"Deviation from Benford Law: {result.mad:.2f}%")
342
356
  ```
343
357
 
344
358
  ### Business Intelligence
@@ -386,7 +400,7 @@ import lawkit
386
400
  result = lawkit.analyze_zipf('document.txt',
387
401
  lawkit.LawkitOptions(format='json'))
388
402
 
389
- print(f"Text follows Zipf's Law: {result.p_value > 0.05}")
403
+ print(f"Text follows Zipf Law: {result.p_value > 0.05}")
390
404
  print(f"Power law exponent: {result.exponent:.3f}")
391
405
  ```
392
406
 
@@ -394,12 +408,15 @@ print(f"Power law exponent: {result.exponent:.3f}")
394
408
 
395
409
  ### Main Functions
396
410
 
397
- - `analyze_benford(input_data, options)` - Benford's Law analysis
411
+ - `analyze_benford(input_data, options)` - Benford Law analysis
398
412
  - `analyze_pareto(input_data, options)` - Pareto principle analysis
399
- - `analyze_zipf(input_data, options)` - Zipf's Law analysis
413
+ - `analyze_zipf(input_data, options)` - Zipf Law analysis
400
414
  - `analyze_normal(input_data, options)` - Normal distribution analysis
401
415
  - `analyze_poisson(input_data, options)` - Poisson distribution analysis
402
- - `compare_laws(input_data, options)` - Multi-law comparison
416
+ - `analyze_laws(input_data, options)` - Multi-law analysis
417
+ - `validate_laws(input_data, options)` - Data validation and consistency check
418
+ - `diagnose_laws(input_data, options)` - Conflict diagnosis and detailed reporting
419
+ - `compare_laws(input_data, options)` - Alias for analyze_laws (backward compatibility)
403
420
  - `generate_data(law_type, samples, **kwargs)` - Generate sample data
404
421
  - `analyze_string(content, law_type, options)` - Analyze string data directly
405
422
 
@@ -15,14 +15,14 @@ This will automatically download the appropriate `lawkit` binary for your system
15
15
  ```python
16
16
  import lawkit
17
17
 
18
- # Analyze financial data with Benford's Law
18
+ # Analyze financial data with Benford Law
19
19
  result = lawkit.analyze_benford('financial_data.csv')
20
20
  print(result)
21
21
 
22
22
  # Get structured JSON output
23
23
  json_result = lawkit.analyze_benford(
24
24
  'accounting.csv',
25
- lawkit.LawkitOptions(format='csv', output='json')
25
+ lawkit.LawkitOptions(format='json')
26
26
  )
27
27
  print(f"Risk level: {json_result.risk_level}")
28
28
  print(f"P-value: {json_result.p_value}")
@@ -30,7 +30,7 @@ print(f"P-value: {json_result.p_value}")
30
30
  # Check if data follows Pareto principle (80/20 rule)
31
31
  pareto_result = lawkit.analyze_pareto(
32
32
  'sales_data.csv',
33
- lawkit.LawkitOptions(output='json', gini_coefficient=True)
33
+ lawkit.LawkitOptions(format='json', gini_coefficient=True)
34
34
  )
35
35
  print(f"Gini coefficient: {pareto_result.gini_coefficient}")
36
36
  print(f"80/20 concentration: {pareto_result.concentration_80_20}")
@@ -40,9 +40,9 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
40
40
 
41
41
  ### Statistical Laws Supported
42
42
 
43
- - **Benford's Law**: Detect fraud and anomalies in numerical data
43
+ - **Benford Law**: Detect fraud and anomalies in numerical data
44
44
  - **Pareto Principle**: Analyze 80/20 distributions and concentration
45
- - **Zipf's Law**: Analyze word frequencies and power-law distributions
45
+ - **Zipf Law**: Analyze word frequencies and power-law distributions
46
46
  - **Normal Distribution**: Test for normality and detect outliers
47
47
  - **Poisson Distribution**: Analyze rare events and count data
48
48
 
@@ -69,7 +69,7 @@ print(f"80/20 concentration: {pareto_result.concentration_80_20}")
69
69
  ```python
70
70
  import lawkit
71
71
 
72
- # Analyze with Benford's Law
72
+ # Analyze with Benford Law
73
73
  result = lawkit.analyze_benford('invoice_data.csv')
74
74
  print(result)
75
75
 
@@ -119,13 +119,27 @@ if normal_result.p_value < 0.05:
119
119
  if normal_result.outliers:
120
120
  print(f"Found {len(normal_result.outliers)} outliers")
121
121
 
122
- # Multi-law comparison
123
- comparison = lawkit.compare_laws(
122
+ # Multi-law analysis
123
+ analysis = lawkit.analyze_laws(
124
124
  'complex_dataset.csv',
125
- lawkit.LawkitOptions(output='json')
125
+ lawkit.LawkitOptions(format='json', laws='benf,pareto,zipf')
126
126
  )
127
- print(f"Best fitting law: {comparison.data.get('best_law')}")
128
- print(f"Overall risk level: {comparison.risk_level}")
127
+ print(f"Analysis results: {analysis.data}")
128
+ print(f"Overall risk level: {analysis.risk_level}")
129
+
130
+ # Data validation
131
+ validation = lawkit.validate_laws(
132
+ 'complex_dataset.csv',
133
+ lawkit.LawkitOptions(format='json', consistency_check=True)
134
+ )
135
+ print(f"Validation status: {validation.data}")
136
+
137
+ # Conflict diagnosis
138
+ diagnosis = lawkit.diagnose_laws(
139
+ 'complex_dataset.csv',
140
+ lawkit.LawkitOptions(format='json', report='detailed')
141
+ )
142
+ print(f"Diagnosis: {diagnosis.data}")
129
143
  ```
130
144
 
131
145
  ### Generate Sample Data
@@ -133,7 +147,7 @@ print(f"Overall risk level: {comparison.risk_level}")
133
147
  ```python
134
148
  import lawkit
135
149
 
136
- # Generate Benford's Law compliant data
150
+ # Generate Benford Law compliant data
137
151
  benford_data = lawkit.generate_data('benf', samples=1000, seed=42)
138
152
  print(benford_data)
139
153
 
@@ -165,7 +179,7 @@ csv_data = """amount
165
179
  result = lawkit.analyze_string(
166
180
  csv_data,
167
181
  'benf',
168
- lawkit.LawkitOptions(format='csv', output='json')
182
+ lawkit.LawkitOptions(format='json')
169
183
  )
170
184
  print(f"Risk assessment: {result.risk_level}")
171
185
 
@@ -174,7 +188,7 @@ json_data = '{"values": [12, 23, 34, 45, 56, 67, 78, 89]}'
174
188
  result = lawkit.analyze_string(
175
189
  json_data,
176
190
  'normal',
177
- lawkit.LawkitOptions(format='json', output='json')
191
+ lawkit.LawkitOptions(format='json')
178
192
  )
179
193
  print(f"Is normal: {result.p_value > 0.05}")
180
194
  ```
@@ -298,7 +312,7 @@ result = lawkit.analyze_benford('invoices.csv',
298
312
  if result.risk_level in ['High', 'Critical']:
299
313
  print("🚨 Potential fraud detected in invoice data")
300
314
  print(f"Statistical significance: p={result.p_value:.6f}")
301
- print(f"Deviation from Benford's Law: {result.mad:.2f}%")
315
+ print(f"Deviation from Benford Law: {result.mad:.2f}%")
302
316
  ```
303
317
 
304
318
  ### Business Intelligence
@@ -346,7 +360,7 @@ import lawkit
346
360
  result = lawkit.analyze_zipf('document.txt',
347
361
  lawkit.LawkitOptions(format='json'))
348
362
 
349
- print(f"Text follows Zipf's Law: {result.p_value > 0.05}")
363
+ print(f"Text follows Zipf Law: {result.p_value > 0.05}")
350
364
  print(f"Power law exponent: {result.exponent:.3f}")
351
365
  ```
352
366
 
@@ -354,12 +368,15 @@ print(f"Power law exponent: {result.exponent:.3f}")
354
368
 
355
369
  ### Main Functions
356
370
 
357
- - `analyze_benford(input_data, options)` - Benford's Law analysis
371
+ - `analyze_benford(input_data, options)` - Benford Law analysis
358
372
  - `analyze_pareto(input_data, options)` - Pareto principle analysis
359
- - `analyze_zipf(input_data, options)` - Zipf's Law analysis
373
+ - `analyze_zipf(input_data, options)` - Zipf Law analysis
360
374
  - `analyze_normal(input_data, options)` - Normal distribution analysis
361
375
  - `analyze_poisson(input_data, options)` - Poisson distribution analysis
362
- - `compare_laws(input_data, options)` - Multi-law comparison
376
+ - `analyze_laws(input_data, options)` - Multi-law analysis
377
+ - `validate_laws(input_data, options)` - Data validation and consistency check
378
+ - `diagnose_laws(input_data, options)` - Conflict diagnosis and detailed reporting
379
+ - `compare_laws(input_data, options)` - Alias for analyze_laws (backward compatibility)
363
380
  - `generate_data(law_type, samples, **kwargs)` - Generate sample data
364
381
  - `analyze_string(content, law_type, options)` - Analyze string data directly
365
382
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "lawkit-python"
7
- version = "2.1.0"
7
+ version = "2.1.2"
8
8
  description = "Python wrapper for lawkit - Statistical law analysis toolkit for fraud detection and data quality assessment"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -13,6 +13,9 @@ from .lawkit import (
13
13
  analyze_zipf,
14
14
  analyze_normal,
15
15
  analyze_poisson,
16
+ analyze_laws,
17
+ validate_laws,
18
+ diagnose_laws,
16
19
  compare_laws,
17
20
  generate_data,
18
21
  analyze_string,
@@ -38,6 +41,9 @@ __all__ = [
38
41
  "analyze_zipf",
39
42
  "analyze_normal",
40
43
  "analyze_poisson",
44
+ "analyze_laws",
45
+ "validate_laws",
46
+ "diagnose_laws",
41
47
  "compare_laws",
42
48
 
43
49
  # Utility functions
@@ -22,20 +22,44 @@ LawType = Literal["benf", "pareto", "zipf", "normal", "poisson"]
22
22
  class LawkitOptions:
23
23
  """Options for lawkit operations"""
24
24
  format: Optional[Format] = None
25
- output: Optional[OutputFormat] = None
26
- min_count: Optional[int] = None
27
- threshold: Optional[float] = None
28
- confidence: Optional[float] = None
25
+ quiet: bool = False
29
26
  verbose: bool = False
27
+ filter: Optional[str] = None
28
+ min_count: Optional[int] = None
30
29
  optimize: bool = False
31
- international: bool = False
30
+
31
+ # Integration-specific options
32
+ laws: Optional[str] = None # "benf,pareto,zipf,normal,poisson"
33
+ focus: Optional[str] = None # "quality", "concentration", "distribution", "anomaly"
34
+ threshold: Optional[float] = None # Conflict detection threshold
35
+ recommend: bool = False
36
+ report: Optional[str] = None # "summary", "detailed", "conflicting"
37
+ consistency_check: bool = False
38
+ cross_validation: bool = False
39
+ confidence_level: Optional[float] = None
40
+ purpose: Optional[str] = None # "quality", "fraud", "concentration", "anomaly", "distribution", "general"
41
+
32
42
  # Law-specific options
33
43
  gini_coefficient: bool = False
34
44
  percentiles: Optional[str] = None
35
45
  business_analysis: bool = False
46
+ concentration: Optional[float] = None
47
+
48
+ # Benford-specific options
49
+ threshold_level: Optional[str] = None # "low", "medium", "high", "critical", "auto"
50
+
51
+ # Generate-specific options
52
+ samples: Optional[int] = None
53
+ seed: Optional[int] = None
54
+ output_file: Optional[str] = None
55
+ fraud_rate: Optional[float] = None
56
+ range: Optional[str] = None # "1,100000"
57
+ scale: Optional[float] = None
58
+
36
59
  # Statistical options
37
60
  test_type: Optional[str] = None
38
61
  alpha: Optional[float] = None
62
+
39
63
  # Advanced options
40
64
  outlier_detection: bool = False
41
65
  time_series: bool = False
@@ -144,14 +168,17 @@ def _execute_lawkit(args: List[str]) -> tuple[str, str]:
144
168
  [lawkit_path] + args,
145
169
  capture_output=True,
146
170
  text=True,
147
- check=True
171
+ check=False # Don't raise exception on non-zero exit
148
172
  )
149
- return result.stdout, result.stderr
150
- except subprocess.CalledProcessError as e:
173
+
174
+ # Exit codes 10-19 are typically warnings, not fatal errors
175
+ if result.returncode == 0 or (result.returncode >= 10 and result.returncode <= 19):
176
+ return result.stdout, result.stderr
177
+
151
178
  raise LawkitError(
152
- f"lawkit exited with code {e.returncode}",
153
- e.returncode,
154
- e.stderr or ""
179
+ f"lawkit exited with code {result.returncode}",
180
+ result.returncode,
181
+ result.stderr or ""
155
182
  )
156
183
  except FileNotFoundError:
157
184
  raise LawkitError(
@@ -192,10 +219,14 @@ def analyze_benford(
192
219
  # Add common options
193
220
  _add_common_options(args, options)
194
221
 
222
+ # Add Benford-specific options
223
+ if options.threshold_level:
224
+ args.extend(["--threshold", options.threshold_level])
225
+
195
226
  stdout, stderr = _execute_lawkit(args)
196
227
 
197
228
  # If output format is JSON, parse the result
198
- if options.output == "json":
229
+ if options.format == "json":
199
230
  try:
200
231
  json_data = json.loads(stdout)
201
232
  return LawkitResult(json_data, "benford")
@@ -238,6 +269,9 @@ def analyze_pareto(
238
269
  _add_common_options(args, options)
239
270
 
240
271
  # Add Pareto-specific options
272
+ if options.concentration is not None:
273
+ args.extend(["--concentration", str(options.concentration)])
274
+
241
275
  if options.gini_coefficient:
242
276
  args.append("--gini-coefficient")
243
277
 
@@ -250,7 +284,7 @@ def analyze_pareto(
250
284
  stdout, stderr = _execute_lawkit(args)
251
285
 
252
286
  # If output format is JSON, parse the result
253
- if options.output == "json":
287
+ if options.format == "json":
254
288
  try:
255
289
  json_data = json.loads(stdout)
256
290
  return LawkitResult(json_data, "pareto")
@@ -294,7 +328,7 @@ def analyze_zipf(
294
328
  stdout, stderr = _execute_lawkit(args)
295
329
 
296
330
  # If output format is JSON, parse the result
297
- if options.output == "json":
331
+ if options.format == "json":
298
332
  try:
299
333
  json_data = json.loads(stdout)
300
334
  return LawkitResult(json_data, "zipf")
@@ -347,7 +381,7 @@ def analyze_normal(
347
381
  stdout, stderr = _execute_lawkit(args)
348
382
 
349
383
  # If output format is JSON, parse the result
350
- if options.output == "json":
384
+ if options.format == "json":
351
385
  try:
352
386
  json_data = json.loads(stdout)
353
387
  return LawkitResult(json_data, "normal")
@@ -391,7 +425,7 @@ def analyze_poisson(
391
425
  stdout, stderr = _execute_lawkit(args)
392
426
 
393
427
  # If output format is JSON, parse the result
394
- if options.output == "json":
428
+ if options.format == "json":
395
429
  try:
396
430
  json_data = json.loads(stdout)
397
431
  return LawkitResult(json_data, "poisson")
@@ -402,12 +436,12 @@ def analyze_poisson(
402
436
  return stdout
403
437
 
404
438
 
405
- def compare_laws(
439
+ def analyze_laws(
406
440
  input_data: str,
407
441
  options: Optional[LawkitOptions] = None
408
442
  ) -> Union[str, LawkitResult]:
409
443
  """
410
- Compare multiple statistical laws on the same data
444
+ Analyze data using multiple statistical laws (basic analysis)
411
445
 
412
446
  Args:
413
447
  input_data: Path to input file or '-' for stdin
@@ -417,17 +451,105 @@ def compare_laws(
417
451
  String output for text format, or LawkitResult for JSON format
418
452
 
419
453
  Examples:
420
- >>> result = compare_laws('dataset.csv')
454
+ >>> result = analyze_laws('dataset.csv')
421
455
  >>> print(result)
422
456
 
423
- >>> json_result = compare_laws('complex_data.json',
424
- ... LawkitOptions(output='json'))
457
+ >>> json_result = analyze_laws('complex_data.json',
458
+ ... LawkitOptions(format='json'))
425
459
  >>> print(f"Risk level: {json_result.risk_level}")
426
460
  """
427
461
  if options is None:
428
462
  options = LawkitOptions()
429
463
 
430
- args = ["compare", input_data]
464
+ args = ["analyze", input_data]
465
+
466
+ # Add common options
467
+ _add_common_options(args, options)
468
+
469
+ stdout, stderr = _execute_lawkit(args)
470
+
471
+ # If output format is JSON, parse the result
472
+ if options.format == "json":
473
+ try:
474
+ json_data = json.loads(stdout)
475
+ return LawkitResult(json_data, "analyze")
476
+ except json.JSONDecodeError as e:
477
+ raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
478
+
479
+ # Return raw output for other formats
480
+ return stdout
481
+
482
+
483
+ def validate_laws(
484
+ input_data: str,
485
+ options: Optional[LawkitOptions] = None
486
+ ) -> Union[str, LawkitResult]:
487
+ """
488
+ Validate data consistency using multiple statistical laws
489
+
490
+ Args:
491
+ input_data: Path to input file or '-' for stdin
492
+ options: Analysis options
493
+
494
+ Returns:
495
+ String output for text format, or LawkitResult for JSON format
496
+
497
+ Examples:
498
+ >>> result = validate_laws('dataset.csv')
499
+ >>> print(result)
500
+
501
+ >>> json_result = validate_laws('complex_data.json',
502
+ ... LawkitOptions(format='json'))
503
+ >>> print(f"Validation result: {json_result.data}")
504
+ """
505
+ if options is None:
506
+ options = LawkitOptions()
507
+
508
+ args = ["validate", input_data]
509
+
510
+ # Add common options
511
+ _add_common_options(args, options)
512
+
513
+ stdout, stderr = _execute_lawkit(args)
514
+
515
+ # If output format is JSON, parse the result
516
+ if options.format == "json":
517
+ try:
518
+ json_data = json.loads(stdout)
519
+ return LawkitResult(json_data, "validate")
520
+ except json.JSONDecodeError as e:
521
+ raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
522
+
523
+ # Return raw output for other formats
524
+ return stdout
525
+
526
+
527
+ def diagnose_laws(
528
+ input_data: str,
529
+ options: Optional[LawkitOptions] = None
530
+ ) -> Union[str, LawkitResult]:
531
+ """
532
+ Diagnose conflicts and generate detailed analysis report
533
+
534
+ Args:
535
+ input_data: Path to input file or '-' for stdin
536
+ options: Analysis options
537
+
538
+ Returns:
539
+ String output for text format, or LawkitResult for JSON format
540
+
541
+ Examples:
542
+ >>> result = diagnose_laws('dataset.csv')
543
+ >>> print(result)
544
+
545
+ >>> json_result = diagnose_laws('complex_data.json',
546
+ ... LawkitOptions(format='json'))
547
+ >>> print(f"Diagnosis: {json_result.data}")
548
+ """
549
+ if options is None:
550
+ options = LawkitOptions()
551
+
552
+ args = ["diagnose", input_data]
431
553
 
432
554
  # Add common options
433
555
  _add_common_options(args, options)
@@ -435,10 +557,10 @@ def compare_laws(
435
557
  stdout, stderr = _execute_lawkit(args)
436
558
 
437
559
  # If output format is JSON, parse the result
438
- if options.output == "json":
560
+ if options.format == "json":
439
561
  try:
440
562
  json_data = json.loads(stdout)
441
- return LawkitResult(json_data, "compare")
563
+ return LawkitResult(json_data, "diagnose")
442
564
  except json.JSONDecodeError as e:
443
565
  raise LawkitError(f"Failed to parse JSON output: {e}", -1, "")
444
566
 
@@ -446,10 +568,13 @@ def compare_laws(
446
568
  return stdout
447
569
 
448
570
 
571
+ # Backward compatibility alias
572
+ compare_laws = analyze_laws
573
+
574
+
449
575
  def generate_data(
450
576
  law_type: LawType,
451
- samples: int = 1000,
452
- seed: Optional[int] = None,
577
+ options: Optional[LawkitOptions] = None,
453
578
  **kwargs
454
579
  ) -> str:
455
580
  """
@@ -457,26 +582,46 @@ def generate_data(
457
582
 
458
583
  Args:
459
584
  law_type: Type of statistical law to use
460
- samples: Number of samples to generate
461
- seed: Random seed for reproducibility
462
- **kwargs: Law-specific parameters
585
+ options: Generation options (samples, seed, etc.)
586
+ **kwargs: Law-specific parameters (deprecated, use options instead)
463
587
 
464
588
  Returns:
465
589
  Generated data as string
466
590
 
467
591
  Examples:
468
- >>> data = generate_data('benf', samples=1000, seed=42)
592
+ >>> data = generate_data('benf', LawkitOptions(samples=1000, seed=42))
469
593
  >>> print(data)
470
594
 
471
- >>> normal_data = generate_data('normal', samples=500, mean=100, stddev=15)
472
- >>> pareto_data = generate_data('pareto', samples=1000, concentration=0.8)
595
+ >>> options = LawkitOptions(samples=500, fraud_rate=0.1, range="1,10000")
596
+ >>> normal_data = generate_data('normal', options)
597
+ >>> pareto_data = generate_data('pareto', LawkitOptions(concentration=0.8))
473
598
  """
474
- args = ["generate", law_type, "--samples", str(samples)]
599
+ if options is None:
600
+ options = LawkitOptions()
475
601
 
476
- if seed is not None:
477
- args.extend(["--seed", str(seed)])
602
+ args = ["generate", law_type]
478
603
 
479
- # Add law-specific parameters
604
+ # Add common options
605
+ _add_common_options(args, options)
606
+
607
+ # Add generate-specific options
608
+ if options.samples is not None:
609
+ args.extend(["--samples", str(options.samples)])
610
+
611
+ if options.seed is not None:
612
+ args.extend(["--seed", str(options.seed)])
613
+
614
+ if options.output_file:
615
+ args.extend(["--output-file", options.output_file])
616
+
617
+ if options.fraud_rate is not None:
618
+ args.extend(["--fraud-rate", str(options.fraud_rate)])
619
+
620
+ # Note: --range option not available in current CLI
621
+
622
+ # Note: --scale option may not be available for all law types
623
+
624
+ # Add law-specific parameters (backward compatibility)
480
625
  for key, value in kwargs.items():
481
626
  key_formatted = key.replace("_", "-")
482
627
  args.extend([f"--{key_formatted}", str(value)])
@@ -539,29 +684,52 @@ def _add_common_options(args: List[str], options: LawkitOptions) -> None:
539
684
  if options.format:
540
685
  args.extend(["--format", options.format])
541
686
 
542
- if options.output:
543
- args.extend(["--output", options.output])
687
+ if options.quiet:
688
+ args.append("--quiet")
689
+
690
+ if options.verbose:
691
+ args.append("--verbose")
692
+
693
+ if options.filter:
694
+ args.extend(["--filter", options.filter])
544
695
 
545
696
  if options.min_count is not None:
546
697
  args.extend(["--min-count", str(options.min_count)])
547
698
 
699
+ if options.optimize:
700
+ args.append("--optimize")
701
+
702
+ # Integration-specific options
703
+ if options.laws:
704
+ args.extend(["--laws", options.laws])
705
+
706
+ if options.focus:
707
+ args.extend(["--focus", options.focus])
708
+
548
709
  if options.threshold is not None:
549
710
  args.extend(["--threshold", str(options.threshold)])
550
711
 
551
- if options.confidence is not None:
552
- args.extend(["--confidence", str(options.confidence)])
712
+ if options.recommend:
713
+ args.append("--recommend")
553
714
 
554
- if options.alpha is not None:
555
- args.extend(["--alpha", str(options.alpha)])
715
+ if options.report:
716
+ args.extend(["--report", options.report])
556
717
 
557
- if options.verbose:
558
- args.append("--verbose")
718
+ if options.consistency_check:
719
+ args.append("--consistency-check")
559
720
 
560
- if options.optimize:
561
- args.append("--optimize")
721
+ if options.cross_validation:
722
+ args.append("--cross-validation")
723
+
724
+ if options.confidence_level is not None:
725
+ args.extend(["--confidence-level", str(options.confidence_level)])
726
+
727
+ if options.purpose:
728
+ args.extend(["--purpose", options.purpose])
562
729
 
563
- if options.international:
564
- args.append("--international")
730
+ # Advanced options
731
+ if options.alpha is not None:
732
+ args.extend(["--alpha", str(options.alpha)])
565
733
 
566
734
  if options.time_series:
567
735
  args.append("--time-series")