elm-tool 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
elm/__init__.py ADDED
@@ -0,0 +1,48 @@
1
+ """
2
+ ELM Tool - Extract, Load and Mask Tool for Database Operations
3
+
4
+ This package provides tools for:
5
+ - Managing database environments
6
+ - Copying data between databases or files
7
+ - Masking sensitive data
8
+ - Generating test data
9
+
10
+ It can be used both as a command-line tool and as a Python library.
11
+ """
12
+
13
+ __version__ = "0.1.0"
14
+
15
+ # Import main API functions for easy access
16
+ from elm.api import (
17
+ # Environment Management
18
+ create_environment,
19
+ list_environments,
20
+ get_environment,
21
+ delete_environment,
22
+ test_environment,
23
+ execute_sql,
24
+
25
+ # Data Copy
26
+ copy_db_to_file,
27
+ copy_file_to_db,
28
+ copy_db_to_db,
29
+
30
+ # Data Masking
31
+ add_mask,
32
+ remove_mask,
33
+ list_masks,
34
+ test_mask,
35
+
36
+ # Data Generation
37
+ generate_data,
38
+ generate_and_save,
39
+
40
+ # Configuration Management
41
+ get_config,
42
+ set_config,
43
+ reset_config,
44
+ get_config_info
45
+ )
46
+
47
+ # For backward compatibility
48
+ from elm.elm import cli
elm/api.py ADDED
@@ -0,0 +1,569 @@
1
+ """
2
+ ELM Tool API - Programmatic interface for the ELM Tool
3
+
4
+ This module provides functions for programmatically using the ELM Tool
5
+ without going through the command-line interface.
6
+
7
+ All API functions now use the unified core modules to ensure consistency
8
+ with CLI commands and eliminate code duplication.
9
+ """
10
+
11
+ import pandas as pd
12
+ from typing import Dict, List, Union, Optional, Any
13
+
14
+ # Import core modules for unified business logic
15
+ from elm.core import environment as core_env
16
+ from elm.core import copy as core_copy
17
+ from elm.core import masking as core_mask
18
+ from elm.core import generation as core_gen
19
+ from elm.core import config as core_config
20
+ from elm.core.types import OperationResult
21
+
22
+ # Environment Management Functions
23
+
24
def create_environment(
    name: str,
    host: str,
    port: int,
    user: str,
    password: str,
    service: str,
    db_type: str,
    encrypt: bool = False,
    encryption_key: Optional[str] = None,
    overwrite: bool = False,
    connection_type: Optional[str] = None
) -> bool:
    """
    Register a new database environment via the core environment module.

    Every argument is forwarded unchanged, by keyword, to
    ``core_env.create_environment``.

    Args:
        name: Name of the environment to create
        host: Host of the database server
        port: Port of the database server
        user: Username used to connect
        password: Password used to connect
        service: Service name of the database (or SID for Oracle)
        db_type: Kind of database (ORACLE, MYSQL, MSSQL, POSTGRES)
        encrypt: Whether the environment should be stored encrypted
        encryption_key: Key used for encryption (needed when encrypt=True)
        overwrite: Whether an already existing environment may be replaced
        connection_type: Oracle connection type ('service_name' or 'sid').
            None is passed through as-is; the core module is expected to
            fall back to 'service_name' (confirm in elm.core.environment)

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    # Collect the keyword arguments once so the delegation is a single call.
    settings = {
        'name': name,
        'host': host,
        'port': port,
        'user': user,
        'password': password,
        'service': service,
        'db_type': db_type,
        'encrypt': encrypt,
        'encryption_key': encryption_key,
        'overwrite': overwrite,
        'connection_type': connection_type,
    }
    outcome = core_env.create_environment(**settings)
    return outcome.success
70
+
71
def list_environments(show_all: bool = False) -> List[Dict[str, Any]]:
    """
    Return the environments known to the core environment module.

    Args:
        show_all: Forwarded to the core module to request full details
            (passwords are expected to be masked by the core module)

    Returns:
        The ``data`` of the core result when it succeeded, otherwise an
        empty list
    """
    outcome = core_env.list_environments(show_all=show_all)
    if not outcome.success:
        # A failed lookup is reported to callers as "no environments".
        return []
    return outcome.data
83
+
84
def get_environment(name: str, encryption_key: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """
    Look up a single environment by name.

    Args:
        name: Name of the environment to fetch
        encryption_key: Key for environments that are stored encrypted

    Returns:
        The ``data`` of the core result when it succeeded, otherwise None
    """
    outcome = core_env.get_environment(name=name, encryption_key=encryption_key)
    if not outcome.success:
        # Any failure reported by the core module is surfaced as None.
        return None
    return outcome.data
97
+
98
def delete_environment(name: str) -> bool:
    """
    Remove an environment through the core environment module.

    Args:
        name: Name of the environment to delete

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    return core_env.delete_environment(name=name).success
110
+
111
def test_environment(name: str, encryption_key: Optional[str] = None) -> Dict[str, Any]:
    """
    Run the core connection test for an environment.

    Args:
        name: Name of the environment to test
        encryption_key: Key for environments that are stored encrypted

    Returns:
        The core result converted with its ``to_dict()`` method
    """
    return core_env.test_environment(
        name=name,
        encryption_key=encryption_key,
    ).to_dict()
124
+
125
def execute_sql(
    environment: str,
    query: str,
    encryption_key: Optional[str] = None,
    params: Optional[Dict[str, Any]] = None
) -> pd.DataFrame:
    """
    Run a SQL query against an environment and return the rows as a DataFrame.

    Args:
        environment: Name of the environment to run the query on
        query: SQL text to execute
        encryption_key: Key for environments that are stored encrypted
        params: Parameters passed along with the query

    Returns:
        A DataFrame built from the core result's ``data``; an empty
        DataFrame when the operation failed or ``data`` is falsy
    """
    outcome = core_env.execute_sql(
        environment=environment,
        query=query,
        encryption_key=encryption_key,
        params=params,
    )
    # NOTE(review): the truthiness test assumes ``data`` is a plain container
    # (e.g. list of records); a DataFrame/ndarray here would raise - confirm
    # against elm.core.environment.
    if not outcome.success or not outcome.data:
        return pd.DataFrame()
    return pd.DataFrame(outcome.data)
153
+
154
def update_environment(
    name: str,
    host: Optional[str] = None,
    port: Optional[int] = None,
    user: Optional[str] = None,
    password: Optional[str] = None,
    service: Optional[str] = None,
    db_type: Optional[str] = None,
    encrypt: Optional[bool] = None,
    encryption_key: Optional[str] = None
) -> bool:
    """
    Change the settings of an existing environment.

    All arguments, including those left at None, are forwarded by keyword
    to ``core_env.update_environment``.

    Args:
        name: Name of the environment to update
        host: Replacement database host
        port: Replacement database port
        user: Replacement database username
        password: Replacement database password
        service: Replacement database service name
        db_type: Replacement database type
        encrypt: Whether the environment should be stored encrypted
        encryption_key: Key used for encryption (needed when encrypt=True)

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    changes = {
        'name': name,
        'host': host,
        'port': port,
        'user': user,
        'password': password,
        'service': service,
        'db_type': db_type,
        'encrypt': encrypt,
        'encryption_key': encryption_key,
    }
    outcome = core_env.update_environment(**changes)
    return outcome.success
194
+
195
+ # Data Copy Functions
196
+
197
def copy_db_to_file(
    source_env: str,
    query: str,
    file_path: str,
    file_format: str = 'csv',
    mode: str = 'REPLACE',
    batch_size: Optional[int] = None,
    parallel_workers: int = 1,
    source_encryption_key: Optional[str] = None,
    apply_masks: bool = True,
    verbose_batch_logs: bool = True,
) -> Dict[str, Any]:
    """
    Export the result of a query to a file.

    Args:
        source_env: Name of the environment to read from
        query: SQL text to execute on the source
        file_path: Path of the file to write
        file_format: Format of the output file (csv, json)
        mode: How to write the file (REPLACE, APPEND)
        batch_size: Size of the batches used for large datasets
        parallel_workers: How many workers run in parallel
        source_encryption_key: Key for an encrypted source environment
        apply_masks: Whether masking rules are applied to the data
        verbose_batch_logs: Whether per-batch timings are logged (the
            overall summary is always logged)

    Returns:
        The core result converted with its ``to_dict()`` method
    """
    options = {
        'source_env': source_env,
        'query': query,
        'file_path': file_path,
        'file_format': file_format,
        'mode': mode,
        'batch_size': batch_size,
        'parallel_workers': parallel_workers,
        'source_encryption_key': source_encryption_key,
        'apply_masks': apply_masks,
        'verbose_batch_logs': verbose_batch_logs,
    }
    outcome = core_copy.copy_db_to_file(**options)
    return outcome.to_dict()
240
+
241
def copy_file_to_db(
    file_path: str,
    target_env: str,
    table: str,
    file_format: str = 'csv',
    mode: str = 'APPEND',
    batch_size: Optional[int] = 1000,
    parallel_workers: int = 1,
    target_encryption_key: Optional[str] = None,
    validate_target: bool = False,
    create_if_not_exists: bool = False,
    apply_masks: bool = True,
    verbose_batch_logs: bool = True,
) -> Dict[str, Any]:
    """
    Load the contents of a file into a database table.

    Args:
        file_path: Path of the file to read
        target_env: Name of the environment to write to
        table: Name of the table that receives the data
        file_format: Format of the input file (csv, json)
        mode: How to write the table (APPEND, REPLACE, FAIL)
        batch_size: Size of the batches used when writing
        parallel_workers: How many workers run in parallel
        target_encryption_key: Key for an encrypted target environment
        validate_target: Whether the target table is validated
        create_if_not_exists: Whether a missing target table is created
        apply_masks: Whether masking rules are applied to the data
        verbose_batch_logs: Whether per-batch timings are logged (the
            overall summary is always logged)

    Returns:
        The core result converted with its ``to_dict()`` method
    """
    options = {
        'file_path': file_path,
        'target_env': target_env,
        'table': table,
        'file_format': file_format,
        'mode': mode,
        'batch_size': batch_size,
        'parallel_workers': parallel_workers,
        'target_encryption_key': target_encryption_key,
        'validate_target': validate_target,
        'create_if_not_exists': create_if_not_exists,
        'apply_masks': apply_masks,
        'verbose_batch_logs': verbose_batch_logs,
    }
    outcome = core_copy.copy_file_to_db(**options)
    return outcome.to_dict()
290
+
291
def copy_db_to_db(
    source_env: str,
    target_env: str,
    query: str,
    table: str,
    mode: str = 'APPEND',
    batch_size: Optional[int] = 1000,
    parallel_workers: int = 1,
    source_encryption_key: Optional[str] = None,
    target_encryption_key: Optional[str] = None,
    validate_target: bool = False,
    create_if_not_exists: bool = False,
    apply_masks: bool = True,
    verbose_batch_logs: bool = True,
) -> Dict[str, Any]:
    """
    Transfer the result of a query from one database into a table of another.

    Args:
        source_env: Name of the environment to read from
        target_env: Name of the environment to write to
        query: SQL text to execute on the source
        table: Name of the table that receives the data
        mode: How to write the table (APPEND, REPLACE, FAIL)
        batch_size: Size of the batches used when writing
        parallel_workers: How many workers run in parallel
        source_encryption_key: Key for an encrypted source environment
        target_encryption_key: Key for an encrypted target environment
        validate_target: Whether the target table is validated
        create_if_not_exists: Whether a missing target table is created
        apply_masks: Whether masking rules are applied to the data
        verbose_batch_logs: Whether per-batch timing logs are printed (in
            addition to the summary)

    Returns:
        The core result converted with its ``to_dict()`` method
    """
    options = {
        'source_env': source_env,
        'target_env': target_env,
        'query': query,
        'table': table,
        'mode': mode,
        'batch_size': batch_size,
        'parallel_workers': parallel_workers,
        'source_encryption_key': source_encryption_key,
        'target_encryption_key': target_encryption_key,
        'validate_target': validate_target,
        'create_if_not_exists': create_if_not_exists,
        'apply_masks': apply_masks,
        'verbose_batch_logs': verbose_batch_logs,
    }
    outcome = core_copy.copy_db_to_db(**options)
    return outcome.to_dict()
343
+
344
+ # Data Masking Functions
345
+
346
def add_mask(
    column: str,
    algorithm: str,
    environment: Optional[str] = None,
    length: Optional[int] = None,
    params: Optional[Dict[str, Any]] = None
) -> bool:
    """
    Register a masking rule for a column.

    Args:
        column: Name of the column to mask
        algorithm: Name of the masking algorithm (star, star_length,
            random, nullify)
        environment: Name of the environment the rule belongs to (None
            for a global rule)
        length: Length value for algorithms that take one
        params: Further algorithm parameters

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    return core_mask.add_mask(
        column=column,
        algorithm=algorithm,
        environment=environment,
        length=length,
        params=params,
    ).success
374
+
375
def remove_mask(column: str, environment: Optional[str] = None) -> bool:
    """
    Drop the masking rule of a column.

    Args:
        column: Name of the column whose rule is removed
        environment: Name of the environment the rule belongs to (None
            for a global rule)

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    return core_mask.remove_mask(column=column, environment=environment).success
388
+
389
def list_masks(environment: Optional[str] = None) -> Dict[str, Any]:
    """
    Return the configured masking rules.

    Args:
        environment: Name of the environment to list rules for (None for
            all)

    Returns:
        The ``data`` of the core result when it succeeded, otherwise an
        empty dictionary
    """
    outcome = core_mask.list_masks(environment=environment)
    if not outcome.success:
        # A failed lookup is reported to callers as "no rules".
        return {}
    return outcome.data
401
+
402
def test_mask(
    column: str,
    value: str,
    environment: Optional[str] = None
) -> Dict[str, Any]:
    """
    Try out the masking rule of a column on a sample value.

    Args:
        column: Name of the column whose rule is applied
        value: Sample value to mask
        environment: Name of the environment the rule belongs to

    Returns:
        The ``data`` of the core result when it succeeded (described by the
        API as holding the original and masked values), otherwise an empty
        dictionary
    """
    outcome = core_mask.test_mask(column=column, value=value, environment=environment)
    if not outcome.success:
        return {}
    return outcome.data
420
+
421
+ # Data Generation Functions
422
+
423
def generate_data(
    num_records: int = 10,
    columns: Optional[List[str]] = None,
    environment: Optional[str] = None,
    table: Optional[str] = None,
    string_length: int = 10,
    pattern: Optional[Dict[str, str]] = None,
    min_number: float = 0,
    max_number: float = 100,
    decimal_places: int = 2,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    date_format: str = '%Y-%m-%d'
) -> pd.DataFrame:
    """
    Produce random test data as a DataFrame.

    Args:
        num_records: How many records to generate
        columns: Names of the columns to generate
        environment: Name of the environment the table schema is read from
        table: Name of the table the schema is read from
        string_length: Default length of generated strings
        pattern: Mapping of column name to pattern
        min_number: Lower bound for numeric columns
        max_number: Upper bound for numeric columns
        decimal_places: Decimal places used for numeric columns
        start_date: Earliest date for date columns
        end_date: Latest date for date columns
        date_format: Format string used for date columns

    Returns:
        A DataFrame built from the core result's ``data``; an empty
        DataFrame when the operation failed or ``data`` is falsy
    """
    spec = {
        'num_records': num_records,
        'columns': columns,
        'environment': environment,
        'table': table,
        'string_length': string_length,
        'pattern': pattern,
        'min_number': min_number,
        'max_number': max_number,
        'decimal_places': decimal_places,
        'start_date': start_date,
        'end_date': end_date,
        'date_format': date_format,
    }
    outcome = core_gen.generate_data(**spec)

    # NOTE(review): the truthiness test assumes ``data`` is a plain container
    # (e.g. list of records); a DataFrame/ndarray here would raise - confirm
    # against elm.core.generation.
    if not outcome.success or not outcome.data:
        return pd.DataFrame()
    return pd.DataFrame(outcome.data)
476
+
477
def generate_and_save(
    num_records: int = 10,
    columns: Optional[List[str]] = None,
    environment: Optional[str] = None,
    table: Optional[str] = None,
    output_file: Optional[str] = None,
    file_format: str = 'csv',
    write_to_db: bool = False,
    mode: str = 'APPEND',
    encryption_key: Optional[str] = None,
    **kwargs
) -> Dict[str, Any]:
    """
    Produce random test data and store it in a file or a database.

    Args:
        num_records: How many records to generate
        columns: Names of the columns to generate
        environment: Name of the environment the table schema is read from
        table: Name of the table the schema is read from
        output_file: Path of the file to write
        file_format: Format of the output file (csv, json)
        write_to_db: Whether the data is written to the database
        mode: How to write (APPEND, REPLACE, FAIL)
        encryption_key: Key for environments that are stored encrypted
        **kwargs: Extra generation parameters, passed through unchanged to
            the core module

    Returns:
        The core result converted with its ``to_dict()`` method
    """
    named_args = {
        'num_records': num_records,
        'columns': columns,
        'environment': environment,
        'table': table,
        'output_file': output_file,
        'file_format': file_format,
        'write_to_db': write_to_db,
        'mode': mode,
        'encryption_key': encryption_key,
    }
    # Extra keyword arguments are expanded separately, exactly as received.
    outcome = core_gen.generate_and_save(**named_args, **kwargs)
    return outcome.to_dict()
520
+
521
+
522
+ # Configuration Management Functions
523
+
524
def get_config() -> Dict[str, Any]:
    """
    Read the current configuration from the core config module.

    Returns:
        The ``data`` of the core result when it succeeded, otherwise an
        empty dictionary
    """
    outcome = core_config.get_config()
    if not outcome.success:
        return {}
    return outcome.data
533
+
534
+
535
def set_config(key: str, value: Any) -> bool:
    """
    Store a single configuration value.

    Args:
        key: Name of the configuration entry
        value: Value to store under that name

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    # Both arguments are handed over positionally, matching the core call.
    return core_config.set_config(key, value).success
548
+
549
+
550
def reset_config() -> bool:
    """
    Restore the configuration defaults through the core config module.

    Returns:
        bool: The ``success`` flag of the result returned by the core module
    """
    return core_config.reset_config().success
559
+
560
+
561
def get_config_info() -> Dict[str, Any]:
    """
    Fetch configuration details, as reported by ``core_config.show_config_info``.

    Returns:
        The ``data`` of the core result when it succeeded (described by the
        API as configuration and path information), otherwise an empty
        dictionary
    """
    outcome = core_config.show_config_info()
    if not outcome.success:
        return {}
    return outcome.data
elm/cli.py ADDED
@@ -0,0 +1,8 @@
1
+ from elm.elm import cli
2
+
3
def main():
    """Start the ELM tool by calling the imported ``cli`` callable."""
    cli()


# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
elm/core/__init__.py ADDED
@@ -0,0 +1,48 @@
1
+ """
2
+ ELM Tool Core Module
3
+
4
+ This module contains the core business logic for the ELM Tool, providing
5
+ unified implementations that are used by both CLI commands and API functions.
6
+
7
+ The core module ensures consistency between different interfaces and eliminates
8
+ code duplication by centralizing business logic.
9
+
10
+ Modules:
11
+ environment: Environment management operations
12
+ copy: Data copy operations (db2file, file2db, db2db)
13
+ masking: Data masking operations
14
+ generation: Data generation operations
15
+ exceptions: Custom exception classes
16
+ types: Type definitions and data models
17
+ """
18
+
19
+ from elm.core.exceptions import (
20
+ ELMError,
21
+ EnvironmentError,
22
+ CopyError,
23
+ MaskingError,
24
+ GenerationError,
25
+ ValidationError
26
+ )
27
+
28
+ from elm.core.types import (
29
+ EnvironmentConfig,
30
+ CopyConfig,
31
+ MaskingConfig,
32
+ GenerationConfig,
33
+ OperationResult
34
+ )
35
+
36
+ __all__ = [
37
+ 'ELMError',
38
+ 'EnvironmentError',
39
+ 'CopyError',
40
+ 'MaskingError',
41
+ 'GenerationError',
42
+ 'ValidationError',
43
+ 'EnvironmentConfig',
44
+ 'CopyConfig',
45
+ 'MaskingConfig',
46
+ 'GenerationConfig',
47
+ 'OperationResult'
48
+ ]