gptmed 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,40 +1,77 @@
  """
- Text data preprocessing and cleaning module
+ Text data preprocessing and cleaning module - Modular Architecture
 
- Handles text normalization, cleaning, tokenization, and validation
+ This module provides strategy-based text preprocessing with support for:
+ - Text cleaning (whitespace, special characters, HTML, URLs, emails)
+ - Unicode normalization with multiple forms
+ - Case conversion (lowercase, uppercase, title, sentence)
+ - Punctuation handling (removal, spacing normalization)
+ - Stopword removal with customizable lists
+ - Tokenization (word and sentence level)
+ - Text statistics extraction
+ - PDF text extraction and CSV export
+ - Batch file processing
+
+ Architecture:
+ Each preprocessing feature is implemented as a separate strategy class following
+ the Strategy Design Pattern and SOLID principles. This allows:
+ - Independent composition of preprocessing steps
+ - Easy addition of new strategies without modifying existing code
+ - Single Responsibility: Each class has one reason to change
+ - Dependency Inversion: Code depends on TextPreprocessingStrategy interface
+
+ Usage:
+     from gptmed.data_preparation.text import TextCleaner, CaseNormalizer, StopwordRemover
+
+     # Use individual strategies
+     cleaner = TextCleaner()
+     normalizer = CaseNormalizer(mode='lower')
+     stopwords = StopwordRemover()
+
+     # Compose into pipeline
+     text = "Your text here..."
+     text = cleaner.process(text)
+     text = normalizer.process(text)
+     text = stopwords.process(text)
+
+     # Get statistics
+     stats = cleaner.get_stats()
  """
 
- import re
- import string
- import unicodedata
  import logging
  from typing import Any, Dict, List, Optional
  from pathlib import Path
- import json
 
  from ..base import BaseDataPreprocessor, PreprocessingConfig
 
+ # Import all strategy classes
+ from .base_strategy import TextPreprocessingStrategy
+ from .text_cleaner import TextCleaner
+ from .unicode_normalizer import UnicodeNormalizer
+ from .case_normalizer import CaseNormalizer
+ from .punctuation_handler import PunctuationHandler
+ from .stopword_remover import StopwordRemover
+ from .tokenizer import Tokenizer
+ from .text_statistics import TextStatistics
+ from .pdf_processor import PDFProcessor
+
 
  logger = logging.getLogger(__name__)
 
 
  class TextPreprocessor(BaseDataPreprocessor):
      """
-     Text preprocessing with cleaning, normalization, and validation
+     Orchestrator for composing text preprocessing strategies
 
-     Features:
-     - Text cleaning (whitespace, special characters)
-     - Case normalization
-     - Unicode normalization
-     - Stopword removal
-     - Punctuation handling
-     - Language detection
-     - Sentiment preservation
+     This class provides a unified interface for text preprocessing by composing
+     individual strategy classes. It maintains backward compatibility with the
+     previous monolithic implementation while enabling flexible strategy composition.
      """
 
      def __init__(
          self,
          config: Optional[PreprocessingConfig] = None,
+         strategies: Optional[List[TextPreprocessingStrategy]] = None,
          remove_stopwords: bool = False,
          remove_punctuation: bool = False,
          lowercase: bool = True,
@@ -46,11 +83,12 @@ class TextPreprocessor(BaseDataPreprocessor):
 
          Args:
              config: PreprocessingConfig instance
-             remove_stopwords: Whether to remove common stopwords
-             remove_punctuation: Whether to remove punctuation
-             lowercase: Whether to convert to lowercase
-             min_length: Minimum text length to keep
-             max_length: Maximum text length (None for unlimited)
+             strategies: List of strategy instances to use in order
+             remove_stopwords: Whether to remove common stopwords (default False)
+             remove_punctuation: Whether to remove punctuation (default False)
+             lowercase: Whether to convert to lowercase (default True)
+             min_length: Minimum text length to keep (default 3)
+             max_length: Maximum text length, None for unlimited (default None)
          """
          if config is None:
              config = PreprocessingConfig(
@@ -61,26 +99,48 @@ class TextPreprocessor(BaseDataPreprocessor):
 
          super().__init__(config)
 
-         self.remove_stopwords = remove_stopwords
-         self.remove_punctuation = remove_punctuation
-         self.lowercase = lowercase
          self.min_length = min_length
          self.max_length = max_length
 
-         # Load stopwords
-         self.stopwords = self._load_stopwords() if remove_stopwords else set()
+         # Initialize strategies
+         if strategies is None:
+             strategies = self._create_default_pipeline(
+                 remove_stopwords=remove_stopwords,
+                 remove_punctuation=remove_punctuation,
+                 lowercase=lowercase,
+             )
+
+         self.strategies = strategies
+
+         # Initialize individual strategies for backward compatibility
+         self.text_cleaner = TextCleaner()
+         self.unicode_normalizer = UnicodeNormalizer()
+         self.tokenizer = Tokenizer()
+         self.text_stats = TextStatistics()
+         self.pdf_processor = PDFProcessor()
 
-     def _load_stopwords(self) -> set:
-         """Load common English stopwords"""
-         # Basic English stopwords
-         stopwords = {
-             'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have',
-             'i', 'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you',
-             'do', 'at', 'this', 'but', 'his', 'by', 'from', 'is', 'was',
-             'are', 'been', 'were', 'or', 'an', 'which', 'their', 'what',
-             'so', 'up', 'out', 'if', 'about', 'who', 'get', 'them', 'me',
-         }
-         return stopwords
+     def _create_default_pipeline(
+         self,
+         remove_stopwords: bool = False,
+         remove_punctuation: bool = False,
+         lowercase: bool = True,
+     ) -> List[TextPreprocessingStrategy]:
+         """Create default preprocessing pipeline"""
+         pipeline = [
+             TextCleaner(),
+             UnicodeNormalizer(),
+         ]
+
+         if lowercase:
+             pipeline.append(CaseNormalizer(mode='lower'))
+
+         if remove_punctuation:
+             pipeline.append(PunctuationHandler(remove=True))
+
+         if remove_stopwords:
+             pipeline.append(StopwordRemover())
+
+         return pipeline
 
      def validate(self, data: Any) -> bool:
          """
@@ -106,73 +166,72 @@ class TextPreprocessor(BaseDataPreprocessor):
 
          return True
 
-     def clean(self, text: str) -> str:
+     def clean(self, data: Any) -> Optional[str]:
          """
-         Clean text by removing artifacts and normalizing
+         Clean text data (remove HTML, URLs, emails, etc.)
 
          Args:
-             text: Raw text
+             data: Input text
 
          Returns:
              Cleaned text
          """
-         # Remove leading/trailing whitespace
-         text = text.strip()
+         if not self.validate(data):
+             return None
 
-         # Normalize unicode (NFD - decomposed form)
-         text = unicodedata.normalize('NFD', text)
-         text = ''.join(ch for ch in text if unicodedata.category(ch) != 'Mn')
-
-         # Remove HTML tags
-         text = re.sub(r'<[^>]+>', '', text)
-
-         # Remove URLs
-         text = re.sub(r'http[s]?://\S+', '', text)
-         text = re.sub(r'www\.\S+', '', text)
-
-         # Remove email addresses
-         text = re.sub(r'\S+@\S+', '', text)
+         text = data
+         # Apply TextCleaner from strategies
+         cleaner = TextCleaner()
+         text = cleaner.process(text)
+         return text
+
+     def normalize(self, data: Any) -> Optional[str]:
+         """
+         Normalize text (unicode, case, punctuation, stopwords)
 
-         # Remove extra whitespace
-         text = re.sub(r'\s+', ' ', text)
+         Args:
+             data: Input text (may be already cleaned)
+
+         Returns:
+             Normalized text
+         """
+         if not isinstance(data, str):
+             return None
 
-         # Remove common control characters
-         text = ''.join(ch for ch in text if ch.isprintable() or ch.isspace())
+         text = data
+         # Apply remaining strategies except TextCleaner
+         for strategy in self.strategies:
+             if not isinstance(strategy, TextCleaner):
+                 text = strategy.process(text)
+                 if not text:
+                     return None
 
-         return text.strip()
+         return text.strip() if text else None
 
-     def normalize(self, text: str) -> str:
+     def process(self, data: Any) -> Optional[str]:
          """
-         Normalize text
+         Process text through the pipeline of strategies
 
          Args:
-             text: Cleaned text
+             data: Input text
 
          Returns:
-             Normalized text
+             Processed text or None if validation fails
          """
-         # Convert to lowercase if specified
-         if self.lowercase:
-             text = text.lower()
-
-         # Optional punctuation removal
-         if self.remove_punctuation:
-             text = text.translate(str.maketrans('', '', string.punctuation))
-         else:
-             # Just normalize spacing around punctuation
-             text = re.sub(r'\s+([.!?,;:])', r'\1', text)
+         if not self.validate(data):
+             return None
 
-         # Remove stopwords if specified
-         if self.remove_stopwords:
-             words = text.split()
-             words = [w for w in words if w not in self.stopwords]
-             text = ' '.join(words)
+         text = data
+         for strategy in self.strategies:
+             text = strategy.process(text)
+             if not text:
+                 return None
 
-         return text.strip()
+         return text.strip() if text else None
 
      def tokenize(self, text: str) -> List[str]:
          """
-         Simple word tokenization
+         Tokenize text into words
 
          Args:
              text: Text to tokenize
@@ -180,16 +239,15 @@ class TextPreprocessor(BaseDataPreprocessor):
          Returns:
              List of tokens
          """
-         # Process text first
          processed = self.process(text)
          if processed is None:
              return []
 
-         return processed.split()
+         return self.tokenizer.tokenize(processed)
 
      def get_text_stats(self, text: str) -> Dict[str, Any]:
          """
-         Get statistics about the text
+         Get statistics about text (before processing)
 
          Args:
              text: Input text
@@ -197,23 +255,7 @@ class TextPreprocessor(BaseDataPreprocessor):
          Returns:
              Dictionary with text statistics
          """
-         processed = self.process(text)
-         if processed is None:
-             return {}
-
-         words = processed.split()
-         sentences = re.split(r'[.!?]+', processed)
-         sentences = [s.strip() for s in sentences if s.strip()]
-
-         return {
-             'original_length': len(text),
-             'cleaned_length': len(processed),
-             'word_count': len(words),
-             'sentence_count': len(sentences),
-             'avg_word_length': sum(len(w) for w in words) / len(words) if words else 0,
-             'unique_words': len(set(words)),
-             'vocabulary_diversity': len(set(words)) / len(words) if words else 0,
-         }
+         return self.text_stats.get_text_stats(text)
 
      def batch_process_files(
          self,
@@ -266,3 +308,51 @@ class TextPreprocessor(BaseDataPreprocessor):
 
          self.logger.info(f"Processed {len(results)} files")
          return {'results': results, 'stats': self.get_statistics()}
+
+     # PDF-related methods delegated to PDFProcessor
+     def extract_text_from_pdf(self, pdf_path: str) -> Optional[str]:
+         """Extract text from a single PDF file"""
+         try:
+             return self.pdf_processor.extract_text_from_pdf(pdf_path)
+         except Exception as e:
+             self.logger.error(f"Error extracting PDF: {str(e)}")
+             return None
+
+     def batch_process_pdfs(self, input_dir: str, output_dir: Optional[str] = None) -> Dict[str, Any]:
+         """Process multiple PDF files from a directory"""
+         output_dir = output_dir or self.config.output_path
+         return self.pdf_processor.batch_process_pdfs(input_dir, output_dir)
+
+     def export_to_csv(
+         self,
+         pdf_dir: str,
+         output_csv: str,
+         process_text: bool = True,
+     ) -> Dict[str, Any]:
+         """Export PDF text to CSV with filename and text content columns"""
+         return self.pdf_processor.export_to_csv(pdf_dir, output_csv)
+
+     def export_to_csv_detailed(
+         self,
+         pdf_dir: str,
+         output_csv: str,
+         process_text: bool = True,
+         include_stats: bool = True,
+     ) -> Dict[str, Any]:
+         """Export PDF text to CSV with additional statistics"""
+         return self.pdf_processor.export_to_csv_detailed(pdf_dir, output_csv)
+
+
+ # Export all strategy classes for direct import
+ __all__ = [
+     'TextPreprocessor',
+     'TextPreprocessingStrategy',
+     'TextCleaner',
+     'UnicodeNormalizer',
+     'CaseNormalizer',
+     'PunctuationHandler',
+     'StopwordRemover',
+     'Tokenizer',
+     'TextStatistics',
+     'PDFProcessor',
+ ]
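
The new `strategies` argument makes the pipeline composition explicit. Below is a minimal sketch of how a caller could assemble a custom pipeline, using only the constructors and methods visible in this diff; the behaviour of each strategy lives in the new per-strategy modules, which are not shown here, so the concrete output is illustrative:

    from gptmed.data_preparation.text import (
        TextPreprocessor,
        TextCleaner,
        UnicodeNormalizer,
        CaseNormalizer,
        StopwordRemover,
    )

    # Strategies are applied in list order by TextPreprocessor.process()
    preprocessor = TextPreprocessor(
        strategies=[
            TextCleaner(),                 # cleaning: whitespace, HTML, URLs, emails
            UnicodeNormalizer(),           # unicode normalization
            CaseNormalizer(mode='lower'),  # lowercase conversion
            StopwordRemover(),             # drop common stopwords
        ]
    )

    processed = preprocessor.process("Visit <b>https://example.org</b> for More Info!")
    tokens = preprocessor.tokenize("Another document to split into words")

Omitting `strategies` falls back to `_create_default_pipeline()`, which reproduces the behaviour of the old `lowercase`, `remove_punctuation`, and `remove_stopwords` flags.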
@@ -0,0 +1,28 @@
+ """
+ Base strategy interface for text preprocessing
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Any, Dict
+
+
+ class TextPreprocessingStrategy(ABC):
+     """Abstract base class for all text preprocessing strategies"""
+
+     @abstractmethod
+     def process(self, text: str) -> str:
+         """
+         Process the text according to the strategy
+
+         Args:
+             text: Input text to process
+
+         Returns:
+             Processed text
+         """
+         pass
+
+     @abstractmethod
+     def get_stats(self) -> Dict[str, Any]:
+         """Get statistics about the processing"""
+         pass
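
Because the interface above only requires `process()` and `get_stats()`, a project-specific step can be added without touching the shipped strategies. A hypothetical sketch follows; the `WhitespaceCollapser` name and behaviour are illustrative and not part of the package:

    import re
    from typing import Any, Dict

    from gptmed.data_preparation.text import TextPreprocessingStrategy, TextPreprocessor


    class WhitespaceCollapser(TextPreprocessingStrategy):
        """Illustrative custom strategy: collapse runs of whitespace into single spaces."""

        def __init__(self) -> None:
            self.texts_processed = 0

        def process(self, text: str) -> str:
            # Collapse tabs, newlines, and repeated spaces into one space
            self.texts_processed += 1
            return re.sub(r'\s+', ' ', text).strip()

        def get_stats(self) -> Dict[str, Any]:
            # Report how many texts passed through this strategy
            return {'texts_processed': self.texts_processed}


    # The custom strategy slots into the orchestrator like any built-in one
    preprocessor = TextPreprocessor(strategies=[WhitespaceCollapser()])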
@@ -0,0 +1,274 @@
+ """
+ Batch PDF Processing (Intermediate Step)
+
+ Processes all PDFs from a directory in parallel and extracts their text.
+ Note: Does not save files - text is processed in-memory for the preprocessing pipeline.
+
+ Usage:
+     python3 batch_pdf_to_jsonl.py [--input-dir ./pdfs] [--output-dir ./output] [--workers 4]
+ """
+
+ import sys
+ import json
+ import logging
+ import time
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from dataclasses import dataclass, asdict
+
+ # Adjust path if running from workspace
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
+
+ from gptmed.data_preparation.text import PDFProcessor
+
+
+ # Configure logging
+ logging.basicConfig(
+     level=logging.INFO,
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class PDFRecord:
+     """Data class for a single PDF record"""
+     filename: str
+     text: str
+     word_count: int
+     char_count: int
+     sentence_count: int
+     extraction_time: float
+     status: str
+
+
+ class PDFBatchProcessor:
+     """Process multiple PDFs in parallel and export to JSONL"""
+
+     def __init__(
+         self,
+         input_dir: str = "./pdfs",
+         output_dir: str = "./output",
+         max_workers: int = 4,
+     ):
+         """
+         Initialize batch processor
+
+         Args:
+             input_dir: Directory containing PDF files
+             output_dir: Directory for output JSONL files
+             max_workers: Number of parallel workers
+         """
+         self.input_dir = Path(input_dir)
+         self.output_dir = Path(output_dir)
+         self.max_workers = max_workers
+
+         # Create directories if they don't exist
+         self.input_dir.mkdir(parents=True, exist_ok=True)
+         self.output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Initialize PDF processor
+         self.pdf_processor = PDFProcessor(max_workers=max_workers, use_threading=True)
+
+         self.logger = logging.getLogger(self.__class__.__name__)
+
+     def _extract_pdf(self, pdf_file: Path) -> Optional[PDFRecord]:
+         """
+         Extract text from a single PDF file
+
+         Args:
+             pdf_file: Path to PDF file
+
+         Returns:
+             PDFRecord with extracted information or None if failed
+         """
+         start_time = time.time()
+
+         try:
+             text = self.pdf_processor.extract_text_from_pdf(str(pdf_file))
+
+             # Calculate statistics
+             word_count = len(text.split())
+             char_count = len(text)
+             sentence_count = len([s for s in text.split('.') if s.strip()])
+             extraction_time = time.time() - start_time
+
+             record = PDFRecord(
+                 filename=pdf_file.name,
+                 text=text,
+                 word_count=word_count,
+                 char_count=char_count,
+                 sentence_count=sentence_count,
+                 extraction_time=extraction_time,
+                 status="success"
+             )
+
+             self.logger.info(
+                 f"✓ Extracted: {pdf_file.name} "
+                 f"({word_count} words, {char_count} chars) in {extraction_time:.2f}s"
+             )
+
+             return record
+
+         except Exception as e:
+             self.logger.error(f"✗ Failed to extract {pdf_file.name}: {str(e)}")
+             return PDFRecord(
+                 filename=pdf_file.name,
+                 text="",
+                 word_count=0,
+                 char_count=0,
+                 sentence_count=0,
+                 extraction_time=time.time() - start_time,
+                 status=f"error: {str(e)}"
+             )
+
+     def _process_pdfs_parallel(self) -> List[PDFRecord]:
+         """
+         Process all PDFs in parallel
+
+         Returns:
+             List of PDFRecord objects
+         """
+         # Find all PDF files
+         pdf_files = sorted(self.input_dir.glob('*.pdf'))
+
+         if not pdf_files:
+             self.logger.warning(f"No PDF files found in {self.input_dir}")
+             return []
+
+         self.logger.info(
+             f"Processing {len(pdf_files)} PDF files with {self.max_workers} workers..."
+         )
+
+         records = []
+
+         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+             # Submit all PDF extraction tasks
+             future_to_file = {
+                 executor.submit(self._extract_pdf, pdf_file): pdf_file
+                 for pdf_file in pdf_files
+             }
+
+             # Process completed tasks as they finish
+             for i, future in enumerate(as_completed(future_to_file), 1):
+                 try:
+                     record = future.result()
+                     if record:
+                         records.append(record)
+                 except Exception as e:
+                     self.logger.error(f"Error processing future: {str(e)}")
+
+         return records
+
+
+
+     def process(self) -> Dict[str, Any]:
+         """
+         Main processing pipeline
+
+         Returns:
+             Dictionary with processing results and statistics
+         """
+         start_time = time.time()
+
+         self.logger.info(f"Input directory: {self.input_dir}")
+         self.logger.info(f"Output directory: {self.output_dir}")
+
+         # Step 1: Process all PDFs in parallel
+         records = self._process_pdfs_parallel()
+
+         if not records:
+             self.logger.warning("No records to process")
+             return {
+                 'status': 'failure',
+                 'message': 'No PDFs were successfully processed',
+                 'total_pdfs': 0,
+                 'successful_extractions': 0,
+                 'failed_extractions': 0,
+                 'individual_jsonl_files': 0,
+                 'combined_jsonl_created': False,
+                 'total_time': 0,
+             }
+
+         # Count successes and failures
+         successful = [r for r in records if r.status == "success"]
+         failed = [r for r in records if r.status != "success"]
+
+         self.logger.info(f"\nExtraction Results:")
+         self.logger.info(f" ✓ Successful: {len(successful)}/{len(records)}")
+         self.logger.info(f" ✗ Failed: {len(failed)}/{len(records)}")
+
+         total_time = time.time() - start_time
+
+         # Calculate statistics
+         total_words = sum(r.word_count for r in successful)
+         total_chars = sum(r.char_count for r in successful)
+
+         # Print summary
+         self.logger.info(f"\n" + "="*60)
+         self.logger.info(f"Processing Summary")
+         self.logger.info(f"="*60)
+         self.logger.info(f"Total PDFs: {len(records)}")
+         self.logger.info(f"Successfully processed: {len(successful)}")
+         self.logger.info(f"Failed: {len(failed)}")
+         self.logger.info(f"Total words extracted: {total_words:,}")
+         self.logger.info(f"Total characters: {total_chars:,}")
+         self.logger.info(f"Note: Records processed in-memory for preprocessing")
+         self.logger.info(f"Total processing time: {total_time:.2f}s")
+         self.logger.info(f"="*60)
+
+         return {
+             'status': 'success',
+             'records': records,
+             'total_pdfs': len(records),
+             'successful_extractions': len(successful),
+             'failed_extractions': len(failed),
+             'total_words': total_words,
+             'total_characters': total_chars,
+             'total_time': total_time,
+         }
+
+
+ def main():
+     """Main entry point"""
+     import argparse
+
+     parser = argparse.ArgumentParser(
+         description='Batch process PDFs and export to JSONL format'
+     )
+     parser.add_argument(
+         '--input-dir',
+         default='./pdfs',
+         help='Input directory containing PDFs (default: ./pdfs)'
+     )
+     parser.add_argument(
+         '--output-dir',
+         default='./output',
+         help='Output directory for JSONL files (default: ./output)'
+     )
+     parser.add_argument(
+         '--workers',
+         type=int,
+         default=4,
+         help='Number of parallel workers (default: 4)'
+     )
+
+     args = parser.parse_args()
+
+     # Create and run processor
+     processor = PDFBatchProcessor(
+         input_dir=args.input_dir,
+         output_dir=args.output_dir,
+         max_workers=args.workers,
+     )
+
+     result = processor.process()
+
+     # Return exit code
+     return 0 if result['status'] == 'success' else 1
+
+
+ if __name__ == '__main__':
+     exit_code = main()
+     sys.exit(exit_code)
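
Besides the CLI entry point, the batch processor can be driven programmatically. A short sketch follows; the result keys are the ones returned by `process()` above, while the module name is assumed from the Usage string in its docstring:

    from batch_pdf_to_jsonl import PDFBatchProcessor  # module name assumed from the docstring

    processor = PDFBatchProcessor(
        input_dir='./pdfs',
        output_dir='./output',
        max_workers=8,
    )
    result = processor.process()

    if result['status'] == 'success':
        for record in result['records']:
            # Each record is a PDFRecord dataclass instance
            print(record.filename, record.word_count, record.status)
    else:
        print(result.get('message', 'No PDFs processed'))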