rust-crate-pipeline 1.2.5-py3-none-any.whl → 1.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. rust_crate_pipeline/__init__.py +25 -25
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +309 -200
  4. rust_crate_pipeline/analysis.py +304 -368
  5. rust_crate_pipeline/azure_ai_processing.py +453 -0
  6. rust_crate_pipeline/config.py +57 -19
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +42 -36
  14. rust_crate_pipeline/main.py +386 -102
  15. rust_crate_pipeline/network.py +153 -133
  16. rust_crate_pipeline/pipeline.py +340 -264
  17. rust_crate_pipeline/production_config.py +35 -32
  18. rust_crate_pipeline/scraping/__init__.py +13 -0
  19. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  20. rust_crate_pipeline/unified_llm_processor.py +637 -0
  21. rust_crate_pipeline/unified_pipeline.py +548 -0
  22. rust_crate_pipeline/utils/file_utils.py +45 -14
  23. rust_crate_pipeline/utils/logging_utils.py +34 -17
  24. rust_crate_pipeline/version.py +47 -2
  25. rust_crate_pipeline-1.3.0.dist-info/METADATA +331 -0
  26. rust_crate_pipeline-1.3.0.dist-info/RECORD +30 -0
  27. rust_crate_pipeline-1.2.5.dist-info/METADATA +0 -573
  28. rust_crate_pipeline-1.2.5.dist-info/RECORD +0 -19
  29. {rust_crate_pipeline-1.2.5.dist-info → rust_crate_pipeline-1.3.0.dist-info}/WHEEL +0 -0
  30. {rust_crate_pipeline-1.2.5.dist-info → rust_crate_pipeline-1.3.0.dist-info}/entry_points.txt +0 -0
  31. {rust_crate_pipeline-1.2.5.dist-info → rust_crate_pipeline-1.3.0.dist-info}/licenses/LICENSE +0 -0
  32. {rust_crate_pipeline-1.2.5.dist-info → rust_crate_pipeline-1.3.0.dist-info}/top_level.txt +0 -0
rust_crate_pipeline/main.py
@@ -1,17 +1,36 @@
 # main.py
-import os
 import sys
 import time
 import logging
 import shutil
 import argparse
-from typing import Optional
+import os
+import subprocess
+from typing import Any, TYPE_CHECKING
+
 from .config import PipelineConfig
 from .pipeline import CrateDataPipeline
 from .production_config import setup_production_environment
 from .github_token_checker import check_and_setup_github_token
 
-def parse_arguments():
+# Optional Sigil import with fallback
+_sigil_available = True
+SigilCompliantPipeline = None
+
+try:
+    sys.path.append(".")  # Add current directory to path
+    from sigil_enhanced_pipeline import SigilCompliantPipeline
+
+    _sigil_available = True
+except ImportError:
+    _sigil_available = False
+    if TYPE_CHECKING:
+        from sigil_enhanced_pipeline import SigilCompliantPipeline
+    else:
+        SigilCompliantPipeline = None  # type: ignore[assignment,misc]
+
+
+def parse_arguments() -> argparse.Namespace:
     """Parse command line arguments"""
     parser = argparse.ArgumentParser(
         description="Rust Crate Data Processing Pipeline",
@@ -24,211 +43,476 @@ Examples:
   python -m rust_crate_pipeline --output-dir ./data # Custom output directory
   python -m rust_crate_pipeline --log-level DEBUG # Verbose logging
   PRODUCTION=true python -m rust_crate_pipeline # Production mode (quieter)
-"""
+""",
     )
-
+
     parser.add_argument(
-        '--limit', '-l',
+        "--limit",
+        "-l",
         type=int,
         default=None,
-        help='Limit the number of crates to process (default: process all)'
+        help="Limit the number of crates to process (default: process all)",
     )
-
+
     parser.add_argument(
-        '--batch-size', '-b',
+        "--batch-size",
+        "-b",
         type=int,
         default=10,
-        help='Number of crates to process in each batch (default: 10)'
+        help="Number of crates to process in each batch (default: 10)",
     )
-
+
     parser.add_argument(
-        '--workers', '-w',
+        "--workers",
+        "-w",
         type=int,
         default=4,
-        help='Number of parallel workers for API requests (default: 4)'
+        help="Number of parallel workers for API requests (default: 4)",
     )
-
+
     parser.add_argument(
-        '--output-dir', '-o',
+        "--output-dir",
+        "-o",
         type=str,
         default=None,
-        help='Output directory for results (default: auto-generated timestamped directory)'
+        help=(
+            "Output directory for results (default: auto-generated timestamped "
+            "directory)"
+        ),
     )
-
+
     parser.add_argument(
-        '--model-path', '-m',
+        "--model-path",
+        "-m",
         type=str,
         default=None,
-        help='Path to the LLM model file (default: ~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf)'
+        help=(
+            "Path to the LLM model file (default: ~/models/deepseek/deepseek-coder-"
+            "6.7b-instruct.Q4_K_M.gguf)"
+        ),
     )
-
+
     parser.add_argument(
-        '--max-tokens',
+        "--max-tokens",
         type=int,
         default=256,
-        help='Maximum tokens for LLM generation (default: 256)'
+        help="Maximum tokens for LLM generation (default: 256)",
    )
-
+
     parser.add_argument(
-        '--checkpoint-interval',
+        "--checkpoint-interval",
         type=int,
         default=10,
-        help='Save checkpoint every N crates (default: 10)'
+        help="Save checkpoint every N crates (default: 10)",
+    )
+
+    parser.add_argument(
+        "--log-level",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
+        default="INFO",
+        help="Logging level (default: INFO)",
+    )
+
+    parser.add_argument(
+        "--skip-ai",
+        action="store_true",
+        help="Skip AI enrichment (faster, metadata only)",
+    )
+
+    parser.add_argument(
+        "--skip-source-analysis",
+        action="store_true",
+        help="Skip source code analysis",
     )
-
+
+    # Enhanced scraping with Crawl4AI
+    parser.add_argument(
+        "--enable-crawl4ai",
+        action="store_true",
+        default=True,
+        help="Enable enhanced web scraping with Crawl4AI (default: enabled)",
+    )
+
+    parser.add_argument(
+        "--disable-crawl4ai",
+        action="store_true",
+        help="Disable Crawl4AI enhanced scraping (use basic scraping only)",
+    )
+
     parser.add_argument(
-        '--log-level',
-        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
-        default='INFO',
-        help='Logging level (default: INFO)'
+        "--crawl4ai-model",
+        type=str,
+        default="~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf",
+        help=(
+            "GGUF model path for Crawl4AI content analysis (default: ~/models/deepseek/"
+            "deepseek-coder-6.7b-instruct.Q4_K_M.gguf)"
+        ),
     )
-
+
     parser.add_argument(
-        '--skip-ai',
-        action='store_true',
-        help='Skip AI enrichment (faster, metadata only)'
+        "--enable-sigil-protocol",
+        action="store_true",
+        help="Enable Sigil Protocol Sacred Chain processing (Rule Zero compliance)",
     )
-
+
     parser.add_argument(
-        '--skip-source-analysis',
-        action='store_true',
-        help='Skip source code analysis'
+        "--sigil-mode",
+        choices=["enhanced", "direct-llm", "hybrid"],
+        default="enhanced",
+        help=(
+            "Sigil processing mode: enhanced (API-based), direct-llm (local), "
+            "hybrid (both)"
+        ),
     )
-
+
     parser.add_argument(
-        '--crate-list',
+        "--crate-list",
         type=str,
-        nargs='+',
-        help='Specific crates to process (space-separated list)'
+        nargs="+",
+        help="Specific crates to process (space-separated list)",
     )
-
+
     parser.add_argument(
-        '--config-file',
+        "--config-file",
         type=str,
-        help='JSON config file to override default settings'
+        help="JSON config file to override default settings",
     )
-
+
     return parser.parse_args()
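Two of the new flags interact: --enable-crawl4ai is a store_true option whose default is already True, so Crawl4AI stays enabled unless --disable-crawl4ai is passed; main() later folds the pair into one effective setting. A small, illustrative check of that resolution (this rebuilds only the two flags from the diff, not the full CLI):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--enable-crawl4ai", action="store_true", default=True)
    parser.add_argument("--disable-crawl4ai", action="store_true")

    args = parser.parse_args(["--disable-crawl4ai"])
    # Same expression main() uses to compute the effective setting.
    enable_crawl4ai = args.enable_crawl4ai and not args.disable_crawl4ai
    print(enable_crawl4ai)  # False: the disable flag wins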
 
-def configure_logging(log_level: str = 'INFO'):
+
+def configure_logging(log_level: str = "INFO") -> None:
     """Configure logging with both console and file output"""
     level = getattr(logging, log_level.upper())
-
+
     # Clear any existing handlers to avoid conflicts
     root_logger = logging.getLogger()
     for handler in root_logger.handlers[:]:
         root_logger.removeHandler(handler)
-
+
     # Set root logger level
     root_logger.setLevel(level)
-
+
     # Create formatters
     detailed_formatter = logging.Formatter(
         "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-        datefmt='%Y-%m-%d %H:%M:%S'
-    )
-    simple_formatter = logging.Formatter(
-        "%(asctime)s [%(levelname)s] %(message)s"
+        datefmt="%Y-%m-%d %H:%M:%S",
     )
-
+    simple_formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")
+
     # Console handler
     console_handler = logging.StreamHandler()
     console_handler.setLevel(level)
     console_handler.setFormatter(simple_formatter)
     root_logger.addHandler(console_handler)
-
+
     # File handler with unique timestamp
     log_filename = f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log"
     try:
-        file_handler = logging.FileHandler(log_filename, mode='w', encoding='utf-8')
+        file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8")
         file_handler.setLevel(logging.DEBUG)  # Always capture DEBUG+ to file
         file_handler.setFormatter(detailed_formatter)
         root_logger.addHandler(file_handler)
-
+
         # Log a test message to verify file handler works
         logging.info(f"Logging initialized - file: {log_filename}")
-
+
     except Exception as e:
         logging.error(f"Failed to create log file {log_filename}: {e}")
         print(f"Warning: Could not create log file: {e}")
-
+
     # Set library loggers to less verbose levels
-    logging.getLogger('requests').setLevel(logging.WARNING)
-    logging.getLogger('urllib3').setLevel(logging.WARNING)
-    logging.getLogger('requests_cache').setLevel(logging.WARNING)
-    logging.getLogger('llama_cpp').setLevel(logging.WARNING)
+    logging.getLogger("requests").setLevel(logging.WARNING)
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+    logging.getLogger("requests_cache").setLevel(logging.WARNING)
+    logging.getLogger("llama_cpp").setLevel(logging.WARNING)
 
-def check_disk_space():
+
+def check_disk_space() -> None:
+    """Check if there is at least 1GB of free disk space, log a warning if not."""
     if shutil.disk_usage(".").free < 1_000_000_000:  # 1GB
         logging.warning("Low disk space! This may affect performance.")
 
-def main():
+
+def enforce_rule_zero_reinforcement() -> None:
+    """
+    Enforce Rule Zero rigor by validating the canonical DB hash/signature
+    before pipeline actions.
+
+    Allows override for local dev, but enforces in CI/prod. Logs all events
+    for traceability.
+    """
+    enforce: bool = (
+        os.environ.get("ENFORCE_RULE_ZERO", "false").lower() == "true"
+        or os.environ.get("CI", "false").lower() == "true"
+        or os.environ.get("PRODUCTION", "false").lower() == "true"
+    )
+    if not enforce:
+        logging.info("Rule Zero DB hash/signature check skipped (dev mode or override)")
+        return
+
+    # Detect project root robustly (works in subdirs, CI, etc.)
+    try:
+        result = subprocess.run(
+            ["git", "rev-parse", "--show-toplevel"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+        project_root: str = result.stdout.strip()
+    except Exception as e:
+        logging.critical(f"Failed to detect project root for Rule Zero validation: {e}")
+        sys.exit(1)
+
+    db_path: str = os.path.join(project_root, "sigil_rag_cache.db")
+    hash_path: str = os.path.join(project_root, "sigil_rag_cache.hash")
+
+    # Validate DB hash/signature using the provided script with explicit arguments
+    try:
+        logging.info("Validating Rule Zero DB hash/signature...")
+        result = subprocess.run(
+            [
+                sys.executable,
+                os.path.join(project_root, "audits", "validate_db_hash.py"),
+                "--db",
+                db_path,
+                "--expected-hash",
+                hash_path,
+            ],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        if result.returncode != 0:
+            logging.error(
+                f"Rule Zero DB hash/signature validation failed: "
+                f"{result.stdout}\n{result.stderr}"
+            )
+            # Allow manual override with justification
+            override_justification = os.environ.get("RULE_ZERO_OVERRIDE", "")
+            if override_justification:
+                logging.warning(
+                    "Manual override of Rule Zero DB hash/signature validation enabled."
+                )
+                logging.warning(f"Override justification: {override_justification}")
+            else:
+                logging.critical(
+                    "Rule Zero DB hash/signature validation failed and no override "
+                    "provided. Exiting."
+                )
+                sys.exit(1)
+        else:
+            logging.info("Rule Zero DB hash/signature validation successful.")
+    except Exception as e:
+        logging.critical(
+            f"Exception during Rule Zero DB hash/signature validation: {e}"
+        )
+        sys.exit(1)
+
+    # Log environment metadata for traceability
+    try:
+        subprocess.run(
+            [
+                sys.executable,
+                os.path.join(project_root, "scripts", "cache_env_metadata.py"),
+            ],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+    except Exception as e:
+        logging.warning(f"Failed to cache environment metadata: {e}")
+
+
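enforce_rule_zero_reinforcement() shells out to audits/validate_db_hash.py, which is not included in this wheel, passing --db and --expected-hash and treating a non-zero exit code as failure. A hypothetical sketch of what such a validator could look like, assuming the .hash file stores a hex SHA-256 digest of the database (the real script's behavior may differ):

    import argparse
    import hashlib
    import sys

    def main() -> int:
        # Illustrative stand-in for audits/validate_db_hash.py; flags mirror the call site above.
        parser = argparse.ArgumentParser()
        parser.add_argument("--db", required=True)
        parser.add_argument("--expected-hash", required=True)
        args = parser.parse_args()

        with open(args.db, "rb") as f:
            actual = hashlib.sha256(f.read()).hexdigest()
        with open(args.expected_hash, encoding="utf-8") as f:
            expected = f.read().strip()

        # Non-zero exit signals failure, which the caller logs and may treat as fatal.
        return 0 if actual == expected else 1

    if __name__ == "__main__":
        sys.exit(main())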
+def main() -> None:
+    # Enforce Rule Zero rigor before any pipeline action
+    enforce_rule_zero_reinforcement()
+
     # Setup production environment first for optimal logging
-    prod_config = setup_production_environment()
-
+    logging.debug("Starting main() function - setting up production environment")
+    prod_config: dict[str, Any] = setup_production_environment()
+    logging.debug(f"Production environment setup complete: {bool(prod_config)}")
+
+    logging.debug("Parsing command line arguments")
     args = parse_arguments()
+    logging.debug(f"Arguments parsed: {vars(args)}")
+
+    logging.debug(f"Configuring logging with level: {args.log_level}")
     configure_logging(args.log_level)
+    logging.info("Logging configuration complete")
+
+    logging.debug("Checking disk space")
     check_disk_space()
-
+    logging.debug("Disk space check complete")
+
     # Check GitHub token before proceeding
+    logging.debug("Checking GitHub token setup")
     if not check_and_setup_github_token():
         logging.error("GitHub token setup cancelled or failed. Exiting.")
         sys.exit(1)
-
+    logging.info("GitHub token validation successful")
+
     try:
         # Create config from command line arguments
-        config_kwargs = {}
-
+        logging.debug("Building configuration from arguments")
+        config_kwargs: dict[str, Any] = {}
+
         # Apply production optimizations if available
         if prod_config:
-            config_kwargs.update({
-                'max_retries': prod_config.get('max_retries', 3),
-                'batch_size': prod_config.get('batch_size', 10),
-                'checkpoint_interval': prod_config.get('checkpoint_interval', 10),
-                'cache_ttl': prod_config.get('cache_ttl', 3600),
-            })
-
+            logging.debug(f"Applying production config: {prod_config}")
+            config_kwargs.update(
+                {
+                    "max_retries": prod_config.get("max_retries", 3),
+                    "batch_size": prod_config.get("batch_size", 10),
+                    "checkpoint_interval": prod_config.get("checkpoint_interval", 10),
+                }
+            )
+
         if args.batch_size:
-            config_kwargs['batch_size'] = args.batch_size
+            logging.debug(f"Setting batch_size to {args.batch_size}")
+            config_kwargs["batch_size"] = args.batch_size
         if args.workers:
-            config_kwargs['n_workers'] = args.workers
+            logging.debug(f"Setting n_workers to {args.workers}")
+            config_kwargs["n_workers"] = args.workers
         if args.model_path:
-            config_kwargs['model_path'] = args.model_path
+            logging.debug(f"Setting model_path to {args.model_path}")
+            config_kwargs["model_path"] = args.model_path
         if args.max_tokens:
-            config_kwargs['max_tokens'] = args.max_tokens
+            logging.debug(f"Setting max_tokens to {args.max_tokens}")
+            config_kwargs["max_tokens"] = args.max_tokens
         if args.checkpoint_interval:
-            config_kwargs['checkpoint_interval'] = args.checkpoint_interval
-
+            logging.debug(f"Setting checkpoint_interval to {args.checkpoint_interval}")
+            config_kwargs["checkpoint_interval"] = args.checkpoint_interval
+
         # Load config file if provided
         if args.config_file:
+            logging.debug(f"Loading config file: {args.config_file}")
             import json
-            with open(args.config_file, 'r') as f:
+
+            with open(args.config_file) as f:
                 file_config = json.load(f)
-            config_kwargs.update(file_config)
-
+            logging.debug(f"Config file loaded: {file_config}")
+            config_kwargs.update(file_config)  # type: ignore
+
+        # Handle Crawl4AI configuration
+        logging.debug("Configuring Crawl4AI settings")
+        enable_crawl4ai = (
+            args.enable_crawl4ai and not args.disable_crawl4ai
+            if hasattr(args, "disable_crawl4ai")
+            else True
+        )
+        logging.debug(f"Crawl4AI enabled: {enable_crawl4ai}")
+        config_kwargs.update(
+            {
+                "enable_crawl4ai": enable_crawl4ai,
+                "crawl4ai_model": getattr(
+                    args,
+                    "crawl4ai_model",
+                    "~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf",
+                ),
+            }
+        )
+
+        logging.debug(f"Creating PipelineConfig with kwargs: {config_kwargs}")
         config = PipelineConfig(**config_kwargs)
-
+        logging.info("Pipeline configuration created successfully")
+
         # Pass additional arguments to pipeline
-        pipeline_kwargs = {}
+        logging.debug("Building pipeline kwargs")
+        pipeline_kwargs: dict[str, Any] = {}
         if args.output_dir:
-            pipeline_kwargs['output_dir'] = args.output_dir
+            logging.debug(f"Setting output_dir to {args.output_dir}")
+            pipeline_kwargs["output_dir"] = args.output_dir
         if args.limit:
-            pipeline_kwargs['limit'] = args.limit
+            logging.debug(f"Setting limit to {args.limit}")
+            pipeline_kwargs["limit"] = args.limit
         if args.crate_list:
-            pipeline_kwargs['crate_list'] = args.crate_list
+            logging.debug(f"Setting crate_list to {args.crate_list}")
+            pipeline_kwargs["crate_list"] = args.crate_list
         if args.skip_ai:
-            pipeline_kwargs['skip_ai'] = True
+            logging.debug("Enabling skip_ai mode")
+            pipeline_kwargs["skip_ai"] = True
         if args.skip_source_analysis:
-            pipeline_kwargs['skip_source'] = True
-
-        pipeline = CrateDataPipeline(config, **pipeline_kwargs)
-
-        logging.info(f"Starting pipeline with {len(vars(args))} arguments")
-        pipeline.run()
-
+            logging.debug("Enabling skip_source mode")
+            pipeline_kwargs["skip_source"] = True
+
+        logging.debug(f"Pipeline kwargs: {pipeline_kwargs}")
+
+        # Sigil Protocol integration - handle pipeline creation properly
+        if hasattr(args, "enable_sigil_protocol") and args.enable_sigil_protocol:
+            logging.info("Sigil Protocol mode requested")
+            logging.debug(
+                f"Sigil available: {_sigil_available}, SigilCompliantPipeline: {
+                    SigilCompliantPipeline is not None}"
+            )
+
+            # Import Sigil enhanced pipeline
+            if _sigil_available and SigilCompliantPipeline is not None:
+                logging.info("Creating Sigil Protocol compliant pipeline")
+                sigil_pipeline = SigilCompliantPipeline(config, **pipeline_kwargs)
+                logging.info(
+                    "Starting Sigil Protocol compliant pipeline with "
+                    "Sacred Chain processing"
+                )
+
+                # Run Sigil pipeline (synchronous)
+                logging.debug("About to run Sigil pipeline - this is synchronous")
+                result = sigil_pipeline.run()  # type: ignore[misc]
+                logging.debug(f"Sigil pipeline run() returned: {result}")
+
+                if result:
+                    logging.info("Sigil pipeline completed successfully")
+                else:
+                    logging.warning("Sigil pipeline completed with no results")
+            else:
+                logging.warning("Sigil enhanced pipeline not available")
+                logging.info("Falling back to standard pipeline")
+
+                logging.debug("Creating standard pipeline as Sigil fallback")
+                standard_pipeline = CrateDataPipeline(config)
+                logging.debug("Standard pipeline created, about to run asynchronously")
+
+                # Run standard pipeline (asynchronous)
+                import asyncio
+
+                logging.debug("Starting asyncio.run() for standard pipeline")
+                result = asyncio.run(
+                    standard_pipeline.run()
+                )  # type: ignore[misc,assignment]
+                logging.debug(f"Standard pipeline asyncio.run() returned: {result}")
+
+                if result:
+                    logging.info("Standard pipeline completed successfully")
+                else:
+                    logging.warning("Standard pipeline completed with no results")
+        else:
+            logging.info("Standard pipeline mode")
+            logging.debug("Creating standard pipeline")
+            standard_pipeline = CrateDataPipeline(config)
+            logging.info(f"Starting pipeline with {len(vars(args))} arguments")
+            logging.debug("Standard pipeline created, about to run asynchronously")
+
+            # Run standard pipeline (asynchronous)
+            import asyncio
+
+            logging.debug("Starting asyncio.run() for standard pipeline")
+            result = asyncio.run(
+                standard_pipeline.run()
+            )  # type: ignore[misc,assignment]
+            logging.debug(f"Standard pipeline asyncio.run() returned: {result}")
+
+            if result:
+                logging.info("Standard pipeline completed successfully")
+            else:
+                logging.warning("Standard pipeline completed with no results")
+
+        logging.info("Main function execution completed successfully")
+
     except Exception as e:
         logging.critical(f"Pipeline failed: {str(e)}")
+        logging.debug(f"Exception details: {type(e).__name__}: {str(e)}", exc_info=True)
         sys.exit(1)
 
+
 if __name__ == "__main__":
-    main()
+    main()
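main() now drives two different run models: SigilCompliantPipeline.run() is called synchronously, while CrateDataPipeline.run() is a coroutine driven through asyncio.run(). A stripped-down sketch of that dispatch, with a stub coroutine pipeline standing in for the real classes (StubAsyncPipeline and dispatch are illustrative only):

    import asyncio

    class StubAsyncPipeline:
        # Stand-in for CrateDataPipeline: run() is a coroutine, as main() assumes.
        async def run(self):
            await asyncio.sleep(0)
            return {"processed": 0}

    def dispatch(pipeline, is_async: bool):
        # Mirrors main(): async pipelines go through asyncio.run(),
        # synchronous (Sigil) pipelines are called directly.
        if is_async:
            return asyncio.run(pipeline.run())
        return pipeline.run()

    print(dispatch(StubAsyncPipeline(), is_async=True))  # {'processed': 0}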