rust-crate-pipeline 1.4.0-py3-none-any.whl → 1.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. rust_crate_pipeline/__init__.py +18 -27
  2. rust_crate_pipeline/__main__.py +1 -0
  3. rust_crate_pipeline/ai_processing.py +718 -596
  4. rust_crate_pipeline/analysis.py +330 -363
  5. rust_crate_pipeline/azure_ai_processing.py +462 -0
  6. rust_crate_pipeline/config.py +46 -28
  7. rust_crate_pipeline/core/__init__.py +19 -0
  8. rust_crate_pipeline/core/canon_registry.py +133 -0
  9. rust_crate_pipeline/core/irl_engine.py +256 -0
  10. rust_crate_pipeline/core/sacred_chain.py +117 -0
  11. rust_crate_pipeline/crate_analysis.py +54 -0
  12. rust_crate_pipeline/crate_list.txt +424 -0
  13. rust_crate_pipeline/github_token_checker.py +108 -112
  14. rust_crate_pipeline/main.py +329 -109
  15. rust_crate_pipeline/network.py +317 -308
  16. rust_crate_pipeline/pipeline.py +300 -375
  17. rust_crate_pipeline/production_config.py +24 -27
  18. rust_crate_pipeline/progress_monitor.py +334 -0
  19. rust_crate_pipeline/scraping/__init__.py +13 -0
  20. rust_crate_pipeline/scraping/unified_scraper.py +259 -0
  21. rust_crate_pipeline/unified_llm_processor.py +637 -0
  22. rust_crate_pipeline/unified_pipeline.py +548 -0
  23. rust_crate_pipeline/utils/file_utils.py +32 -5
  24. rust_crate_pipeline/utils/logging_utils.py +21 -16
  25. rust_crate_pipeline/version.py +76 -47
  26. rust_crate_pipeline-1.4.1.dist-info/METADATA +515 -0
  27. rust_crate_pipeline-1.4.1.dist-info/RECORD +31 -0
  28. rust_crate_pipeline-1.4.0.dist-info/METADATA +0 -585
  29. rust_crate_pipeline-1.4.0.dist-info/RECORD +0 -19
  30. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/WHEEL +0 -0
  31. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/entry_points.txt +0 -0
  32. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/licenses/LICENSE +0 -0
  33. {rust_crate_pipeline-1.4.0.dist-info → rust_crate_pipeline-1.4.1.dist-info}/top_level.txt +0 -0
@@ -4,13 +4,33 @@ import time
  import logging
  import shutil
  import argparse
+ import os
+ import subprocess
+ from typing import Any, TYPE_CHECKING
+
  from .config import PipelineConfig
  from .pipeline import CrateDataPipeline
  from .production_config import setup_production_environment
  from .github_token_checker import check_and_setup_github_token

+ # Optional Sigil import with fallback
+ _sigil_available = True
+ SigilCompliantPipeline = None
+
+ try:
+     sys.path.append(".")  # Add current directory to path
+     from sigil_enhanced_pipeline import SigilCompliantPipeline
+
+     _sigil_available = True
+ except ImportError:
+     _sigil_available = False
+     if TYPE_CHECKING:
+         from sigil_enhanced_pipeline import SigilCompliantPipeline
+     else:
+         SigilCompliantPipeline = None  # type: ignore[assignment,misc]

- def parse_arguments():
+
+ def parse_arguments() -> argparse.Namespace:
      """Parse command line arguments"""
      parser = argparse.ArgumentParser(
          description="Rust Crate Data Processing Pipeline",
@@ -23,126 +43,145 @@ Examples:
  python -m rust_crate_pipeline --output-dir ./data # Custom output directory
  python -m rust_crate_pipeline --log-level DEBUG # Verbose logging
  PRODUCTION=true python -m rust_crate_pipeline # Production mode (quieter)
- """
+ """,
      )

      parser.add_argument(
-         '--limit', '-l',
+         "--limit",
+         "-l",
          type=int,
          default=None,
-         help='Limit the number of crates to process (default: process all)'
+         help="Limit the number of crates to process (default: process all)",
      )

      parser.add_argument(
-         '--batch-size', '-b',
+         "--batch-size",
+         "-b",
          type=int,
          default=10,
-         help='Number of crates to process in each batch (default: 10)'
+         help="Number of crates to process in each batch (default: 10)",
      )

      parser.add_argument(
-         '--workers', '-w',
+         "--workers",
+         "-w",
          type=int,
          default=4,
-         help='Number of parallel workers for API requests (default: 4)'
+         help="Number of parallel workers for API requests (default: 4)",
      )

      parser.add_argument(
-         '--output-dir', '-o',
+         "--output-dir",
+         "-o",
          type=str,
          default=None,
-         help='Output directory for results (default: auto-generated timestamped directory)'
+         help=(
+             "Output directory for results (default: auto-generated timestamped "
+             "directory)"
+         ),
      )

      parser.add_argument(
-         '--model-path', '-m',
+         "--model-path",
+         "-m",
          type=str,
          default=None,
-         help='Path to the LLM model file (default: ~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf)'
+         help=(
+             "Path to the LLM model file (default: ~/models/deepseek/deepseek-coder-"
+             "6.7b-instruct.Q4_K_M.gguf)"
+         ),
      )

      parser.add_argument(
-         '--max-tokens',
+         "--max-tokens",
          type=int,
          default=256,
-         help='Maximum tokens for LLM generation (default: 256)'
+         help="Maximum tokens for LLM generation (default: 256)",
      )

      parser.add_argument(
-         '--checkpoint-interval',
+         "--checkpoint-interval",
          type=int,
          default=10,
-         help='Save checkpoint every N crates (default: 10)'
+         help="Save checkpoint every N crates (default: 10)",
      )

-     parser.add_argument('--log-level',
-         choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'],
-         default='INFO',
-         help='Logging level (default: INFO)'
-         )
+     parser.add_argument(
+         "--log-level",
+         choices=["DEBUG", "INFO", "WARNING", "ERROR"],
+         default="INFO",
+         help="Logging level (default: INFO)",
+     )

      parser.add_argument(
-         '--skip-ai',
-         action='store_true',
-         help='Skip AI enrichment (faster, metadata only)'
+         "--skip-ai",
+         action="store_true",
+         help="Skip AI enrichment (faster, metadata only)",
      )

      parser.add_argument(
-         '--skip-source-analysis',
-         action='store_true',
-         help='Skip source code analysis'
+         "--skip-source-analysis",
+         action="store_true",
+         help="Skip source code analysis",
      )

      # Enhanced scraping with Crawl4AI
      parser.add_argument(
-         '--enable-crawl4ai',
-         action='store_true',
+         "--enable-crawl4ai",
+         action="store_true",
          default=True,
-         help='Enable enhanced web scraping with Crawl4AI (default: enabled)'
+         help="Enable enhanced web scraping with Crawl4AI (default: enabled)",
      )

      parser.add_argument(
-         '--disable-crawl4ai',
-         action='store_true',
-         help='Disable Crawl4AI enhanced scraping (use basic scraping only)'
+         "--disable-crawl4ai",
+         action="store_true",
+         help="Disable Crawl4AI enhanced scraping (use basic scraping only)",
      )

      parser.add_argument(
-         '--crawl4ai-model',
+         "--crawl4ai-model",
          type=str,
-         default='ollama/deepseek-coder:6.7b',
-         help='Model to use with Crawl4AI (default: ollama/deepseek-coder:6.7b)'
+         default="~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf",
+         help=(
+             "GGUF model path for Crawl4AI content analysis (default: ~/models/deepseek/"
+             "deepseek-coder-6.7b-instruct.Q4_K_M.gguf)"
+         ),
      )

      parser.add_argument(
-         '--enable-sigil-protocol',
-         action='store_true',
-         help='Enable Sigil Protocol Sacred Chain processing (Rule Zero compliance)')
+         "--enable-sigil-protocol",
+         action="store_true",
+         help="Enable Sigil Protocol Sacred Chain processing (Rule Zero compliance)",
+     )

      parser.add_argument(
-         '--sigil-mode',
-         choices=['enhanced', 'direct-llm', 'hybrid'],
-         default='enhanced',
-         help='Sigil processing mode: enhanced (API-based), direct-llm (local), hybrid (both)'
+         "--sigil-mode",
+         choices=["enhanced", "direct-llm", "hybrid"],
+         default="enhanced",
+         help=(
+             "Sigil processing mode: enhanced (API-based), direct-llm (local), "
+             "hybrid (both)"
+         ),
      )

      parser.add_argument(
-         '--crate-list',
+         "--crate-list",
          type=str,
-         nargs='+',
-         help='Specific crates to process (space-separated list)'
+         nargs="+",
+         help="Specific crates to process (space-separated list)",
      )

      parser.add_argument(
-         '--config-file',
+         "--config-file",
          type=str,
-         help='JSON config file to override default settings'
+         help="JSON config file to override default settings",
      )

      return parser.parse_args()


- def configure_logging(log_level: str = 'INFO'):
+ def configure_logging(log_level: str = "INFO") -> None:
      """Configure logging with both console and file output"""
      level = getattr(logging, log_level.upper())

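A note on the paired Crawl4AI flags defined above: --enable-crawl4ai is a store_true option that already defaults to True, so on its own it can never turn the feature off; the effective setting is derived by combining it with --disable-crawl4ai, as main() does later in this file. A small sketch of that resolution, assuming a parser with only these two flags:

import argparse

# Illustrative parser limited to the two paired flags from the hunk above.
parser = argparse.ArgumentParser()
parser.add_argument("--enable-crawl4ai", action="store_true", default=True)
parser.add_argument("--disable-crawl4ai", action="store_true")

args = parser.parse_args(["--disable-crawl4ai"])

# Mirrors the resolution used in main(): the disable flag wins over the
# always-true enable default.
enable_crawl4ai = args.enable_crawl4ai and not args.disable_crawl4ai
print(enable_crawl4ai)  # False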
@@ -157,11 +196,9 @@ def configure_logging(log_level: str = 'INFO'):
      # Create formatters
      detailed_formatter = logging.Formatter(
          "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
-         datefmt='%Y-%m-%d %H:%M:%S'
-     )
-     simple_formatter = logging.Formatter(
-         "%(asctime)s [%(levelname)s] %(message)s"
+         datefmt="%Y-%m-%d %H:%M:%S",
      )
+     simple_formatter = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")

      # Console handler
      console_handler = logging.StreamHandler()
@@ -172,8 +209,7 @@ def configure_logging(log_level: str = 'INFO'):
      # File handler with unique timestamp
      log_filename = f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log"
      try:
-         file_handler = logging.FileHandler(
-             log_filename, mode='w', encoding='utf-8')
+         file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8")
          file_handler.setLevel(logging.DEBUG)  # Always capture DEBUG+ to file
          file_handler.setFormatter(detailed_formatter)
          root_logger.addHandler(file_handler)
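These two hunks only reformat configure_logging(), but the handler layout they touch is worth spelling out: a console handler at the user-selected level with the simple formatter, plus a timestamped file handler that always records DEBUG and above with the detailed formatter. A condensed sketch of that layout follows; the root-logger and console setLevel/setFormatter calls are inferred from context, not shown in this diff:

import logging
import time

root = logging.getLogger()
root.setLevel(logging.DEBUG)

detailed = logging.Formatter(
    "%(asctime)s [%(levelname)s] %(name)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
)
simple = logging.Formatter("%(asctime)s [%(levelname)s] %(message)s")

console = logging.StreamHandler()
console.setLevel(logging.INFO)  # stands in for the user-selected --log-level
console.setFormatter(simple)
root.addHandler(console)

# File handler always captures DEBUG+ with the detailed formatter, as in the hunk.
log_filename = f"crate_enrichment_{time.strftime('%Y%m%d-%H%M%S')}.log"
file_handler = logging.FileHandler(log_filename, mode="w", encoding="utf-8")
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(detailed)
root.addHandler(file_handler)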
@@ -186,110 +222,294 @@ def configure_logging(log_level: str = 'INFO'):
          print(f"Warning: Could not create log file: {e}")

      # Set library loggers to less verbose levels
-     logging.getLogger('requests').setLevel(logging.WARNING)
-     logging.getLogger('urllib3').setLevel(logging.WARNING)
-     logging.getLogger('requests_cache').setLevel(logging.WARNING)
-     logging.getLogger('llama_cpp').setLevel(logging.WARNING)
+     logging.getLogger("requests").setLevel(logging.WARNING)
+     logging.getLogger("urllib3").setLevel(logging.WARNING)
+     logging.getLogger("requests_cache").setLevel(logging.WARNING)
+     logging.getLogger("llama_cpp").setLevel(logging.WARNING)


- def check_disk_space():
+ def check_disk_space() -> None:
+     """Check if there is at least 1GB of free disk space, log a warning if not."""
      if shutil.disk_usage(".").free < 1_000_000_000:  # 1GB
          logging.warning("Low disk space! This may affect performance.")


- def main():
+ def enforce_rule_zero_reinforcement() -> None:
+     """
+     Enforce Rule Zero rigor by validating the canonical DB hash/signature
+     before pipeline actions.
+
+     Allows override for local dev, but enforces in CI/prod. Logs all events
+     for traceability.
+     """
+     enforce: bool = (
+         os.environ.get("ENFORCE_RULE_ZERO", "false").lower() == "true"
+         or os.environ.get("CI", "false").lower() == "true"
+         or os.environ.get("PRODUCTION", "false").lower() == "true"
+     )
+     if not enforce:
+         logging.info("Rule Zero DB hash/signature check skipped (dev mode or override)")
+         return
+
+     # Detect project root robustly (works in subdirs, CI, etc.)
+     try:
+         result = subprocess.run(
+             ["git", "rev-parse", "--show-toplevel"],
+             capture_output=True,
+             text=True,
+             check=True,
+         )
+         project_root: str = result.stdout.strip()
+     except Exception as e:
+         logging.critical(f"Failed to detect project root for Rule Zero validation: {e}")
+         sys.exit(1)
+
+     db_path: str = os.path.join(project_root, "sigil_rag_cache.db")
+     hash_path: str = os.path.join(project_root, "sigil_rag_cache.hash")
+
+     # Validate DB hash/signature using the provided script with explicit arguments
+     try:
+         logging.info("Validating Rule Zero DB hash/signature...")
+         result = subprocess.run(
+             [
+                 sys.executable,
+                 os.path.join(project_root, "audits", "validate_db_hash.py"),
+                 "--db",
+                 db_path,
+                 "--expected-hash",
+                 hash_path,
+             ],
+             capture_output=True,
+             text=True,
+             check=False,
+         )
+         if result.returncode != 0:
+             logging.error(
+                 f"Rule Zero DB hash/signature validation failed: "
+                 f"{result.stdout}\n{result.stderr}"
+             )
+             # Allow manual override with justification
+             override_justification = os.environ.get("RULE_ZERO_OVERRIDE", "")
+             if override_justification:
+                 logging.warning(
+                     "Manual override of Rule Zero DB hash/signature validation enabled."
+                 )
+                 logging.warning(f"Override justification: {override_justification}")
+             else:
+                 logging.critical(
+                     "Rule Zero DB hash/signature validation failed and no override "
+                     "provided. Exiting."
+                 )
+                 sys.exit(1)
+         else:
+             logging.info("Rule Zero DB hash/signature validation successful.")
+     except Exception as e:
+         logging.critical(
+             f"Exception during Rule Zero DB hash/signature validation: {e}"
+         )
+         sys.exit(1)
+
+     # Log environment metadata for traceability
+     try:
+         subprocess.run(
+             [
+                 sys.executable,
+                 os.path.join(project_root, "scripts", "cache_env_metadata.py"),
+             ],
+             capture_output=True,
+             text=True,
+             check=False,
+         )
+     except Exception as e:
+         logging.warning(f"Failed to cache environment metadata: {e}")
+
+
+ def main() -> None:
+     # Enforce Rule Zero rigor before any pipeline action
+     enforce_rule_zero_reinforcement()
+
      # Setup production environment first for optimal logging
-     prod_config = setup_production_environment()
+     logging.debug("Starting main() function - setting up production environment")
+     prod_config: dict[str, Any] = setup_production_environment()
+     logging.debug(f"Production environment setup complete: {bool(prod_config)}")

+     logging.debug("Parsing command line arguments")
      args = parse_arguments()
+     logging.debug(f"Arguments parsed: {vars(args)}")
+
+     logging.debug(f"Configuring logging with level: {args.log_level}")
      configure_logging(args.log_level)
+     logging.info("Logging configuration complete")
+
+     logging.debug("Checking disk space")
      check_disk_space()
+     logging.debug("Disk space check complete")

      # Check GitHub token before proceeding
+     logging.debug("Checking GitHub token setup")
      if not check_and_setup_github_token():
          logging.error("GitHub token setup cancelled or failed. Exiting.")
          sys.exit(1)
+     logging.info("GitHub token validation successful")

      try:
          # Create config from command line arguments
-         config_kwargs = {}
+         logging.debug("Building configuration from arguments")
+         config_kwargs: dict[str, Any] = {}

          # Apply production optimizations if available
          if prod_config:
-             config_kwargs.update({
-                 'max_retries': prod_config.get('max_retries', 3),
-                 'batch_size': prod_config.get('batch_size', 10),
-                 'checkpoint_interval': prod_config.get('checkpoint_interval', 10),
-                 'cache_ttl': prod_config.get('cache_ttl', 3600),
-             })
+             logging.debug(f"Applying production config: {prod_config}")
+             config_kwargs.update(
+                 {
+                     "max_retries": prod_config.get("max_retries", 3),
+                     "batch_size": prod_config.get("batch_size", 10),
+                     "checkpoint_interval": prod_config.get("checkpoint_interval", 10),
+                 }
+             )

          if args.batch_size:
-             config_kwargs['batch_size'] = args.batch_size
+             logging.debug(f"Setting batch_size to {args.batch_size}")
+             config_kwargs["batch_size"] = args.batch_size
          if args.workers:
-             config_kwargs['n_workers'] = args.workers
+             logging.debug(f"Setting n_workers to {args.workers}")
+             config_kwargs["n_workers"] = args.workers
          if args.model_path:
-             config_kwargs['model_path'] = args.model_path
+             logging.debug(f"Setting model_path to {args.model_path}")
+             config_kwargs["model_path"] = args.model_path
          if args.max_tokens:
-             config_kwargs['max_tokens'] = args.max_tokens
+             logging.debug(f"Setting max_tokens to {args.max_tokens}")
+             config_kwargs["max_tokens"] = args.max_tokens
          if args.checkpoint_interval:
-             config_kwargs['checkpoint_interval'] = args.checkpoint_interval
-         # Load config file if provided
+             logging.debug(f"Setting checkpoint_interval to {args.checkpoint_interval}")
+             config_kwargs["checkpoint_interval"] = args.checkpoint_interval
+
+         # Load config file if provided
          if args.config_file:
+             logging.debug(f"Loading config file: {args.config_file}")
              import json
-             with open(args.config_file, 'r') as f:
+
+             with open(args.config_file) as f:
                  file_config = json.load(f)
-             config_kwargs.update(file_config)
+             logging.debug(f"Config file loaded: {file_config}")
+             config_kwargs.update(file_config)  # type: ignore

          # Handle Crawl4AI configuration
-         enable_crawl4ai = args.enable_crawl4ai and not args.disable_crawl4ai if hasattr(
-             args, 'disable_crawl4ai') else True
-         config_kwargs.update({
-             'enable_crawl4ai': enable_crawl4ai,
-             'crawl4ai_model': getattr(args, 'crawl4ai_model', 'ollama/deepseek-coder:6.7b')
-         })
-
+         logging.debug("Configuring Crawl4AI settings")
+         enable_crawl4ai = (
+             args.enable_crawl4ai and not args.disable_crawl4ai
+             if hasattr(args, "disable_crawl4ai")
+             else True
+         )
+         logging.debug(f"Crawl4AI enabled: {enable_crawl4ai}")
+         config_kwargs.update(
+             {
+                 "enable_crawl4ai": enable_crawl4ai,
+                 "crawl4ai_model": getattr(
+                     args,
+                     "crawl4ai_model",
+                     "~/models/deepseek/deepseek-coder-6.7b-instruct.Q4_K_M.gguf",
+                 ),
+             }
+         )
+
+         logging.debug(f"Creating PipelineConfig with kwargs: {config_kwargs}")
          config = PipelineConfig(**config_kwargs)
+         logging.info("Pipeline configuration created successfully")

          # Pass additional arguments to pipeline
-         pipeline_kwargs = {}
+         logging.debug("Building pipeline kwargs")
+         pipeline_kwargs: dict[str, Any] = {}
          if args.output_dir:
-             pipeline_kwargs['output_dir'] = args.output_dir
+             logging.debug(f"Setting output_dir to {args.output_dir}")
+             pipeline_kwargs["output_dir"] = args.output_dir
          if args.limit:
-             pipeline_kwargs['limit'] = args.limit
+             logging.debug(f"Setting limit to {args.limit}")
+             pipeline_kwargs["limit"] = args.limit
          if args.crate_list:
-             pipeline_kwargs['crate_list'] = args.crate_list
+             logging.debug(f"Setting crate_list to {args.crate_list}")
+             pipeline_kwargs["crate_list"] = args.crate_list
          if args.skip_ai:
-             pipeline_kwargs['skip_ai'] = True
+             logging.debug("Enabling skip_ai mode")
+             pipeline_kwargs["skip_ai"] = True
          if args.skip_source_analysis:
-             pipeline_kwargs['skip_source'] = True
+             logging.debug("Enabling skip_source mode")
+             pipeline_kwargs["skip_source"] = True

-         # Sigil Protocol integration
-         if hasattr(
-                 args,
-                 'enable_sigil_protocol') and args.enable_sigil_protocol:
-             # Import Sigil enhanced pipeline
-             try:
-                 import sys
-                 sys.path.append('.')  # Add current directory to path
-                 from sigil_enhanced_pipeline import SigilCompliantPipeline
+         logging.debug(f"Pipeline kwargs: {pipeline_kwargs}")
+
+         # Sigil Protocol integration - handle pipeline creation properly
+         if hasattr(args, "enable_sigil_protocol") and args.enable_sigil_protocol:
+             logging.info("Sigil Protocol mode requested")
+             logging.debug(
+                 f"Sigil available: {_sigil_available}, SigilCompliantPipeline: {SigilCompliantPipeline is not None}"
+             )

-                 pipeline = SigilCompliantPipeline(config, **pipeline_kwargs)
+             # Import Sigil enhanced pipeline
+             if _sigil_available and SigilCompliantPipeline is not None:
+                 logging.info("Creating Sigil Protocol compliant pipeline")
+                 sigil_pipeline = SigilCompliantPipeline(config, **pipeline_kwargs)
                  logging.info(
-                     "Starting Sigil Protocol compliant pipeline with Sacred Chain processing")
-             except ImportError as e:
-                 logging.warning(f"Sigil enhanced pipeline not available: {e}")
+                     "Starting Sigil Protocol compliant pipeline with "
+                     "Sacred Chain processing"
+                 )
+
+                 # Run Sigil pipeline (synchronous)
+                 logging.debug("About to run Sigil pipeline - this is synchronous")
+                 result = sigil_pipeline.run()  # type: ignore[misc]
+                 logging.debug(f"Sigil pipeline run() returned: {result}")
+
+                 if result:
+                     logging.info("Sigil pipeline completed successfully")
+                 else:
+                     logging.warning("Sigil pipeline completed with no results")
+             else:
+                 logging.warning("Sigil enhanced pipeline not available")
                  logging.info("Falling back to standard pipeline")
-                 pipeline = CrateDataPipeline(config, **pipeline_kwargs)
+
+                 logging.debug("Creating standard pipeline as Sigil fallback")
+                 standard_pipeline = CrateDataPipeline(config)
+                 logging.debug("Standard pipeline created, about to run asynchronously")
+
+                 # Run standard pipeline (asynchronous)
+                 import asyncio
+
+                 logging.debug("Starting asyncio.run() for standard pipeline")
+                 result = asyncio.run(
+                     standard_pipeline.run()
+                 )  # type: ignore[misc,assignment]
+                 logging.debug(f"Standard pipeline asyncio.run() returned: {result}")
+
+                 if result:
+                     logging.info("Standard pipeline completed successfully")
+                 else:
+                     logging.warning("Standard pipeline completed with no results")
          else:
-             pipeline = CrateDataPipeline(config, **pipeline_kwargs)
-             logging.info(f"Starting pipeline with {len(vars(args))} arguments")
+             logging.info("Standard pipeline mode")
+             logging.debug("Creating standard pipeline")
+             standard_pipeline = CrateDataPipeline(config)
+             logging.info(f"Starting pipeline with {len(vars(args))} arguments")
+             logging.debug("Standard pipeline created, about to run asynchronously")
+
+             # Run standard pipeline (asynchronous)
+             import asyncio
+
+             logging.debug("Starting asyncio.run() for standard pipeline")
+             result = asyncio.run(
+                 standard_pipeline.run()
+             )  # type: ignore[misc,assignment]
+             logging.debug(f"Standard pipeline asyncio.run() returned: {result}")
+
+             if result:
+                 logging.info("Standard pipeline completed successfully")
+             else:
+                 logging.warning("Standard pipeline completed with no results")

-         # Run the pipeline asynchronously
-         import asyncio
-         asyncio.run(pipeline.run())
+         logging.info("Main function execution completed successfully")

      except Exception as e:
          logging.critical(f"Pipeline failed: {str(e)}")
+         logging.debug(f"Exception details: {type(e).__name__}: {str(e)}", exc_info=True)
          sys.exit(1)

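The new enforce_rule_zero_reinforcement() shown above shells out to audits/validate_db_hash.py with --db and --expected-hash and treats a non-zero exit code as failure, but that script itself is not part of this diff. Purely as a hypothetical illustration of a check that would satisfy this calling convention (the SHA-256 scheme and hash-file layout are assumptions, not taken from the package):

import argparse
import hashlib
import sys
from pathlib import Path


def main() -> int:
    parser = argparse.ArgumentParser()
    parser.add_argument("--db", required=True)
    parser.add_argument("--expected-hash", required=True)
    args = parser.parse_args()

    # Compare the database file's digest against the stored expected value.
    actual = hashlib.sha256(Path(args.db).read_bytes()).hexdigest()
    expected = Path(args.expected_hash).read_text().strip()

    if actual != expected:
        print(f"hash mismatch: {actual} != {expected}", file=sys.stderr)
        return 1  # non-zero exit is what enforce_rule_zero_reinforcement() checks

    return 0


if __name__ == "__main__":
    sys.exit(main())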