sf-config-builder 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sfconfig/config.py ADDED
@@ -0,0 +1,767 @@
"""SFConfig class for managing Screaming Frog configuration files."""

import json
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from .diff import SFDiff
from .exceptions import SFConfigError, SFCrawlError, SFParseError, SFValidationError
from .paths import get_classpath_separator, get_java_path, get_sf_cli_path, get_sf_jar_path


class SFConfig:
    """Manage Screaming Frog configuration files.

    This class wraps the Java ConfigBuilder CLI to provide a Pythonic interface
    for inspecting, modifying, and using .seospiderconfig files.

    Example:
        >>> config = SFConfig.load("base.seospiderconfig")
        >>> config.max_urls = 100000
        >>> config.add_extraction("Price", "//span[@class='price']")
        >>> config.save("client.seospiderconfig")
        >>> config.run_crawl("https://example.com", output_folder="./results")
    """

    JAR_PATH = Path(__file__).parent / "java" / "ConfigBuilder.jar"

    def __init__(
        self,
        data: Dict[str, Any],
        path: Optional[str] = None,
        sf_path: Optional[str] = None,
    ):
        """Initialize SFConfig with inspection data.

        Args:
            data: Parsed JSON response from Java CLI --inspect command.
            path: Path to the source config file.
            sf_path: Optional custom path to SF installation directory.
        """
        self._data = data
        self._path = path
        self._sf_path = sf_path
        self._patches: Dict[str, Any] = {}
        self._extraction_ops: List[Dict[str, Any]] = []
        self._exclude_ops: List[Dict[str, Any]] = []
        self._include_ops: List[Dict[str, Any]] = []

    # ==================== Loading ====================

    @classmethod
    def load(cls, path: str, sf_path: Optional[str] = None) -> "SFConfig":
        """Load a config file.

        Args:
            path: Path to the .seospiderconfig file.
            sf_path: Optional custom path to SF installation directory.
                Auto-detects if not provided.

        Returns:
            SFConfig instance with loaded configuration.

        Raises:
            SFParseError: If the config file cannot be parsed.
            SFNotFoundError: If Screaming Frog is not installed.
        """
        result = cls._run_java("--inspect", "--config", str(path), sf_path=sf_path)
        return cls(result, str(path), sf_path=sf_path)

    @classmethod
    def default(cls) -> "SFConfig":
        """Create config from SF's default settings.

        Returns:
            SFConfig instance with default SF configuration.

        Raises:
            NotImplementedError: If no default config file can be found.
        """
        from .paths import get_default_config_path

        default_path = get_default_config_path()
        if default_path:
            return cls.load(str(default_path))

        raise NotImplementedError(
            "Default config not found. "
            "Please load an existing config file instead."
        )

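    # Illustrative loading pattern (editorial sketch, not part of the published
    # file): default() falls back to NotImplementedError when no default config
    # is found, so a template can be loaded instead.
    #     try:
    #         config = SFConfig.default()
    #     except NotImplementedError:
    #         config = SFConfig.load("base.seospiderconfig")
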
    # ==================== Inspection ====================

    def to_dict(self) -> Dict[str, Any]:
        """Return full config as dictionary.

        Returns:
            Dictionary containing all config data from the Java CLI.
        """
        return self._data

    def get(self, path: str, default: Any = None) -> Any:
        """Get a specific field value.

        Args:
            path: Dot-separated field path (e.g., "mCrawlConfig.mMaxUrls").
            default: Value to return if field is not found.

        Returns:
            The field value, or default if not found.
        """
        # Check patches first (pending changes take precedence)
        if path in self._patches:
            return self._patches[path]

        # Search in loaded data
        for field in self._data.get("fields", []):
            if field.get("path") == path:
                return field.get("value")

        return default

    def fields(self, prefix: Optional[str] = None) -> List[Dict[str, Any]]:
        """List all fields, optionally filtered by prefix.

        Args:
            prefix: Optional path prefix to filter fields.

        Returns:
            List of field dictionaries containing path, type, value, etc.
        """
        fields = self._data.get("fields", [])
        if prefix:
            fields = [f for f in fields if f.get("path", "").startswith(prefix)]
        return fields

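    # Inspection sketch (illustrative; field paths as documented above):
    #     config = SFConfig.load("base.seospiderconfig")
    #     config.get("mCrawlConfig.mMaxUrls", 0)
    #     for field in config.fields(prefix="mCrawlConfig"):
    #         print(field["path"], field.get("value"))
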
    @property
    def sf_version(self) -> str:
        """Get the Screaming Frog version that created this config."""
        return self._data.get("sfVersion", "unknown")

    @property
    def config_version(self) -> str:
        """Get the config file version."""
        return self._data.get("configVersion", "unknown")

    @property
    def path(self) -> Optional[str]:
        """Get the path to the loaded config file."""
        return self._path

    # ==================== Modification ====================

    def set(self, path: str, value: Any) -> "SFConfig":
        """Set a field value.

        Args:
            path: Dot-separated field path (e.g., "mCrawlConfig.mMaxUrls").
            value: The value to set.

        Returns:
            Self for method chaining.
        """
        self._patches[path] = value
        return self

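    # Minimal sketch of the pending-patch behaviour: set() records a patch that
    # get() reads back immediately, but nothing is written until save().
    #     config.set("mCrawlConfig.mMaxUrls", 50000).set("mCrawlConfig.mMaxDepth", 5)
    #     config.get("mCrawlConfig.mMaxUrls")  # -> 50000 (pending, not yet saved)
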
    # Convenience properties
    @property
    def max_urls(self) -> int:
        """Get the maximum URLs to crawl."""
        return self.get("mCrawlConfig.mMaxUrls", 0)

    @max_urls.setter
    def max_urls(self, value: int):
        """Set the maximum URLs to crawl."""
        self.set("mCrawlConfig.mMaxUrls", value)

    @property
    def rendering_mode(self) -> str:
        """Get the rendering mode (STATIC or JAVASCRIPT)."""
        return self.get("mCrawlConfig.mRenderingMode", "STATIC")

    @rendering_mode.setter
    def rendering_mode(self, value: str):
        """Set the rendering mode (STATIC or JAVASCRIPT)."""
        self.set("mCrawlConfig.mRenderingMode", value)

    @property
    def robots_mode(self) -> str:
        """Get the robots.txt handling mode (RESPECT or IGNORE)."""
        return self.get("mCrawlConfig.mRobotsTxtMode", "RESPECT")

    @robots_mode.setter
    def robots_mode(self, value: str):
        """Set the robots.txt handling mode (RESPECT or IGNORE)."""
        self.set("mCrawlConfig.mRobotsTxtMode", value)

    @property
    def max_depth(self) -> int:
        """Get the maximum crawl depth."""
        return self.get("mCrawlConfig.mMaxDepth", 0)

    @max_depth.setter
    def max_depth(self, value: int):
        """Set the maximum crawl depth."""
        self.set("mCrawlConfig.mMaxDepth", value)

    @property
    def crawl_delay(self) -> float:
        """Get the crawl delay in seconds."""
        return self.get("mCrawlConfig.mCrawlDelay", 0.0)

    @crawl_delay.setter
    def crawl_delay(self, value: float):
        """Set the crawl delay in seconds."""
        self.set("mCrawlConfig.mCrawlDelay", value)

    @property
    def user_agent(self) -> str:
        """Get the user agent string."""
        return self.get("mUserAgentConfig.mUserAgent", "")

    @user_agent.setter
    def user_agent(self, value: str):
        """Set the user agent string."""
        self.set("mUserAgentConfig.mUserAgent", value)

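    # The properties above are thin wrappers over set()/get(); a typical
    # configuration pass (illustrative values only) looks like:
    #     config.max_urls = 100000
    #     config.rendering_mode = "JAVASCRIPT"
    #     config.robots_mode = "RESPECT"
    #     config.crawl_delay = 0.5
    #     config.user_agent = "MyAuditBot/1.0"
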
    # ==================== Extractions ====================

    def add_extraction(
        self,
        name: str,
        selector: str,
        selector_type: str = "XPATH",
        extract_mode: str = "TEXT",
        attribute: Optional[str] = None,
    ) -> "SFConfig":
        """Add a custom extraction rule.

        Args:
            name: Name for the extraction (appears as column header in exports).
            selector: The selector pattern (XPath, CSS, or Regex).
            selector_type: Type of selector - "XPATH", "CSS", or "REGEX".
            extract_mode: What to extract - "TEXT", "HTML_ELEMENT", "INNER_HTML",
                or "FUNCTION_VALUE".
            attribute: Optional attribute to extract (for ATTRIBUTE mode).

        Returns:
            Self for method chaining.

        Example:
            >>> config.add_extraction("Price", "//span[@class='price']")
            >>> config.add_extraction("SKU", ".sku-code", selector_type="CSS")
        """
        op = {
            "op": "add",
            "name": name,
            "selector": selector,
            "selectorType": selector_type.upper(),
            "extractMode": extract_mode.upper(),
        }
        if attribute:
            op["attribute"] = attribute
        self._extraction_ops.append(op)
        return self

    def remove_extraction(self, name: str) -> "SFConfig":
        """Remove an extraction rule by name.

        Args:
            name: Name of the extraction rule to remove.

        Returns:
            Self for method chaining.
        """
        self._extraction_ops.append({"op": "remove", "name": name})
        return self

    def clear_extractions(self) -> "SFConfig":
        """Remove all extraction rules.

        Returns:
            Self for method chaining.
        """
        self._extraction_ops.append({"op": "clear"})
        return self

    @property
    def extractions(self) -> List[Dict[str, Any]]:
        """List current extraction rules.

        Returns:
            List of extraction rule dictionaries.
        """
        # Try to get from virtual field
        extractions = self.get("mCustomExtractionConfig.extractions", [])
        if extractions:
            return extractions

        # Fall back to parsing mFilters if available
        return []

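    # Extraction workflow sketch (illustrative selectors): queue a clear plus a
    # fresh set of rules, then persist them with save().
    #     config.clear_extractions()
    #     config.add_extraction("Price", "//span[@class='price']")
    #     config.add_extraction("Stock", ".stock-status", selector_type="CSS")
    #     config.save("ecommerce.seospiderconfig")
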
    # ==================== Excludes ====================

    def add_exclude(self, pattern: str) -> "SFConfig":
        """Add an exclude pattern (regex).

        URLs matching this pattern will be excluded from crawling.

        Args:
            pattern: Regex pattern to exclude.

        Returns:
            Self for method chaining.

        Example:
            >>> config.add_exclude(r".*\\.pdf$")  # Exclude PDFs
            >>> config.add_exclude(r".*/admin/.*")  # Exclude admin paths
        """
        self._exclude_ops.append({"op": "append", "values": [pattern]})
        return self

    def remove_exclude(self, pattern: str) -> "SFConfig":
        """Remove an exclude pattern.

        Args:
            pattern: The exact pattern to remove.

        Returns:
            Self for method chaining.
        """
        self._exclude_ops.append({"op": "remove", "values": [pattern]})
        return self

    def clear_excludes(self) -> "SFConfig":
        """Remove all exclude patterns.

        Returns:
            Self for method chaining.
        """
        self._exclude_ops.append({"op": "clear"})
        return self

    @property
    def excludes(self) -> List[str]:
        """List current exclude patterns.

        Returns:
            List of regex patterns.
        """
        return self.get("mExcludeManager.mExcludePatterns", [])

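    # Illustrative exclude management (patterns are examples only): read back
    # the current patterns, drop one, and add another.
    #     for pattern in config.excludes:
    #         print(pattern)
    #     config.remove_exclude(r".*\.pdf$")
    #     config.add_exclude(r".*\?sessionid=.*")
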
    # ==================== Includes ====================

    def add_include(self, pattern: str) -> "SFConfig":
        """Add an include pattern (regex).

        Only URLs matching include patterns will be crawled.

        Args:
            pattern: Regex pattern to include.

        Returns:
            Self for method chaining.
        """
        self._include_ops.append({"op": "append", "values": [pattern]})
        return self

    def remove_include(self, pattern: str) -> "SFConfig":
        """Remove an include pattern.

        Args:
            pattern: The exact pattern to remove.

        Returns:
            Self for method chaining.
        """
        self._include_ops.append({"op": "remove", "values": [pattern]})
        return self

    def clear_includes(self) -> "SFConfig":
        """Remove all include patterns.

        Returns:
            Self for method chaining.
        """
        self._include_ops.append({"op": "clear"})
        return self

    @property
    def includes(self) -> List[str]:
        """List current include patterns.

        Returns:
            List of regex patterns.
        """
        return self.get("mCrawlConfig.mIncludePatterns", [])

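    # Scoping sketch (illustrative pattern): restrict a crawl to one site
    # section by combining a clear with a single include.
    #     config.clear_includes()
    #     config.add_include(r"https://example\.com/blog/.*")
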
    # ==================== Allowed Domains ====================

    def add_allowed_domain(self, domain: str) -> "SFConfig":
        """Add an allowed domain for crawling.

        Args:
            domain: Domain to allow (e.g., "example.com").

        Returns:
            Self for method chaining.
        """
        # This typically maps to a specific SF config field
        # Implementation depends on SF version
        self.set("mCrawlConfig.mAllowedDomains",
                 self.get("mCrawlConfig.mAllowedDomains", []) + [domain])
        return self

    # ==================== Saving ====================

    def save(self, output_path: Optional[str] = None) -> "SFConfig":
        """Save config to file.

        Args:
            output_path: Path to save to. If None, overwrites the original file.

        Returns:
            Self for method chaining.

        Raises:
            SFConfigError: If no output path is specified and no original path exists.
            SFValidationError: If patches contain invalid fields or values.
        """
        output = output_path or self._path
        if not output:
            raise SFConfigError("No output path specified and no original path to overwrite")

        # Build patches dict
        patches = dict(self._patches)

        if self._extraction_ops:
            patches["extractions"] = self._extraction_ops

        if self._exclude_ops:
            # Convert ops to the format expected by Java CLI
            if len(self._exclude_ops) == 1 and self._exclude_ops[0].get("op") == "clear":
                patches["mExcludeManager.mExcludePatterns"] = {"op": "clear"}
            else:
                # Collapse queued ops into a single operation; a later remove or
                # clear supersedes any appends collected before it
                combined_op = {"op": "append", "values": []}
                for op in self._exclude_ops:
                    if op.get("op") == "append":
                        combined_op["values"].extend(op.get("values", []))
                    elif op.get("op") == "remove":
                        combined_op = {"op": "remove", "values": op.get("values", [])}
                    elif op.get("op") == "clear":
                        combined_op = {"op": "clear"}
                patches["mExcludeManager.mExcludePatterns"] = combined_op

        if self._include_ops:
            if len(self._include_ops) == 1 and self._include_ops[0].get("op") == "clear":
                patches["mCrawlConfig.mIncludePatterns"] = {"op": "clear"}
            else:
                combined_op = {"op": "append", "values": []}
                for op in self._include_ops:
                    if op.get("op") == "append":
                        combined_op["values"].extend(op.get("values", []))
                    elif op.get("op") == "remove":
                        combined_op = {"op": "remove", "values": op.get("values", [])}
                    elif op.get("op") == "clear":
                        combined_op = {"op": "clear"}
                patches["mCrawlConfig.mIncludePatterns"] = combined_op

        patches_json = json.dumps(patches)

        self._run_java(
            "--build",
            "--template", self._path,
            "--output", str(output),
            "--patches", patches_json,
            sf_path=self._sf_path,
        )

        # Update state
        self._path = str(output)
        self._patches = {}
        self._extraction_ops = []
        self._exclude_ops = []
        self._include_ops = []

        # Reload to get fresh data
        result = self._run_java("--inspect", "--config", str(output), sf_path=self._sf_path)
        self._data = result

        return self

    def preview_save(self) -> List[Dict[str, Any]]:
        """Preview changes without saving.

        Returns:
            List of change dictionaries showing what would be modified.
        """
        patches = dict(self._patches)
        if self._extraction_ops:
            patches["extractions"] = self._extraction_ops
        if self._exclude_ops:
            patches["mExcludeManager.mExcludePatterns"] = self._exclude_ops
        if self._include_ops:
            patches["mCrawlConfig.mIncludePatterns"] = self._include_ops

        patches_json = json.dumps(patches)

        # Use NUL on Windows, /dev/null on Unix
        import platform
        null_path = "NUL" if platform.system() == "Windows" else "/dev/null"

        result = self._run_java(
            "--build",
            "--template", self._path,
            "--output", null_path,
            "--patches", patches_json,
            "--dry-run",
            sf_path=self._sf_path,
        )

        return result.get("changes", [])

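    # Review-then-persist sketch: preview_save() reports the pending changes
    # without writing anything, after which save() applies them (paths are
    # illustrative).
    #     for change in config.preview_save():
    #         print(change)
    #     config.save("reviewed.seospiderconfig")
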
    # ==================== Crawling ====================

    def run_crawl(
        self,
        url: str,
        output_folder: str,
        export_tabs: Optional[List[str]] = None,
        export_format: str = "csv",
        timeout: Optional[int] = None,
    ) -> None:
        """Run a crawl (blocking).

        Args:
            url: The URL to start crawling from.
            output_folder: Directory to save crawl results.
            export_tabs: List of tabs to export (e.g., ["Internal:All", "Response Codes:All"]).
            export_format: Export format - "csv" or "xlsx".
            timeout: Maximum time in seconds to wait for crawl completion.

        Raises:
            SFCrawlError: If the crawl fails or times out.
            SFConfigError: If the config hasn't been saved yet.
        """
        process = self.run_crawl_async(url, output_folder, export_tabs, export_format)

        try:
            process.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            process.kill()
            raise SFCrawlError(f"Crawl timed out after {timeout} seconds")

        if process.returncode != 0:
            raise SFCrawlError(f"Crawl failed with exit code {process.returncode}")

    def run_crawl_async(
        self,
        url: str,
        output_folder: str,
        export_tabs: Optional[List[str]] = None,
        export_format: str = "csv",
    ) -> subprocess.Popen:
        """Run a crawl (non-blocking).

        Args:
            url: The URL to start crawling from.
            output_folder: Directory to save crawl results.
            export_tabs: List of tabs to export.
            export_format: Export format - "csv" or "xlsx".

        Returns:
            subprocess.Popen handle for the crawl process.

        Raises:
            SFConfigError: If the config hasn't been saved yet.
        """
        if not self._path:
            raise SFConfigError("Save config before running crawl")

        cli = get_sf_cli_path()

        cmd = [
            cli,
            "--crawl", url,
            "--config", self._path,
            "--headless",
            "--output-folder", str(output_folder),
            "--export-format", export_format,
        ]

        if export_tabs:
            cmd.extend(["--export-tabs", ",".join(export_tabs)])

        return subprocess.Popen(cmd)

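    # Non-blocking crawl sketch (URL and folder are illustrative); the returned
    # Popen handle can be polled while other work continues. do_other_work()
    # is a placeholder for any task run alongside the crawl.
    #     proc = config.run_crawl_async("https://example.com", "./results",
    #                                   export_tabs=["Internal:All"])
    #     while proc.poll() is None:
    #         do_other_work()
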
    # ==================== Test Extraction ====================

    def test_extraction(
        self,
        url: str,
        extraction_name: Optional[str] = None,
        selector: Optional[str] = None,
        selector_type: str = "XPATH",
        extract_mode: str = "TEXT",
        render_js: bool = False,
    ) -> Dict[str, Any]:
        """Test an extraction against a live URL.

        Args:
            url: URL to fetch and test against.
            extraction_name: Name of an existing extraction rule to test.
            selector: Inline selector to test (alternative to extraction_name).
            selector_type: Type of selector - "XPATH", "CSS", or "REGEX".
            extract_mode: What to extract - "TEXT", "HTML_ELEMENT", etc.
            render_js: Whether to render JavaScript before extraction.

        Returns:
            Dictionary containing:
                - success: Whether the test succeeded
                - matches: List of matched values
                - match_count: Number of matches
                - warnings: Any warnings

        Raises:
            SFValidationError: If neither extraction_name nor selector is provided.
        """
        if extraction_name:
            # Find extraction in config
            for ext in self.extractions:
                if ext.get("name") == extraction_name:
                    selector = ext.get("selector")
                    selector_type = ext.get("selectorType", "XPATH")
                    extract_mode = ext.get("extractMode", "TEXT")
                    break
            else:
                raise SFValidationError(f"Extraction '{extraction_name}' not found")

        if not selector:
            raise SFValidationError("Provide extraction_name or selector")

        args = [
            "--test-extraction",
            "--url", url,
            "--selector", selector,
            "--selector-type", selector_type.upper(),
            "--extract-mode", extract_mode.upper(),
        ]

        if render_js:
            args.append("--render-js")

        result = self._run_java(*args, sf_path=self._sf_path)
        return result

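    # Validate-before-commit sketch (selector and URL are illustrative): test a
    # selector against one page, and only add it as a rule when it matches.
    #     result = config.test_extraction("https://example.com/product",
    #                                     selector="//span[@class='price']")
    #     if result.get("match_count", 0) > 0:
    #         config.add_extraction("Price", "//span[@class='price']")
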
    # ==================== Diff ====================

    @classmethod
    def diff(
        cls,
        config_a: Union[str, "SFConfig"],
        config_b: Union[str, "SFConfig"],
        prefix: Optional[str] = None,
        sf_path: Optional[str] = None,
    ) -> SFDiff:
        """Compare two configs.

        Args:
            config_a: First config (path or SFConfig instance).
            config_b: Second config (path or SFConfig instance).
            prefix: Optional path prefix to filter differences.
            sf_path: Optional custom path to SF installation directory.

        Returns:
            SFDiff object representing the differences.

        Example:
            >>> diff = SFConfig.diff("old.seospiderconfig", "new.seospiderconfig")
            >>> if diff.has_changes:
            ...     print(diff)
        """
        path_a = config_a._path if isinstance(config_a, SFConfig) else str(config_a)
        path_b = config_b._path if isinstance(config_b, SFConfig) else str(config_b)

        # Get sf_path from config if not provided
        if sf_path is None and isinstance(config_a, SFConfig):
            sf_path = config_a._sf_path

        args = ["--diff", "--config-a", path_a, "--config-b", path_b]
        if prefix:
            args.extend(["--prefix", prefix])

        result = cls._run_java(*args, sf_path=sf_path)
        return SFDiff(result)

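    # Scoped comparison sketch (file names illustrative): restrict the diff to
    # crawl settings via the prefix filter documented above.
    #     diff = SFConfig.diff("base.seospiderconfig", "client.seospiderconfig",
    #                          prefix="mCrawlConfig")
    #     if diff.has_changes:
    #         print(diff)
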
    # ==================== Internal ====================

    @classmethod
    def _run_java(cls, *args: str, sf_path: Optional[str] = None) -> Dict[str, Any]:
        """Execute Java CLI and return parsed JSON result.

        Args:
            *args: Command line arguments to pass to the Java CLI.
            sf_path: Optional custom path to SF installation directory.

        Returns:
            Parsed JSON response from the CLI.

        Raises:
            SFParseError: If the CLI output is not valid JSON.
            SFValidationError: If the CLI returns a validation error.
            SFConfigError: If the CLI returns any other error.
        """
        java = get_java_path(sf_path)
        sf_jar_path = get_sf_jar_path(sf_path)
        cp_sep = get_classpath_separator()

        # Build classpath
        classpath = f"{cls.JAR_PATH}{cp_sep}{sf_jar_path}/*"

        cmd = [java, "-cp", classpath, "ConfigBuilder", *args]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
        )

        # Handle empty output
        if not result.stdout.strip():
            if result.stderr:
                raise SFConfigError(f"Java CLI error: {result.stderr}")
            raise SFParseError("No output from Java CLI")

        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError as e:
            raise SFParseError(
                f"Invalid JSON from CLI: {result.stdout[:200]}...\n"
                f"Parse error: {e}"
            )

        if not data.get("success", True):
            error_type = data.get("errorType", "UNKNOWN")
            error_msg = data.get("error", "Unknown error")
            details = data.get("details", {})

            if error_type == "VALIDATION_ERROR":
                raise SFValidationError(f"{error_msg}: {details}" if details else error_msg)
            elif error_type == "PARSE_ERROR":
                raise SFParseError(error_msg)
            elif error_type == "IO_ERROR":
                raise SFConfigError(f"I/O error: {error_msg}")
            else:
                raise SFConfigError(error_msg)

        return data

    def __repr__(self) -> str:
        """Return developer-friendly representation."""
        return f"<SFConfig path={self._path!r} version={self.config_version}>"

    def __str__(self) -> str:
        """Return human-readable string representation."""
        return f"SFConfig({self._path or 'unsaved'})"