sf-config-builder 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sf_config_builder-0.1.1.dist-info/METADATA +316 -0
- sf_config_builder-0.1.1.dist-info/RECORD +10 -0
- sf_config_builder-0.1.1.dist-info/WHEEL +5 -0
- sf_config_builder-0.1.1.dist-info/licenses/LICENSE +21 -0
- sf_config_builder-0.1.1.dist-info/top_level.txt +1 -0
- sfconfig/__init__.py +34 -0
- sfconfig/config.py +767 -0
- sfconfig/diff.py +145 -0
- sfconfig/exceptions.py +26 -0
- sfconfig/paths.py +217 -0
sfconfig/diff.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""SFDiff class for comparing Screaming Frog configurations."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class SFDiff:
    """Represents a diff between two SF configs.

    Wraps the parsed JSON payload of a diff and exposes it through
    properties as well as the ``len()``, ``bool()``, ``iter()`` and ``str()``
    protocols. Keys whose value is JSON ``null`` are treated the same as
    missing keys, so a sparse payload never raises.

    Example:
        >>> diff = SFConfig.diff("old.seospiderconfig", "new.seospiderconfig")
        >>> if diff.has_changes:
        ...     print(f"Found {diff.change_count} differences:")
        ...     print(diff)
    """

    def __init__(self, data: Dict[str, Any]):
        """Initialize SFDiff with diff data from Java CLI.

        Args:
            data: Parsed JSON response from the Java CLI --diff command.
        """
        self._data = data

    @property
    def has_changes(self) -> bool:
        """Check if there are any differences between the configs."""
        return bool(self.changes)

    @property
    def change_count(self) -> int:
        """Get the total number of differences.

        Uses the payload's ``totalDifferences`` value when it is present and
        not null; otherwise falls back to the length of :attr:`changes`.
        """
        total = self._data.get("totalDifferences")
        if total is None:
            return len(self.changes)
        return total

    @property
    def changes(self) -> List[Dict[str, Any]]:
        """Get list of all changes.

        Returns:
            List of change dictionaries, each containing:
            - path: The field path that changed
            - valueA / old: The value in the first config
            - valueB / new: The value in the second config

            An empty list is returned when ``differences`` is missing or null.
        """
        # ``or []`` also covers an explicit JSON null, which ``.get(key, [])``
        # would pass through as None and break len()/iteration downstream.
        return self._data.get("differences") or []

    @property
    def config_version_a(self) -> str:
        """Get the version of the first config ("unknown" if absent)."""
        return self._data.get("configVersionA") or "unknown"

    @property
    def config_version_b(self) -> str:
        """Get the version of the second config ("unknown" if absent)."""
        return self._data.get("configVersionB") or "unknown"

    def changes_for(self, prefix: str) -> List[Dict[str, Any]]:
        """Get changes filtered by path prefix.

        Args:
            prefix: The path prefix to filter by (e.g., "mCrawlConfig").

        Returns:
            List of changes where the path starts with the given prefix.
            A change whose path is missing or null is matched as if its
            path were the empty string.
        """
        return [
            change
            for change in self.changes
            if (change.get("path") or "").startswith(prefix)
        ]

    def to_dict(self) -> Dict[str, Any]:
        """Return the full diff as a dictionary.

        Returns:
            Dictionary containing:
            - config_version_a: Version of first config
            - config_version_b: Version of second config
            - total_differences: Number of differences
            - differences: List of change objects
        """
        return {
            "config_version_a": self.config_version_a,
            "config_version_b": self.config_version_b,
            "total_differences": self.change_count,
            "differences": self.changes,
        }

    def __str__(self) -> str:
        """Return human-readable string representation of the diff.

        One ``path: old -> new`` line per change, or "No changes".
        """
        if not self.has_changes:
            return "No changes"

        lines = []
        for change in self.changes:
            path = change.get("path") or "unknown"
            # Handle both naming conventions from Java CLI. Key presence (not
            # truthiness) decides, so an explicit null value renders as "null".
            old = change.get("valueA", change.get("old", "?"))
            new = change.get("valueB", change.get("new", "?"))
            lines.append(
                f"{path}: {self._format_value(old)} -> {self._format_value(new)}"
            )

        return "\n".join(lines)

    def __repr__(self) -> str:
        """Return developer-friendly representation."""
        return f"<SFDiff changes={self.change_count}>"

    def __len__(self) -> int:
        """Return the number of changes (same value as ``change_count``)."""
        return self.change_count

    def __bool__(self) -> bool:
        """Return True if there are changes."""
        return self.has_changes

    def __iter__(self):
        """Iterate over the change dictionaries in ``changes``."""
        return iter(self.changes)

    @staticmethod
    def _format_value(value: Any) -> str:
        """Format a value for display.

        Args:
            value: The value to format.

        Returns:
            String representation of the value: ``null`` for None, a
            double-quoted (and possibly truncated) form for strings, a
            summary for long lists and for dicts, ``str(value)`` otherwise.
        """
        if value is None:
            return "null"
        if isinstance(value, str):
            # Truncate long strings so one change stays on a readable line.
            if len(value) > 50:
                return f'"{value[:47]}..."'
            return f'"{value}"'
        if isinstance(value, list):
            if not value:
                return "[]"
            if len(value) <= 3:
                return str(value)
            return f"[{len(value)} items]"
        if isinstance(value, dict):
            return f"{{{len(value)} keys}}"
        return str(value)
|
sfconfig/exceptions.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Custom exceptions for sf-config-tool."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SFConfigError(Exception):
    """Base exception for sf-config-tool.

    Adds no behavior of its own on top of ``Exception``.
    """
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SFNotFoundError(SFConfigError):
    """Raised when Screaming Frog is not installed or cannot be located."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SFValidationError(SFConfigError):
    """Raised for an invalid field path or value."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SFParseError(SFConfigError):
    """Raised when a config file could not be parsed."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class SFCrawlError(SFConfigError):
    """Raised when crawl execution failed."""
|
sfconfig/paths.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""Screaming Frog installation path detection."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import platform
|
|
5
|
+
import shutil
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from .exceptions import SFNotFoundError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Candidate installation directories, keyed by the platform name returned by
# get_platform(). Each value is a list so that several possible locations
# can be probed in order.
SF_PATHS = {
    "Darwin": [
        "/Applications/Screaming Frog SEO Spider.app/Contents/Resources/Java",
    ],
    "Windows": [
        "C:/Program Files/Screaming Frog SEO Spider",
        "C:/Program Files (x86)/Screaming Frog SEO Spider",
    ],
    "Linux": [
        "/usr/share/screamingfrogseospider",
    ],
}
|
|
21
|
+
|
|
22
|
+
# File name of the Screaming Frog CLI executable, keyed by platform name.
SF_CLI_NAMES = {
    "Darwin": "ScreamingFrogSEOSpider",
    "Windows": "ScreamingFrogSEOSpiderCli.exe",
    "Linux": "screamingfrogseospider",
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_platform() -> str:
    """Return the current platform name as reported by ``platform.system()``.

    The platform-keyed tables in this module use the values "Darwin",
    "Windows" and "Linux".
    """
    system_name = platform.system()
    return system_name
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_sf_jar_path(sf_path: Optional[str] = None) -> str:
    """Get path to SF's JAR files directory.

    Candidates are probed in order of precedence and the first one that
    exists on disk is returned:

    1. the explicit ``sf_path`` argument,
    2. the ``SF_PATH`` environment variable,
    3. the default locations for the current platform (``SF_PATHS``).

    Args:
        sf_path: Optional custom path to SF installation.
            If not provided, checks SF_PATH env var then common locations.

    Returns:
        Path to the directory containing SF's JAR files.

    Raises:
        SFNotFoundError: If Screaming Frog installation is not found. The
            message lists every candidate that was actually checked,
            including a non-existent ``sf_path`` / ``SF_PATH`` value.
    """
    candidates = [
        sf_path,
        os.environ.get("SF_PATH"),
        *SF_PATHS.get(get_platform(), []),
    ]

    checked = []
    for candidate in candidates:
        if not candidate:
            # Unset argument / environment variable: nothing to probe.
            continue
        if os.path.exists(candidate):
            return candidate
        checked.append(candidate)

    raise SFNotFoundError(
        "Screaming Frog not found.\n"
        f"Checked: {checked}\n"
        "Install from: https://www.screamingfrog.co.uk/seo-spider/\n"
        "Or pass sf_path argument, or set SF_PATH environment variable."
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _cli_path_for(base_path: str, plat: str, cli_name: str) -> str:
    """Derive the CLI executable path for one installation directory."""
    java_dir = "/Contents/Resources/Java"
    if plat == "Darwin" and java_dir in base_path:
        # macOS: /Applications/Screaming Frog SEO Spider.app/Contents/MacOS/ScreamingFrogSEOSpider
        return base_path.replace(java_dir, f"/Contents/MacOS/{cli_name}")
    # Other platforms, or a macOS path without the bundle's Java directory:
    # look for the executable directly inside the given directory. Without
    # this fallback the macOS replace() would be a no-op and the directory
    # itself would be reported as the executable.
    return os.path.join(base_path, cli_name)


def get_sf_cli_path(sf_path: Optional[str] = None) -> str:
    """Get path to SF CLI executable.

    Lookup order: the ``SF_CLI_PATH`` environment variable, then the
    executable derived from ``sf_path`` (if given), then the executables
    derived from the platform's default install locations, and finally, on
    Linux only, ``screamingfrogseospider`` on ``PATH``.

    Args:
        sf_path: Optional custom path to SF installation directory.

    Returns:
        Path to the Screaming Frog CLI executable.

    Raises:
        SFNotFoundError: If CLI executable is not found.
    """
    # Try custom path from env var
    custom = os.environ.get("SF_CLI_PATH")
    if custom and os.path.exists(custom):
        return custom

    plat = get_platform()
    cli_name = SF_CLI_NAMES.get(plat, "screamingfrogseospider")

    # An explicit installation directory takes precedence over the defaults.
    base_paths = [sf_path] if sf_path else []
    base_paths.extend(SF_PATHS.get(plat, []))

    for base_path in base_paths:
        cli_path = _cli_path_for(base_path, plat, cli_name)
        if os.path.exists(cli_path):
            return cli_path

    # On Linux, check if it's in PATH
    if plat == "Linux":
        which_result = shutil.which("screamingfrogseospider")
        if which_result:
            return which_result

    raise SFNotFoundError(
        "Screaming Frog CLI not found.\n"
        "Pass sf_path argument, or set SF_CLI_PATH environment variable."
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _bundled_java_for(base_path: str, plat: str, java_name: str) -> str:
    """Derive the bundled JRE's java executable path for one install dir."""
    java_dir = "/Contents/Resources/Java"
    if plat == "Darwin" and java_dir in base_path:
        return base_path.replace(
            java_dir,
            f"/Contents/PlugIns/jre.bundle/Contents/Home/bin/{java_name}",
        )
    # Other platforms, or a macOS path without the bundle's Java directory.
    # Without this fallback the macOS replace() would be a no-op and the
    # directory itself would be reported as the Java executable.
    return os.path.join(base_path, "jre", "bin", java_name)


def get_java_path(sf_path: Optional[str] = None) -> str:
    """Get path to Java executable.

    Lookup order:

    1. ``$JAVA_HOME/bin/java`` (``java.exe`` on Windows), if ``JAVA_HOME``
       is set and that file exists,
    2. the JRE bundled with the installation at ``sf_path`` (if given),
    3. the JRE bundled with a default installation for this platform,
    4. the system Java found on ``PATH``.

    Args:
        sf_path: Optional custom path to SF installation directory.

    Returns:
        Path to Java executable.

    Raises:
        SFNotFoundError: If no Java installation is found.
    """
    plat = get_platform()
    java_name = "java.exe" if plat == "Windows" else "java"

    # Try custom path from env var
    java_home = os.environ.get("JAVA_HOME")
    if java_home:
        java_path = os.path.join(java_home, "bin", java_name)
        if os.path.exists(java_path):
            return java_path

    # An explicit installation directory takes precedence over the defaults.
    base_paths = [sf_path] if sf_path else []
    base_paths.extend(SF_PATHS.get(plat, []))

    for base_path in base_paths:
        jre_path = _bundled_java_for(base_path, plat, java_name)
        if os.path.exists(jre_path):
            return jre_path

    # Fall back to system Java
    which_result = shutil.which(java_name)
    if which_result:
        return which_result

    raise SFNotFoundError(
        "Java not found.\n"
        "Screaming Frog installation may be corrupted or Java is not installed.\n"
        "Set JAVA_HOME environment variable if Java is installed elsewhere."
    )
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def get_classpath_separator() -> str:
    """Return the classpath separator: ";" on Windows, ":" everywhere else."""
    if get_platform() == "Windows":
        return ";"
    return ":"
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def get_default_config_path() -> Optional[Path]:
    """Locate SF's default ``spider.config`` file for the current platform.

    Locations probed:
        - Windows: ``%APPDATA%/Screaming Frog SEO Spider/spider.config``
        - Darwin: ``~/Library/Application Support/Screaming Frog SEO Spider/spider.config``
        - Linux: ``~/.ScreamingFrogSEOSpider/spider.config``

    Returns:
        Path to default config, or None if the file does not exist, the
        platform is not one of the above, or (on Windows) ``APPDATA`` is
        unset or empty.
    """
    system = get_platform()
    config_name = "spider.config"

    if system == "Windows":
        roaming = os.environ.get("APPDATA")
        if not roaming:
            return None
        candidate = Path(roaming) / "Screaming Frog SEO Spider" / config_name
    elif system == "Darwin":
        candidate = Path.home().joinpath(
            "Library",
            "Application Support",
            "Screaming Frog SEO Spider",
            config_name,
        )
    elif system == "Linux":
        candidate = Path.home().joinpath(".ScreamingFrogSEOSpider", config_name)
    else:
        return None

    return candidate if candidate.exists() else None
|