sf-config-builder 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,316 @@
1
+ Metadata-Version: 2.4
2
+ Name: sf-config-builder
3
+ Version: 0.1.1
4
+ Summary: Manage Screaming Frog configs programmatically
5
+ Author: Antonio
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/Amaculus/sf-config-builder
8
+ Project-URL: Documentation, https://github.com/Amaculus/sf-config-builder#readme
9
+ Project-URL: Repository, https://github.com/Amaculus/sf-config-builder
10
+ Project-URL: Issues, https://github.com/Amaculus/sf-config-builder/issues
11
+ Keywords: seo,screaming-frog,crawling,automation,config
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
23
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
24
+ Requires-Python: >=3.8
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=7.0; extra == "dev"
29
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # sf-config-builder
33
+
34
+ Manage Screaming Frog `.seospiderconfig` files programmatically.
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install sf-config-builder
40
+ ```
41
+
42
+ ### Requirements
43
+
44
+ - **Screaming Frog SEO Spider** must be installed (provides JARs for deserialization)
45
+ - Python 3.8+
46
+
47
+ ## Quick Start
48
+
49
+ ```python
50
+ from sfconfig import SFConfig
51
+
52
+ # Load existing config
53
+ config = SFConfig.load("base.seospiderconfig")
54
+
55
+ # Configure for e-commerce audit
56
+ config.max_urls = 100000
57
+ config.rendering_mode = "JAVASCRIPT"
58
+
59
+ # Add custom extractions
60
+ config.add_extraction("Price", "//span[@class='price']")
61
+ config.add_extraction("SKU", "//span[@itemprop='sku']")
62
+ config.add_extraction("Stock", ".availability", selector_type="CSS")
63
+
64
+ # Add exclude patterns
65
+ config.add_exclude(r".*\.pdf$")
66
+ config.add_exclude(r".*/admin/.*")
67
+
68
+ # Save and run
69
+ config.save("client-audit.seospiderconfig")
70
+ config.run_crawl("https://example.com", output_folder="./results")
71
+ ```
72
+
73
+ ## Features
74
+
75
+ ### Inspect Configs
76
+
77
+ ```python
78
+ config = SFConfig.load("my.seospiderconfig")
79
+
80
+ # Get specific field
81
+ max_urls = config.get("mCrawlConfig.mMaxUrls")
82
+
83
+ # List all fields
84
+ for field in config.fields():
85
+ print(f"{field['path']}: {field['value']}")
86
+
87
+ # Filter by prefix
88
+ crawl_fields = config.fields(prefix="mCrawlConfig")
89
+ ```
90
+
91
+ ### Modify Configs
92
+
93
+ ```python
94
+ # Direct field access
95
+ config.set("mCrawlConfig.mMaxUrls", 100000)
96
+
97
+ # Convenience properties
98
+ config.max_urls = 100000
99
+ config.max_depth = 10
100
+ config.rendering_mode = "JAVASCRIPT" # STATIC | JAVASCRIPT
101
+ config.robots_mode = "IGNORE" # RESPECT | IGNORE
102
+ config.crawl_delay = 0.5
103
+ config.user_agent = "MyBot/1.0"
104
+ ```
105
+
106
+ ### Custom Extractions
107
+
108
+ ```python
109
+ # Add extraction rules
110
+ config.add_extraction(
111
+ name="Price",
112
+ selector="//span[@class='price']",
113
+ selector_type="XPATH", # XPATH | CSS | REGEX
114
+ extract_mode="TEXT" # TEXT | HTML_ELEMENT | INNER_HTML
115
+ )
116
+
117
+ # List extractions
118
+ for ext in config.extractions:
119
+ print(f"{ext['name']}: {ext['selector']}")
120
+
121
+ # Remove by name
122
+ config.remove_extraction("Price")
123
+
124
+ # Clear all
125
+ config.clear_extractions()
126
+ ```
127
+
128
+ ### Exclude/Include Patterns
129
+
130
+ ```python
131
+ # Excludes (URLs matching these patterns are skipped)
132
+ config.add_exclude(r".*\.pdf$")
133
+ config.add_exclude(r".*/admin/.*")
134
+
135
+ # Includes (only URLs matching these are crawled)
136
+ config.add_include(r".*/products/.*")
137
+
138
+ # List patterns
139
+ print(config.excludes)
140
+ print(config.includes)
141
+ ```
142
+
143
+ ### Compare Configs
144
+
145
+ ```python
146
+ from sfconfig import SFConfig
147
+
148
+ diff = SFConfig.diff("old.seospiderconfig", "new.seospiderconfig")
149
+
150
+ if diff.has_changes:
151
+ print(f"Found {diff.change_count} differences:")
152
+ print(diff)
153
+
154
+ # Filter by prefix
155
+ crawl_changes = diff.changes_for("mCrawlConfig")
156
+ ```
157
+
158
+ ### Test Extractions
159
+
160
+ ```python
161
+ # Test selector against live URL before full crawl
162
+ result = config.test_extraction(
163
+ url="https://example.com/product",
164
+ selector="//span[@class='price']",
165
+ selector_type="XPATH"
166
+ )
167
+
168
+ if result["match_count"] > 0:
169
+ print(f"Found: {result['matches']}")
170
+ else:
171
+ print("Selector didn't match - fix before crawling")
172
+ ```
173
+
174
+ ### Run Crawls
175
+
176
+ ```python
177
+ # Blocking crawl
178
+ config.run_crawl(
179
+ url="https://example.com",
180
+ output_folder="./results",
181
+ export_tabs=["Internal:All", "Response Codes:All"],
182
+ export_format="csv",
183
+ timeout=3600
184
+ )
185
+
186
+ # Async crawl
187
+ process = config.run_crawl_async(
188
+ url="https://example.com",
189
+ output_folder="./results"
190
+ )
191
+ # Do other work...
192
+ process.wait() # Block until complete
193
+ ```
194
+
195
+ ## Multi-Client Workflow
196
+
197
+ ```python
198
+ from sfconfig import SFConfig
199
+
200
+ clients = [
201
+ {"domain": "client1.com", "max_urls": 50000},
202
+ {"domain": "client2.com", "max_urls": 100000},
203
+ ]
204
+
205
+ for client in clients:
206
+ config = SFConfig.load("agency-base.seospiderconfig")
207
+ config.max_urls = client["max_urls"]
208
+ config.add_extraction("Price", "//span[@class='price']")
209
+
210
+ config.save(f"/tmp/{client['domain']}.seospiderconfig")
211
+ config.run_crawl(
212
+ url=f"https://{client['domain']}",
213
+ output_folder=f"./results/{client['domain']}"
214
+ )
215
+ ```
216
+
217
+ ## Error Handling
218
+
219
+ ```python
220
+ from sfconfig import (
221
+ SFConfig,
222
+ SFNotFoundError,
223
+ SFValidationError,
224
+ SFParseError,
225
+ SFCrawlError
226
+ )
227
+
228
+ try:
229
+ config = SFConfig.load("my.seospiderconfig")
230
+ config.set("mInvalidField", 123)
231
+ config.save()
232
+ except SFNotFoundError:
233
+ print("Install Screaming Frog first")
234
+ except SFValidationError as e:
235
+ print(f"Invalid field: {e}")
236
+ except SFParseError as e:
237
+ print(f"Could not parse config: {e}")
238
+ except SFCrawlError as e:
239
+ print(f"Crawl failed: {e}")
240
+ ```
241
+
242
+ ## Environment Variables
243
+
244
+ | Variable | Description |
245
+ |----------|-------------|
246
+ | `SF_PATH` | Custom path to SF's JAR directory |
247
+ | `SF_CLI_PATH` | Custom path to SF CLI executable |
248
+ | `JAVA_HOME` | Custom Java installation path |
249
+
250
+ ## Architecture
251
+
252
+ ```
253
+ User Python code
254
+ |
255
+ v
256
+ +------------------+
257
+ | sfconfig | (Python wrapper)
258
+ | - SFConfig |
259
+ | - SFDiff |
260
+ +--------+---------+
261
+ | subprocess.run()
262
+ v
263
+ +------------------+
264
+ | ConfigBuilder | (Java CLI, bundled ~50KB)
265
+ | .jar |
266
+ +--------+---------+
267
+ | classpath includes
268
+ v
269
+ +------------------+
270
+ | SF's JARs | (from user's local SF install, NOT bundled)
271
+ +------------------+
272
+ ```
273
+
274
+ At runtime, the library builds a classpath combining:
275
+ - `ConfigBuilder.jar` (bundled with this package)
276
+ - `{SF_INSTALL_PATH}/*` (user's local Screaming Frog JARs)
277
+
278
+ This means:
279
+ - Only our small JAR is distributed (no licensing issues)
280
+ - SF's proprietary JARs are used from the user's existing installation
281
+ - Compatibility is maintained across SF versions
282
+
283
+ ## Development
284
+
285
+ ### Building the Java CLI
286
+
287
+ The Java CLI lives in a separate repo (`sf-config-builder`). To build:
288
+
289
+ ```bash
290
+ cd /path/to/sf-config-builder
291
+
292
+ # Compile against SF's JARs (as compile-time dependency)
293
+ javac -cp "C:/Program Files/Screaming Frog SEO Spider/*" \
294
+ -d bin src/ConfigBuilder.java
295
+
296
+ # Package into JAR
297
+ cd bin
298
+ jar cfe ConfigBuilder.jar ConfigBuilder *.class
299
+
300
+ # Copy to Python package
301
+ cp ConfigBuilder.jar /path/to/sf-config-tool/sfconfig/java/
302
+ ```
303
+
304
+ **Important**: Only bundle `ConfigBuilder.jar`. Do NOT bundle any JARs from SF's install directory - those are proprietary and already on the user's machine.
305
+
306
+ ### Installing for Development
307
+
308
+ ```bash
309
+ cd sf-config-tool
310
+ pip install -e ".[dev]"
311
+ pytest tests/
312
+ ```
313
+
314
+ ## License
315
+
316
+ MIT
@@ -0,0 +1,10 @@
1
+ sf_config_builder-0.1.1.dist-info/licenses/LICENSE,sha256=gY8ByiA5FLdBA_UPHEij4N2I3ACFJK_axSAGB8lvhxA,1064
2
+ sfconfig/__init__.py,sha256=XsFEy2jxz61Ub5pUVfjoouIlJd5sfY8QrtofAbJrSTQ,902
3
+ sfconfig/config.py,sha256=zTDPBN8yRxb4aboWIv-lTKZPajquMff9J_DOymZH_fQ,25169
4
+ sfconfig/diff.py,sha256=p3gh-wcalAeR8UwZmpbGgkZVRMkwBKpdMMhIQIQUqsg,4597
5
+ sfconfig/exceptions.py,sha256=AsMRv9WZgpoxo3BZCVmk6WyZ4dULL7ojL79BHjfp0zY,492
6
+ sfconfig/paths.py,sha256=kOdnoA3IYIAUTnurcGlDoyfhns4a8J73PE5pYDnLBmE,6502
7
+ sf_config_builder-0.1.1.dist-info/METADATA,sha256=fkYwSe_ZJPbf4-lEEfQQJKNkA7eDoi_2U5iggAivLP0,7965
8
+ sf_config_builder-0.1.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
9
+ sf_config_builder-0.1.1.dist-info/top_level.txt,sha256=-vINvIw8-ocnRNzJE8L7cn0qTGjRooBvTPHF4I1An2M,9
10
+ sf_config_builder-0.1.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Antonio
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ sfconfig
sfconfig/__init__.py ADDED
@@ -0,0 +1,34 @@
1
+ """sf-config-tool: Manage Screaming Frog configs programmatically.
2
+
3
+ This library provides a Python interface for managing Screaming Frog
4
+ .seospiderconfig files, enabling inspection, diffing, modification,
5
+ and crawl execution without using the SF GUI.
6
+
7
+ Example:
8
+ >>> from sfconfig import SFConfig
9
+ >>> config = SFConfig.load("base.seospiderconfig")
10
+ >>> config.add_extraction("Price", "//span[@class='price']")
11
+ >>> config.save("client.seospiderconfig")
12
+ >>> config.run_crawl("https://example.com", output_folder="./results")
13
+ """
14
+
15
+ from .config import SFConfig
16
+ from .diff import SFDiff
17
+ from .exceptions import (
18
+ SFConfigError,
19
+ SFCrawlError,
20
+ SFNotFoundError,
21
+ SFParseError,
22
+ SFValidationError,
23
+ )
24
+
25
+ __version__ = "0.1.1"
26
+ __all__ = [
27
+ "SFConfig",
28
+ "SFDiff",
29
+ "SFConfigError",
30
+ "SFNotFoundError",
31
+ "SFValidationError",
32
+ "SFParseError",
33
+ "SFCrawlError",
34
+ ]