blueprint-html2slim 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/lib/blueprint/html2slim/slim_extractor.rb +160 -17
- data/lib/blueprint/html2slim/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8659e98075970808c3f6b98f3bf5630df930b0164f4010cc57d2393a6646e403
|
4
|
+
data.tar.gz: cbae380fbe76e804e24d79ba393480864f9cdbd2858d4395d55f1527dacba5aa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85ee1ae5d5bc5ed1a67478c9255681fe3925141c575c29ae439abbfb77409b74002e881a12cbec444f2133659feaca90487012d44e8e73acbe1ac47ed7886458
|
7
|
+
data.tar.gz: 173c15886f0865772500f67cb04d06cfa425921fb0dc2fca77a0975003c6f8835ab013a3b78593dcf86824bba9899e22c5ddbb95b7b47168e2030bd248039379
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,27 @@
|
|
2
2
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
4
4
|
|
5
|
+
## [1.3.1] - 2025-01-16
|
6
|
+
|
7
|
+
### Fixed
|
8
|
+
- **SlimExtractor CSS Selector Support**:
|
9
|
+
- Fixed child combinator selectors (`body > section`) not working
|
10
|
+
- Now correctly extracts multiple matching elements instead of just the first
|
11
|
+
- Added support for parent-child relationship validation in selectors
|
12
|
+
- **Improved Content Extraction**:
|
13
|
+
- Enhanced default removal list to also include `html` and `body` elements (`script` was already removed by default)
|
14
|
+
- Added automatic cleanup of orphaned comments when sections are removed
|
15
|
+
- Fixed extraction logic to handle multiple sections properly
|
16
|
+
- **Enhanced CSS Selector Parser**:
|
17
|
+
- Added `parse_simple_selector()` for individual selector components
|
18
|
+
- Implemented `matches_child_selector?()` for parent-child verification
|
19
|
+
- Added `find_parent_item()` for structure hierarchy navigation
|
20
|
+
|
21
|
+
### Changed
|
22
|
+
- Default remove list now includes: `doctype html head nav header footer script body`
|
23
|
+
- CSS selector extraction now handles multiple matching elements in single operation
|
24
|
+
- Orphaned comments are automatically cleaned up during extraction
|
25
|
+
|
5
26
|
## [1.3.0] - 2025-01-16
|
6
27
|
|
7
28
|
### Added
|
@@ -22,7 +22,7 @@ module Blueprint
|
|
22
22
|
|
23
23
|
# Default removals if not keeping specific sections
|
24
24
|
if sections_to_keep.empty? && sections_to_remove.empty?
|
25
|
-
sections_to_remove = %w[doctype head nav header footer script]
|
25
|
+
sections_to_remove = %w[doctype html head nav header footer script body]
|
26
26
|
end
|
27
27
|
|
28
28
|
# Extract content
|
@@ -32,6 +32,9 @@ module Blueprint
|
|
32
32
|
# Remove wrapper if requested (not for outline mode)
|
33
33
|
extracted = remove_outer_wrapper(extracted) if options[:remove_wrapper] && !options[:outline]
|
34
34
|
|
35
|
+
# Clean up orphaned comments
|
36
|
+
extracted = clean_orphaned_comments(extracted)
|
37
|
+
|
35
38
|
# Rebuild the Slim content
|
36
39
|
new_content = rebuild_extracted_content(extracted)
|
37
40
|
|
@@ -203,26 +206,48 @@ module Blueprint
|
|
203
206
|
|
204
207
|
def extract_by_selector(structure, selector)
|
205
208
|
result = []
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
+
@current_structure = structure # Store for parent lookup
|
210
|
+
|
209
211
|
# Parse the CSS selector
|
210
212
|
selector_parts = parse_css_selector(selector)
|
211
213
|
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
214
|
+
# For child selectors like "body > section", find all matching sections
|
215
|
+
if selector_parts[:combinator] == :child
|
216
|
+
structure.each do |item|
|
217
|
+
if matches_selector?(item, selector_parts)
|
218
|
+
# Add this item and all its children
|
219
|
+
result << item
|
220
|
+
# Add children until we hit the same or lower indent level
|
221
|
+
item_index = structure.index(item)
|
222
|
+
next unless item_index
|
223
|
+
|
224
|
+
(item_index + 1...structure.size).each do |i|
|
225
|
+
child_item = structure[i]
|
226
|
+
break if child_item[:indent_level] <= item[:indent_level]
|
227
|
+
result << child_item
|
228
|
+
end
|
229
|
+
end
|
217
230
|
end
|
231
|
+
else
|
232
|
+
# Original single-section logic for simple selectors
|
233
|
+
in_selected_section = false
|
234
|
+
selected_indent = nil
|
235
|
+
|
236
|
+
structure.each do |item|
|
237
|
+
# Check if we're exiting a selected section
|
238
|
+
if in_selected_section && selected_indent && item[:indent_level] <= selected_indent
|
239
|
+
in_selected_section = false
|
240
|
+
selected_indent = nil
|
241
|
+
end
|
218
242
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
243
|
+
# Check if this item matches the selector
|
244
|
+
if !in_selected_section && matches_selector?(item, selector_parts)
|
245
|
+
in_selected_section = true
|
246
|
+
selected_indent = item[:indent_level]
|
247
|
+
result << item
|
248
|
+
elsif in_selected_section
|
249
|
+
result << item
|
250
|
+
end
|
226
251
|
end
|
227
252
|
end
|
228
253
|
|
@@ -230,7 +255,27 @@ module Blueprint
|
|
230
255
|
end
|
231
256
|
|
232
257
|
def parse_css_selector(selector)
|
233
|
-
# Support
|
258
|
+
# Support CSS selectors: element, #id, .class, element.class, element#id
|
259
|
+
# Also support child combinator: parent > child
|
260
|
+
parts = {}
|
261
|
+
|
262
|
+
# Handle child combinator (e.g., "body > section")
|
263
|
+
if selector.include?(' > ')
|
264
|
+
parent_child = selector.split(' > ').map(&:strip)
|
265
|
+
if parent_child.size == 2
|
266
|
+
parts[:parent] = parse_simple_selector(parent_child[0])
|
267
|
+
parts[:child] = parse_simple_selector(parent_child[1])
|
268
|
+
parts[:combinator] = :child
|
269
|
+
return parts
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
# Handle simple selectors
|
274
|
+
parts.merge!(parse_simple_selector(selector))
|
275
|
+
parts
|
276
|
+
end
|
277
|
+
|
278
|
+
def parse_simple_selector(selector)
|
234
279
|
parts = {}
|
235
280
|
|
236
281
|
# Handle complex selectors like div.container#main
|
@@ -261,6 +306,48 @@ module Blueprint
|
|
261
306
|
end
|
262
307
|
|
263
308
|
# Tests whether a structure item satisfies a parsed selector.
#
# Child-combinator selectors are delegated to matches_child_selector?.
# For simple selectors the item's line is run through element_selector and
# the element name, id and classes are compared against the selector parts.
#
# ID and class tokens must match exactly: `.btn` does not match
# `.btn-primary`, and `#main` does not match `#main-nav`.
#
# @param item [Hash] structure entry; reads item[:stripped]
# @param selector_parts [Hash] output of parse_css_selector
#   (reads :combinator, :element, :id, :classes)
# @return [Boolean] (or whatever matches_child_selector? returns for
#   child-combinator selectors)
def matches_selector?(item, selector_parts)
  # Handle child combinator selectors
  return matches_child_selector?(item, selector_parts) if selector_parts[:combinator] == :child

  # Handle simple selectors
  element_info = element_selector(item[:stripped])
  return false unless element_info

  # Check element match
  return false if selector_parts[:element] && element_info[:element] != selector_parts[:element]

  # A token matches only when it is not immediately followed by another
  # identifier character, so a shorter name never matches as a prefix of a
  # longer one.
  shortcut = element_info[:selector]
  token_present = lambda do |token|
    shortcut.match?(/#{Regexp.escape(token)}(?![\w-])/)
  end

  # Check ID match
  return false if selector_parts[:id] && !token_present.call("##{selector_parts[:id]}")

  # Check class matches
  Array(selector_parts[:classes]).all? { |cls| token_present.call(".#{cls}") }
end
|
335
|
+
|
336
|
+
# Evaluates a "parent > child" selector against a structure item.
#
# The item itself has to satisfy the :child part, and the item returned by
# find_parent_item has to satisfy the :parent part. An item for which no
# parent is found never matches.
def matches_child_selector?(item, selector_parts)
  if matches_simple_selector?(item, selector_parts[:child])
    enclosing = find_parent_item(item)
    if enclosing
      matches_simple_selector?(enclosing, selector_parts[:parent])
    else
      false
    end
  else
    false
  end
end
|
349
|
+
|
350
|
+
def matches_simple_selector?(item, selector_parts)
|
264
351
|
line = item[:stripped]
|
265
352
|
element_info = element_selector(line)
|
266
353
|
|
@@ -281,6 +368,62 @@ module Blueprint
|
|
281
368
|
|
282
369
|
true
|
283
370
|
end
|
371
|
+
|
372
|
+
# Finds the parent of an item in @current_structure.
#
# The parent is the closest item that appears before the target (smaller
# :line_number) and has a smaller :indent_level.
#
# @param target_item [Hash] structure entry; reads :indent_level and
#   :line_number
# @return [Hash, nil] the parent entry, or nil when no structure has been
#   stored or no earlier, less-indented item exists
def find_parent_item(target_item)
  target_indent = target_item[:indent_level]
  target_line_num = target_item[:line_number]

  return nil unless @current_structure

  # Walk backwards without building a reversed copy of the structure; the
  # first earlier item with a smaller indent is the nearest ancestor.
  @current_structure.reverse_each do |item|
    next if item[:line_number] >= target_line_num

    return item if item[:indent_level] < target_indent
  end

  nil
end
|
391
|
+
|
392
|
+
# Drops HTML comments that have no content left after them.
#
# A comment (item[:type] == :html_comment) is kept only when some later item
# is either not a comment or is indented deeper than the comment. Comments
# followed solely by other comments at the same or a shallower indent, or by
# nothing at all, are removed. Non-comment items are always kept.
#
# @param structure [Array<Hash>] entries; reads :type and :indent_level
# @return [Array<Hash>] a new array with the surviving entries in their
#   original order
def clean_orphaned_comments(structure)
  kept = []

  structure.each_with_index do |item, index|
    if item[:type] == :html_comment
      comment_indent = item[:indent_level]

      # Look ahead for anything that justifies keeping the comment.
      has_following_content = (index + 1...structure.size).any? do |next_index|
        next_item = structure[next_index]
        next_item[:indent_level] > comment_indent || next_item[:type] != :html_comment
      end

      next unless has_following_content
    end

    kept << item
  end

  kept
end
|
284
427
|
end
|
285
428
|
end
|
286
429
|
end
|