lighterpack-parser 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/lighterpack_parser/category.rb +31 -0
- data/lib/lighterpack_parser/category_parser.rb +76 -0
- data/lib/lighterpack_parser/gram_converter.rb +39 -0
- data/lib/lighterpack_parser/item.rb +92 -0
- data/lib/lighterpack_parser/item_parser.rb +171 -0
- data/lib/lighterpack_parser/list.rb +31 -0
- data/lib/lighterpack_parser/list_parser.rb +46 -0
- data/lib/lighterpack_parser/parser.rb +9 -234
- data/lib/lighterpack_parser/version.rb +1 -1
- data/lib/lighterpack_parser.rb +12 -0
- data/lighterpack-parser.gemspec +3 -1
- data/spec/parser_spec.rb +103 -61
- metadata +16 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 85fed15af6ad0ccfbba49c9960cf80e47fee71cbdce5166746e5788df489127d
|
|
4
|
+
data.tar.gz: dbf4da779516e9ba651846a1457fcdfa52142313643235f5cbc7500778d9baec
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1e35ae9d5211815cef892b469d26e4e79163698465e2b213e8f394a0d2f9546ddc007e7a182684513aff24579281fe719581d3156c3fc6323134f48af2d6067f
|
|
7
|
+
data.tar.gz: c90687504af99ace152a1bf2749362d70d004e37e3cd9baf50aee6adbe0fbce4869ba57ea7b6b7b74b617a6bac7ece5954dc4fe1c2e365377ffc110711efb8bb
|
data/lib/lighterpack_parser/category.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Represents a category from a Lighterpack list.
|
|
5
|
+
#
|
|
6
|
+
# @attr_reader [String] name The name of the category
|
|
7
|
+
# @attr_reader [String, nil] description Optional description of the category
|
|
8
|
+
# @attr_reader [Array<Item>] items Array of items in this category
|
|
9
|
+
class Category
|
|
10
|
+
attr_reader :name, :description, :items
|
|
11
|
+
|
|
12
|
+
# @param name [String] The name of the category
|
|
13
|
+
# @param description [String, nil] Optional description
|
|
14
|
+
# @param items [Array<Item>] Array of items in this category
|
|
15
|
+
def initialize(name:, description: nil, items: [])
|
|
16
|
+
@name = name
|
|
17
|
+
@description = description
|
|
18
|
+
@items = items
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Convert to hash for backward compatibility
|
|
22
|
+
# @return [Hash] Hash representation of the category
|
|
23
|
+
def to_h
|
|
24
|
+
{
|
|
25
|
+
name: name,
|
|
26
|
+
description: description,
|
|
27
|
+
items: items.map(&:to_h)
|
|
28
|
+
}
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
data/lib/lighterpack_parser/category_parser.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Parser for extracting category data from Lighterpack HTML documents.
|
|
5
|
+
class CategoryParser
|
|
6
|
+
# Parse all categories from a Lighterpack HTML document.
|
|
7
|
+
#
|
|
8
|
+
# @param doc [Nokogiri::HTML::Document] The parsed HTML document
|
|
9
|
+
# @param item_parser [ItemParser] The parser to use for extracting items
|
|
10
|
+
# @return [Array<Category>] Array of extracted categories
|
|
11
|
+
def parse_all(doc, item_parser:)
|
|
12
|
+
categories = []
|
|
13
|
+
|
|
14
|
+
# Lighterpack structure: ul.lpCategories > li.lpCategory
|
|
15
|
+
doc.css('ul.lpCategories > li.lpCategory').each do |category_element|
|
|
16
|
+
category = parse(category_element, item_parser: item_parser)
|
|
17
|
+
categories << category if category
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
categories
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Parse a single category element.
|
|
24
|
+
#
|
|
25
|
+
# @param category_element [Nokogiri::XML::Element] The category HTML element
|
|
26
|
+
# @param item_parser [ItemParser] The parser to use for extracting items
|
|
27
|
+
# @return [Category, nil] The parsed category, or nil if name is missing
|
|
28
|
+
def parse(category_element, item_parser:)
|
|
29
|
+
# Category name is in h2.lpCategoryName
|
|
30
|
+
category_header = category_element.at_css('h2.lpCategoryName')
|
|
31
|
+
return nil unless category_header
|
|
32
|
+
|
|
33
|
+
category_name = category_header.text.strip
|
|
34
|
+
return nil if category_name.empty?
|
|
35
|
+
|
|
36
|
+
# Description is typically in the category name itself (in parentheses)
|
|
37
|
+
description = extract_description(category_name)
|
|
38
|
+
|
|
39
|
+
# Find items in this category
|
|
40
|
+
items = extract_items(category_element, item_parser: item_parser)
|
|
41
|
+
|
|
42
|
+
Category.new(
|
|
43
|
+
name: category_name,
|
|
44
|
+
description: description,
|
|
45
|
+
items: items
|
|
46
|
+
)
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
private
|
|
50
|
+
|
|
51
|
+
def extract_items(category_element, item_parser:)
|
|
52
|
+
items = []
|
|
53
|
+
|
|
54
|
+
# Items are in ul.lpItems within the category
|
|
55
|
+
items_list = category_element.at_css('ul.lpItems')
|
|
56
|
+
return items unless items_list
|
|
57
|
+
|
|
58
|
+
# Extract items (skip header row)
|
|
59
|
+
items_list.css('li.lpItem').each do |item_element|
|
|
60
|
+
item = item_parser.parse(item_element)
|
|
61
|
+
items << item if item
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
items
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def extract_description(category_name)
|
|
68
|
+
# Description is often in parentheses in the category name
|
|
69
|
+
# e.g., "Big 3 (Pack, Tent, Sleep System)"
|
|
70
|
+
match = category_name.match(/\(([^)]+)\)/)
|
|
71
|
+
return match[1] if match
|
|
72
|
+
|
|
73
|
+
nil
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
data/lib/lighterpack_parser/gram_converter.rb
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Simple converter for weight units to grams.
|
|
5
|
+
class GramConverter
|
|
6
|
+
# Conversion factors for weight units to grams.
|
|
7
|
+
CONVERSION_FACTORS = {
|
|
8
|
+
'oz' => 28.3495,
|
|
9
|
+
'lb' => 453.592,
|
|
10
|
+
'g' => 1.0,
|
|
11
|
+
'kg' => 1000.0
|
|
12
|
+
}.freeze
|
|
13
|
+
|
|
14
|
+
# Initialize the converter with the source unit.
|
|
15
|
+
#
|
|
16
|
+
# @param source_unit [String] The unit to convert from.
|
|
17
|
+
def initialize(source_unit:)
|
|
18
|
+
@source_unit = source_unit
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Convert a value from the source unit to grams.
|
|
22
|
+
#
|
|
23
|
+
# @param value [Float] The value to convert..
|
|
24
|
+
# @return [Float] The converted value in grams.
|
|
25
|
+
def convert(value)
|
|
26
|
+
factor = CONVERSION_FACTORS[@source_unit.to_s.downcase] || 1.0
|
|
27
|
+
value * factor
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Convert a value from a unit to grams.
|
|
31
|
+
#
|
|
32
|
+
# @param value [Float] The value to convert.
|
|
33
|
+
# @param unit [String] The unit to convert from.
|
|
34
|
+
# @return [Float] The converted value in grams.
|
|
35
|
+
def self.to_grams(value, unit)
|
|
36
|
+
new(source_unit: unit).convert(value)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
data/lib/lighterpack_parser/item.rb
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Represents a single item from a Lighterpack list.
|
|
5
|
+
#
|
|
6
|
+
# @attr_reader [String] name The name of the item
|
|
7
|
+
# @attr_reader [String, nil] description Optional description of the item
|
|
8
|
+
# @attr_reader [Float] weight Weight per item in grams
|
|
9
|
+
# @attr_reader [Float] total_weight Total weight (weight * quantity) in grams
|
|
10
|
+
# @attr_reader [Integer] quantity Number of items
|
|
11
|
+
# @attr_reader [String, nil] image_url Optional URL to item image
|
|
12
|
+
# @attr_reader [Boolean] consumable Whether the item is consumable
|
|
13
|
+
# @attr_reader [Float, nil] total_consumable_weight Total consumable weight
|
|
14
|
+
# (weight * quantity) if consumable, nil otherwise
|
|
15
|
+
# @attr_reader [Boolean] worn Whether the item is worn
|
|
16
|
+
# @attr_reader [Integer, nil] worn_quantity Number of worn items (always 1 if worn, nil otherwise)
|
|
17
|
+
# @attr_reader [Float, nil] total_worn_weight Total worn weight (weight * 1) if worn, nil otherwise
|
|
18
|
+
class Item
|
|
19
|
+
attr_reader :name, :description, :weight, :total_weight, :quantity, :image_url,
|
|
20
|
+
:consumable, :total_consumable_weight, :worn, :worn_quantity, :total_worn_weight
|
|
21
|
+
|
|
22
|
+
# @param name [String] The name of the item
|
|
23
|
+
# @param description [String, nil] Optional description
|
|
24
|
+
# @param weight [Float] Weight per item in grams
|
|
25
|
+
# @param total_weight [Float] Total weight (weight * quantity) in grams
|
|
26
|
+
# @param quantity [Integer] Number of items
|
|
27
|
+
# @param image_url [String, nil] Optional URL to item image
|
|
28
|
+
# @param consumable [Boolean] Whether the item is consumable
|
|
29
|
+
# @param total_consumable_weight [Float, nil] Total consumable weight if consumable
|
|
30
|
+
# @param worn [Boolean] Whether the item is worn
|
|
31
|
+
# @param worn_quantity [Integer, nil] Number of worn items (1 if worn)
|
|
32
|
+
# @param total_worn_weight [Float, nil] Total worn weight if worn
|
|
33
|
+
# rubocop:disable Metrics/ParameterLists, Metrics/MethodLength
|
|
34
|
+
def initialize(name:, weight:, total_weight:, quantity:, description: nil,
|
|
35
|
+
image_url: nil, consumable: false, total_consumable_weight: nil,
|
|
36
|
+
worn: false, worn_quantity: nil, total_worn_weight: nil)
|
|
37
|
+
@name = name
|
|
38
|
+
@description = description
|
|
39
|
+
@weight = weight
|
|
40
|
+
@total_weight = total_weight
|
|
41
|
+
@quantity = quantity
|
|
42
|
+
@image_url = image_url
|
|
43
|
+
@consumable = consumable
|
|
44
|
+
@total_consumable_weight = total_consumable_weight
|
|
45
|
+
@worn = worn
|
|
46
|
+
@worn_quantity = worn_quantity
|
|
47
|
+
@total_worn_weight = total_worn_weight
|
|
48
|
+
end
|
|
49
|
+
# rubocop:enable Metrics/ParameterLists, Metrics/MethodLength
|
|
50
|
+
|
|
51
|
+
# @return [Boolean] Whether the item is worn
|
|
52
|
+
def worn? = worn
|
|
53
|
+
|
|
54
|
+
# @return [Boolean] Whether the item is consumable
|
|
55
|
+
def consumable? = consumable
|
|
56
|
+
|
|
57
|
+
# Convert to hash
|
|
58
|
+
#
|
|
59
|
+
# @return [Hash] Hash representation of the item
|
|
60
|
+
def to_h
|
|
61
|
+
{
|
|
62
|
+
name: name, description: description,
|
|
63
|
+
weight: weight, total_weight: total_weight,
|
|
64
|
+
quantity: quantity,
|
|
65
|
+
image_url: image_url,
|
|
66
|
+
consumable: consumable,
|
|
67
|
+
total_consumable_weight: total_consumable_weight,
|
|
68
|
+
worn: worn, worn_quantity: worn_quantity,
|
|
69
|
+
total_worn_weight: total_worn_weight
|
|
70
|
+
}
|
|
71
|
+
end
|
|
72
|
+
alias worn? worn
|
|
73
|
+
alias consumable? consumable
|
|
74
|
+
|
|
75
|
+
# Convert to hash
|
|
76
|
+
#
|
|
77
|
+
# @return [Hash] Hash representation of the item
|
|
78
|
+
def to_h
|
|
79
|
+
{
|
|
80
|
+
name: name, description: description,
|
|
81
|
+
weight: weight, total_weight: total_weight,
|
|
82
|
+
|
|
83
|
+
quantity: quantity,
|
|
84
|
+
image_url: image_url,
|
|
85
|
+
consumable: consumable,
|
|
86
|
+
total_consumable_weight: total_consumable_weight,
|
|
87
|
+
worn: worn, worn_quantity: worn_quantity,
|
|
88
|
+
total_worn_weight: total_worn_weight
|
|
89
|
+
}
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
data/lib/lighterpack_parser/item_parser.rb
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Parser for extracting item data from Lighterpack HTML elements.
|
|
5
|
+
class ItemParser
|
|
6
|
+
# Parse a single item element and return an Item object.
|
|
7
|
+
#
|
|
8
|
+
# @param element [Nokogiri::XML::Element] The item HTML element
|
|
9
|
+
# @return [Item, nil] The parsed item, or nil if name is missing
|
|
10
|
+
def parse(element)
|
|
11
|
+
name = extract_name(element)
|
|
12
|
+
return nil unless name
|
|
13
|
+
|
|
14
|
+
weight_data = extract_weight(element)
|
|
15
|
+
quantity = extract_quantity(element)
|
|
16
|
+
description = extract_description(element)
|
|
17
|
+
image_url = extract_image_url(element)
|
|
18
|
+
consumable = extract_consumable_flag(element)
|
|
19
|
+
worn = extract_worn_flag(element)
|
|
20
|
+
|
|
21
|
+
# Calculate per-item weight
|
|
22
|
+
weight_per_item = weight_data[:weight_grams]
|
|
23
|
+
|
|
24
|
+
# Calculate total weights
|
|
25
|
+
total_weight = weight_per_item * quantity
|
|
26
|
+
|
|
27
|
+
# In Lighterpack, if an item is consumable, the consumable_weight is always the full weight
|
|
28
|
+
# Calculate total consumable weight (per item * quantity)
|
|
29
|
+
total_consumable_weight = consumable ? weight_per_item * quantity : nil
|
|
30
|
+
|
|
31
|
+
# In Lighterpack, if an item is worn, only the first item is worn (worn_quantity = 1)
|
|
32
|
+
# regardless of total quantity
|
|
33
|
+
worn_quantity = worn ? 1 : nil
|
|
34
|
+
total_worn_weight = worn ? weight_per_item * 1 : nil
|
|
35
|
+
|
|
36
|
+
Item.new(
|
|
37
|
+
name: name,
|
|
38
|
+
description: description,
|
|
39
|
+
weight: weight_per_item,
|
|
40
|
+
total_weight: total_weight,
|
|
41
|
+
quantity: quantity,
|
|
42
|
+
image_url: image_url,
|
|
43
|
+
consumable: consumable,
|
|
44
|
+
total_consumable_weight: total_consumable_weight,
|
|
45
|
+
worn: worn,
|
|
46
|
+
worn_quantity: worn_quantity,
|
|
47
|
+
total_worn_weight: total_worn_weight
|
|
48
|
+
)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
def extract_name(element)
|
|
54
|
+
# Item name is in span.lpName
|
|
55
|
+
name_elem = element.at_css('span.lpName')
|
|
56
|
+
return name_elem.text.strip if name_elem
|
|
57
|
+
|
|
58
|
+
nil
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def extract_weight(element)
|
|
62
|
+
# Lighterpack stores weight in milligrams in input.lpMG
|
|
63
|
+
mg_input = element.at_css('input.lpMG')
|
|
64
|
+
if mg_input && mg_input['value']
|
|
65
|
+
# Convert from milligrams to grams
|
|
66
|
+
weight_grams = mg_input['value'].to_f / 1000.0
|
|
67
|
+
return { weight_grams: weight_grams, original_unit: 'g' }
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Fallback: try to get from span.lpWeight and unit
|
|
71
|
+
weight_elem = element.at_css('span.lpWeight')
|
|
72
|
+
unit_elem = element.at_css('span.lpDisplay, select.lpUnit option[selected]')
|
|
73
|
+
|
|
74
|
+
if weight_elem
|
|
75
|
+
weight_value = weight_elem.text.strip.to_f
|
|
76
|
+
unit = 'g' # default
|
|
77
|
+
|
|
78
|
+
if unit_elem
|
|
79
|
+
unit_text = unit_elem.text.strip.downcase
|
|
80
|
+
unit = unit_text if %w[oz lb g kg].include?(unit_text)
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
weight_grams = GramConverter.to_grams(weight_value, unit)
|
|
84
|
+
return { weight_grams: weight_grams, original_unit: unit }
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
{ weight_grams: 0.0, original_unit: 'g' }
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
def extract_quantity(element)
|
|
91
|
+
# Quantity is in span.lpQtyCell
|
|
92
|
+
qty_elem = element.at_css('span.lpQtyCell')
|
|
93
|
+
if qty_elem
|
|
94
|
+
qty_text = qty_elem.text.strip
|
|
95
|
+
return qty_text.to_i if qty_text.match?(/^\d+$/)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Check qty attribute
|
|
99
|
+
qty_attr = element['qty']
|
|
100
|
+
return qty_attr.to_i if qty_attr
|
|
101
|
+
|
|
102
|
+
1 # Default quantity
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def extract_description(element)
|
|
106
|
+
# Description is in span.lpDescription
|
|
107
|
+
desc_elem = element.at_css('span.lpDescription')
|
|
108
|
+
return desc_elem.text.strip if desc_elem && !desc_elem.text.strip.empty?
|
|
109
|
+
|
|
110
|
+
nil
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def extract_image_url(element)
|
|
114
|
+
# Image URL is in img.lpItemImage
|
|
115
|
+
img = element.at_css('img.lpItemImage')
|
|
116
|
+
if img && img['src']
|
|
117
|
+
# Decode HTML entities
|
|
118
|
+
url = img['src'].gsub('&#x2F;', '/').gsub('&#x3D;', '=')
|
|
119
|
+
return url
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Also check href attribute
|
|
123
|
+
if img && img['href']
|
|
124
|
+
url = img['href'].gsub('&#x2F;', '/').gsub('&#x3D;', '=')
|
|
125
|
+
return url
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
nil
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# rubocop:disable Naming/PredicateMethod
|
|
132
|
+
def extract_consumable_flag(element)
|
|
133
|
+
# Check for consumable icon with lpActive class (only active items have lpActive)
|
|
134
|
+
# Try CSS selector first - Nokogiri should handle multiple classes
|
|
135
|
+
consumable_active = element.at_css('i.lpSprite.lpConsumable.lpActive')
|
|
136
|
+
return true if consumable_active
|
|
137
|
+
|
|
138
|
+
# Fallback: check class attribute directly
|
|
139
|
+
consumable_icon = element.at_css('i.lpSprite.lpConsumable')
|
|
140
|
+
return false unless consumable_icon
|
|
141
|
+
|
|
142
|
+
class_attr = consumable_icon['class'].to_s
|
|
143
|
+
# Check if lpActive appears in the class string (handles extra spaces)
|
|
144
|
+
return true if class_attr.include?('lpActive')
|
|
145
|
+
|
|
146
|
+
# Explicitly return false to ensure boolean type
|
|
147
|
+
false
|
|
148
|
+
end
|
|
149
|
+
# rubocop:enable Naming/PredicateMethod
|
|
150
|
+
|
|
151
|
+
# rubocop:disable Naming/PredicateMethod
|
|
152
|
+
def extract_worn_flag(element)
|
|
153
|
+
# Check for worn icon with lpActive class (only active items have lpActive)
|
|
154
|
+
# Try CSS selector first - Nokogiri should handle multiple classes
|
|
155
|
+
worn_active = element.at_css('i.lpSprite.lpWorn.lpActive')
|
|
156
|
+
return true if worn_active
|
|
157
|
+
|
|
158
|
+
# Fallback: check class attribute directly
|
|
159
|
+
worn_icon = element.at_css('i.lpSprite.lpWorn')
|
|
160
|
+
return false unless worn_icon
|
|
161
|
+
|
|
162
|
+
class_attr = worn_icon['class'].to_s
|
|
163
|
+
# Check if lpActive appears in the class string (handles extra spaces)
|
|
164
|
+
return true if class_attr.include?('lpActive')
|
|
165
|
+
|
|
166
|
+
# Explicitly return false to ensure boolean type
|
|
167
|
+
false
|
|
168
|
+
end
|
|
169
|
+
# rubocop:enable Naming/PredicateMethod
|
|
170
|
+
end
|
|
171
|
+
end
|
data/lib/lighterpack_parser/list.rb
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Represents a Lighterpack list.
|
|
5
|
+
#
|
|
6
|
+
# @attr_reader [String] name The name of the list
|
|
7
|
+
# @attr_reader [String, nil] description Optional description of the list
|
|
8
|
+
# @attr_reader [Array<Category>] categories Array of categories in this list
|
|
9
|
+
class List
|
|
10
|
+
attr_reader :name, :description, :categories
|
|
11
|
+
|
|
12
|
+
# @param name [String] The name of the list
|
|
13
|
+
# @param description [String, nil] Optional description
|
|
14
|
+
# @param categories [Array<Category>] Array of categories in this list
|
|
15
|
+
def initialize(name:, description: nil, categories: [])
|
|
16
|
+
@name = name
|
|
17
|
+
@description = description
|
|
18
|
+
@categories = categories
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
# Convert to hash for backward compatibility
|
|
22
|
+
# @return [Hash] Hash representation of the list
|
|
23
|
+
def to_h
|
|
24
|
+
{
|
|
25
|
+
name: name,
|
|
26
|
+
description: description,
|
|
27
|
+
categories: categories.map(&:to_h)
|
|
28
|
+
}
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
data/lib/lighterpack_parser/list_parser.rb
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LighterpackParser
|
|
4
|
+
# Parser for extracting list data from Lighterpack HTML documents.
|
|
5
|
+
class ListParser
|
|
6
|
+
# Parse a Lighterpack HTML document and return a List object.
|
|
7
|
+
#
|
|
8
|
+
# @param doc [Nokogiri::HTML::Document] The parsed HTML document
|
|
9
|
+
# @param category_parser [CategoryParser] The parser to use for extracting categories
|
|
10
|
+
# @param item_parser [ItemParser] The parser to use for extracting items
|
|
11
|
+
# @return [List] The parsed list
|
|
12
|
+
def parse(doc, category_parser:, item_parser:)
|
|
13
|
+
List.new(
|
|
14
|
+
name: extract_name(doc),
|
|
15
|
+
description: extract_description(doc),
|
|
16
|
+
categories: category_parser.parse_all(doc, item_parser: item_parser)
|
|
17
|
+
)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
private
|
|
21
|
+
|
|
22
|
+
def extract_name(doc)
|
|
23
|
+
# Lighterpack uses h1.lpListName
|
|
24
|
+
h1 = doc.at_css('h1.lpListName')
|
|
25
|
+
return h1.text.strip if h1
|
|
26
|
+
|
|
27
|
+
# Fallback to regular h1
|
|
28
|
+
h1 = doc.at_css('h1')
|
|
29
|
+
return h1.text.strip if h1
|
|
30
|
+
|
|
31
|
+
title = doc.at_css('title')
|
|
32
|
+
return title.text.strip if title
|
|
33
|
+
|
|
34
|
+
'Untitled List'
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def extract_description(doc)
|
|
38
|
+
# Lighterpack doesn't seem to have a list description in the HTML
|
|
39
|
+
# Could be in meta tags
|
|
40
|
+
meta_desc = doc.at_css('meta[name="description"]')
|
|
41
|
+
return meta_desc['content'] if meta_desc && meta_desc['content']
|
|
42
|
+
|
|
43
|
+
nil
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
end
|
data/lib/lighterpack_parser/parser.rb
CHANGED
|
@@ -4,6 +4,10 @@ require 'nokogiri'
|
|
|
4
4
|
require 'httparty'
|
|
5
5
|
|
|
6
6
|
module LighterpackParser
|
|
7
|
+
# Main parser for extracting data from Lighterpack list HTML pages.
|
|
8
|
+
#
|
|
9
|
+
# Orchestrates the parsing process by coordinating ListParser, CategoryParser,
|
|
10
|
+
# and ItemParser to extract structured data from Lighterpack HTML.
|
|
7
11
|
class Parser
|
|
8
12
|
def initialize(html: nil, url: nil)
|
|
9
13
|
@html = if url
|
|
@@ -13,16 +17,14 @@ module LighterpackParser
|
|
|
13
17
|
else
|
|
14
18
|
raise ArgumentError, 'Either html or url must be provided'
|
|
15
19
|
end
|
|
20
|
+
@item_parser = ItemParser.new
|
|
21
|
+
@category_parser = CategoryParser.new
|
|
22
|
+
@list_parser = ListParser.new
|
|
16
23
|
end
|
|
17
24
|
|
|
18
25
|
def parse
|
|
19
26
|
doc = Nokogiri::HTML(@html)
|
|
20
|
-
|
|
21
|
-
{
|
|
22
|
-
name: extract_list_name(doc),
|
|
23
|
-
description: extract_list_description(doc),
|
|
24
|
-
categories: extract_categories(doc)
|
|
25
|
-
}
|
|
27
|
+
@list_parser.parse(doc, category_parser: @category_parser, item_parser: @item_parser)
|
|
26
28
|
end
|
|
27
29
|
|
|
28
30
|
private
|
|
@@ -30,235 +32,8 @@ module LighterpackParser
|
|
|
30
32
|
def fetch_html(url)
|
|
31
33
|
response = HTTParty.get(url, timeout: 30)
|
|
32
34
|
raise "Failed to fetch URL: #{response.code}" unless response.success?
|
|
33
|
-
response.body
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def extract_list_name(doc)
|
|
37
|
-
# Lighterpack uses h1.lpListName
|
|
38
|
-
h1 = doc.at_css('h1.lpListName')
|
|
39
|
-
return h1.text.strip if h1
|
|
40
|
-
|
|
41
|
-
# Fallback to regular h1
|
|
42
|
-
h1 = doc.at_css('h1')
|
|
43
|
-
return h1.text.strip if h1
|
|
44
|
-
|
|
45
|
-
title = doc.at_css('title')
|
|
46
|
-
return title.text.strip if title
|
|
47
|
-
|
|
48
|
-
'Untitled List'
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def extract_list_description(doc)
|
|
52
|
-
# Lighterpack doesn't seem to have a list description in the HTML
|
|
53
|
-
# Could be in meta tags
|
|
54
|
-
meta_desc = doc.at_css('meta[name="description"]')
|
|
55
|
-
return meta_desc['content'] if meta_desc && meta_desc['content']
|
|
56
|
-
|
|
57
|
-
nil
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def extract_categories(doc)
|
|
61
|
-
categories = []
|
|
62
|
-
|
|
63
|
-
# Lighterpack structure: ul.lpCategories > li.lpCategory
|
|
64
|
-
doc.css('ul.lpCategories > li.lpCategory').each do |category_element|
|
|
65
|
-
# Category name is in h2.lpCategoryName
|
|
66
|
-
category_header = category_element.at_css('h2.lpCategoryName')
|
|
67
|
-
next unless category_header
|
|
68
|
-
|
|
69
|
-
category_name = category_header.text.strip
|
|
70
|
-
next if category_name.empty?
|
|
71
|
-
|
|
72
|
-
# Description is typically in the category name itself (in parentheses)
|
|
73
|
-
description = extract_category_description(category_name)
|
|
74
|
-
|
|
75
|
-
# Find items in this category
|
|
76
|
-
items = extract_items_for_category(category_element)
|
|
77
|
-
|
|
78
|
-
categories << {
|
|
79
|
-
name: category_name,
|
|
80
|
-
description: description,
|
|
81
|
-
items: items
|
|
82
|
-
}
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
categories
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def extract_category_description(category_name)
|
|
89
|
-
# Description is often in parentheses in the category name
|
|
90
|
-
# e.g., "Big 3 (Pack, Tent, Sleep System)"
|
|
91
|
-
match = category_name.match(/\(([^)]+)\)/)
|
|
92
|
-
return match[1] if match
|
|
93
|
-
|
|
94
|
-
nil
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
def extract_items_for_category(category_element)
|
|
98
|
-
items = []
|
|
99
|
-
|
|
100
|
-
# Items are in ul.lpItems within the category
|
|
101
|
-
items_list = category_element.at_css('ul.lpItems')
|
|
102
|
-
return items unless items_list
|
|
103
|
-
|
|
104
|
-
# Extract items (skip header row)
|
|
105
|
-
items_list.css('li.lpItem').each do |item_element|
|
|
106
|
-
item = extract_item(item_element)
|
|
107
|
-
items << item if item && item[:name]
|
|
108
|
-
end
|
|
109
|
-
|
|
110
|
-
items
|
|
111
|
-
end
|
|
112
|
-
|
|
113
|
-
def extract_item(element)
|
|
114
|
-
# Extract item data from the element
|
|
115
|
-
# Lighterpack items have: name, weight, quantity, description, image
|
|
116
|
-
name = extract_item_name(element)
|
|
117
|
-
return nil unless name
|
|
118
|
-
|
|
119
|
-
weight_data = extract_weight(element)
|
|
120
|
-
quantity = extract_quantity(element)
|
|
121
|
-
description = extract_item_description(element)
|
|
122
|
-
image_url = extract_image_url(element)
|
|
123
|
-
consumable = extract_consumable_flag(element)
|
|
124
|
-
worn = extract_worn_flag(element)
|
|
125
|
-
|
|
126
|
-
{
|
|
127
|
-
name: name,
|
|
128
|
-
description: description,
|
|
129
|
-
weight: weight_data[:weight_grams],
|
|
130
|
-
quantity: quantity,
|
|
131
|
-
image_url: image_url,
|
|
132
|
-
consumable: consumable,
|
|
133
|
-
worn: worn
|
|
134
|
-
}
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
def extract_item_name(element)
|
|
138
|
-
# Item name is in span.lpName
|
|
139
|
-
name_elem = element.at_css('span.lpName')
|
|
140
|
-
return name_elem.text.strip if name_elem
|
|
141
35
|
|
|
142
|
-
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
def extract_weight(element)
|
|
146
|
-
# Lighterpack stores weight in milligrams in input.lpMG
|
|
147
|
-
mg_input = element.at_css('input.lpMG')
|
|
148
|
-
if mg_input && mg_input['value']
|
|
149
|
-
# Convert from milligrams to grams
|
|
150
|
-
weight_grams = mg_input['value'].to_f / 1000.0
|
|
151
|
-
return { weight_grams: weight_grams, original_unit: 'g' }
|
|
152
|
-
end
|
|
153
|
-
|
|
154
|
-
# Fallback: try to get from span.lpWeight and unit
|
|
155
|
-
weight_elem = element.at_css('span.lpWeight')
|
|
156
|
-
unit_elem = element.at_css('span.lpDisplay, select.lpUnit option[selected]')
|
|
157
|
-
|
|
158
|
-
if weight_elem
|
|
159
|
-
weight_value = weight_elem.text.strip.to_f
|
|
160
|
-
unit = 'g' # default
|
|
161
|
-
|
|
162
|
-
if unit_elem
|
|
163
|
-
unit_text = unit_elem.text.strip.downcase
|
|
164
|
-
unit = unit_text if ['oz', 'lb', 'g', 'kg'].include?(unit_text)
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
weight_grams = convert_to_grams(weight_value, unit)
|
|
168
|
-
return { weight_grams: weight_grams, original_unit: unit }
|
|
169
|
-
end
|
|
170
|
-
|
|
171
|
-
{ weight_grams: 0.0, original_unit: 'g' }
|
|
172
|
-
end
|
|
173
|
-
|
|
174
|
-
def convert_to_grams(value, unit)
|
|
175
|
-
case unit.downcase
|
|
176
|
-
when 'oz'
|
|
177
|
-
value * 28.3495
|
|
178
|
-
when 'lb'
|
|
179
|
-
value * 453.592
|
|
180
|
-
when 'g'
|
|
181
|
-
value
|
|
182
|
-
when 'kg'
|
|
183
|
-
value * 1000
|
|
184
|
-
else
|
|
185
|
-
value # Default to assuming grams
|
|
186
|
-
end
|
|
187
|
-
end
|
|
188
|
-
|
|
189
|
-
def extract_quantity(element)
|
|
190
|
-
# Quantity is in span.lpQtyCell
|
|
191
|
-
qty_elem = element.at_css('span.lpQtyCell')
|
|
192
|
-
if qty_elem
|
|
193
|
-
qty_text = qty_elem.text.strip
|
|
194
|
-
return qty_text.to_i if qty_text.match?(/^\d+$/)
|
|
195
|
-
end
|
|
196
|
-
|
|
197
|
-
# Check qty attribute
|
|
198
|
-
qty_attr = element['qty']
|
|
199
|
-
return qty_attr.to_i if qty_attr
|
|
200
|
-
|
|
201
|
-
1 # Default quantity
|
|
202
|
-
end
|
|
203
|
-
|
|
204
|
-
def extract_item_description(element)
|
|
205
|
-
# Description is in span.lpDescription
|
|
206
|
-
desc_elem = element.at_css('span.lpDescription')
|
|
207
|
-
return desc_elem.text.strip if desc_elem && !desc_elem.text.strip.empty?
|
|
208
|
-
|
|
209
|
-
nil
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
def extract_image_url(element)
|
|
213
|
-
# Image URL is in img.lpItemImage
|
|
214
|
-
img = element.at_css('img.lpItemImage')
|
|
215
|
-
if img && img['src']
|
|
216
|
-
# Decode HTML entities
|
|
217
|
-
url = img['src'].gsub('&#x2F;', '/').gsub('&#x3D;', '=')
|
|
218
|
-
return url
|
|
219
|
-
end
|
|
220
|
-
|
|
221
|
-
# Also check href attribute
|
|
222
|
-
if img && img['href']
|
|
223
|
-
url = img['href'].gsub('&#x2F;', '/').gsub('&#x3D;', '=')
|
|
224
|
-
return url
|
|
225
|
-
end
|
|
226
|
-
|
|
227
|
-
nil
|
|
228
|
-
end
|
|
229
|
-
|
|
230
|
-
def extract_consumable_flag(element)
|
|
231
|
-
# Check for consumable icon with lpActive class (only active items have lpActive)
|
|
232
|
-
# Try CSS selector first - Nokogiri should handle multiple classes
|
|
233
|
-
consumable_active = element.at_css('i.lpSprite.lpConsumable.lpActive')
|
|
234
|
-
return true if consumable_active
|
|
235
|
-
|
|
236
|
-
# Fallback: check class attribute directly
|
|
237
|
-
consumable_icon = element.at_css('i.lpSprite.lpConsumable')
|
|
238
|
-
return false unless consumable_icon
|
|
239
|
-
|
|
240
|
-
class_attr = consumable_icon['class'].to_s
|
|
241
|
-
# Check if lpActive appears in the class string (handles extra spaces)
|
|
242
|
-
return true if class_attr.include?('lpActive')
|
|
243
|
-
|
|
244
|
-
false
|
|
245
|
-
end
|
|
246
|
-
|
|
247
|
-
def extract_worn_flag(element)
|
|
248
|
-
# Check for worn icon with lpActive class (only active items have lpActive)
|
|
249
|
-
# Try CSS selector first - Nokogiri should handle multiple classes
|
|
250
|
-
worn_active = element.at_css('i.lpSprite.lpWorn.lpActive')
|
|
251
|
-
return true if worn_active
|
|
252
|
-
|
|
253
|
-
# Fallback: check class attribute directly
|
|
254
|
-
worn_icon = element.at_css('i.lpSprite.lpWorn')
|
|
255
|
-
return false unless worn_icon
|
|
256
|
-
|
|
257
|
-
class_attr = worn_icon['class'].to_s
|
|
258
|
-
# Check if lpActive appears in the class string (handles extra spaces)
|
|
259
|
-
return true if class_attr.include?('lpActive')
|
|
260
|
-
|
|
261
|
-
false
|
|
36
|
+
response.body
|
|
262
37
|
end
|
|
263
38
|
end
|
|
264
39
|
end
|
data/lib/lighterpack_parser.rb
CHANGED
|
@@ -1,8 +1,20 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'lighterpack_parser/version'
|
|
4
|
+
require_relative 'lighterpack_parser/gram_converter'
|
|
5
|
+
require_relative 'lighterpack_parser/item'
|
|
6
|
+
require_relative 'lighterpack_parser/category'
|
|
7
|
+
require_relative 'lighterpack_parser/list'
|
|
8
|
+
require_relative 'lighterpack_parser/item_parser'
|
|
9
|
+
require_relative 'lighterpack_parser/category_parser'
|
|
10
|
+
require_relative 'lighterpack_parser/list_parser'
|
|
4
11
|
require_relative 'lighterpack_parser/parser'
|
|
5
12
|
|
|
13
|
+
# Parser for extracting data from Lighterpack list HTML pages.
|
|
14
|
+
#
|
|
15
|
+
# Provides classes and methods to parse Lighterpack list HTML and extract
|
|
16
|
+
# structured data including list information, categories, and items with their
|
|
17
|
+
# properties (weight, quantity, consumable status, etc.).
|
|
6
18
|
module LighterpackParser
|
|
7
19
|
# Convenience method to parse a Lighterpack URL
|
|
8
20
|
def self.parse_url(url)
|
data/lighterpack-parser.gemspec
CHANGED
|
@@ -13,11 +13,13 @@ Gem::Specification.new do |spec|
|
|
|
13
13
|
spec.homepage = 'https://github.com/alex-ross/lighterpack-parser'
|
|
14
14
|
spec.license = 'MIT'
|
|
15
15
|
|
|
16
|
+
spec.required_ruby_version = '>= 3.0'
|
|
17
|
+
|
|
16
18
|
spec.files = Dir['lib/**/*', 'spec/**/*', '*.md', '*.gemspec']
|
|
17
19
|
spec.require_paths = ['lib']
|
|
18
20
|
|
|
19
|
-
spec.add_dependency 'nokogiri', '~> 1.15'
|
|
20
21
|
spec.add_dependency 'httparty', '~> 0.21'
|
|
22
|
+
spec.add_dependency 'nokogiri', '~> 1.15'
|
|
21
23
|
|
|
22
24
|
spec.add_development_dependency 'rspec', '~> 3.12'
|
|
23
25
|
end
|
data/spec/parser_spec.rb
CHANGED
|
@@ -11,31 +11,71 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
11
11
|
let(:result) { described_class.new(html: html).parse }
|
|
12
12
|
|
|
13
13
|
it 'extracts the list name' do
|
|
14
|
-
expect(result
|
|
14
|
+
expect(result.name).to eq('Ultimate Hike 2025')
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
it 'extracts categories as an array' do
|
|
18
|
-
expect(result
|
|
19
|
-
expect(result
|
|
18
|
+
expect(result.categories).to be_a(Array)
|
|
19
|
+
expect(result.categories.length).to be > 0
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
it 'extracts the first category correctly' do
|
|
23
|
-
first_category = result
|
|
24
|
-
expect(first_category
|
|
25
|
-
expect(first_category
|
|
26
|
-
expect(first_category
|
|
23
|
+
first_category = result.categories.first
|
|
24
|
+
expect(first_category.name).to eq('Big 3 (Pack, Tent, Sleep System)')
|
|
25
|
+
expect(first_category.items).to be_a(Array)
|
|
26
|
+
expect(first_category.items.length).to be > 0
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
it 'extracts the first item correctly' do
|
|
30
|
-
first_category = result
|
|
31
|
-
first_item = first_category
|
|
30
|
+
first_category = result.categories.first
|
|
31
|
+
first_item = first_category.items.first
|
|
32
|
+
|
|
33
|
+
expect(first_item.name).to eq('Bonfus Altus 38')
|
|
34
|
+
expect(first_item.description).to eq('With vest styled straps')
|
|
35
|
+
expect(first_item.weight).to be > 0
|
|
36
|
+
expect(first_item.quantity).to eq(1)
|
|
37
|
+
expect(first_item).to_not be_worn
|
|
38
|
+
expect(first_item).to_not be_consumable
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it 'includes total weight fields' do
|
|
42
|
+
first_category = result.categories.first
|
|
43
|
+
first_item = first_category.items.first
|
|
44
|
+
|
|
45
|
+
expect(first_item.total_weight).to be > 0
|
|
46
|
+
expect(first_item.total_weight).to eq(first_item.weight * first_item.quantity)
|
|
47
|
+
|
|
48
|
+
if first_item.consumable
|
|
49
|
+
expect(first_item.total_consumable_weight).to be > 0
|
|
50
|
+
expect(first_item.total_consumable_weight).to eq(first_item.weight * first_item.quantity)
|
|
51
|
+
else
|
|
52
|
+
expect(first_item.total_consumable_weight).to be_nil
|
|
53
|
+
end
|
|
32
54
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
55
|
+
if first_item.worn
|
|
56
|
+
expect(first_item.worn_quantity).to eq(1)
|
|
57
|
+
expect(first_item.total_worn_weight).to be > 0
|
|
58
|
+
expect(first_item.total_worn_weight).to eq(first_item.weight * 1)
|
|
59
|
+
else
|
|
60
|
+
expect(first_item.worn_quantity).to be_nil
|
|
61
|
+
expect(first_item.total_worn_weight).to be_nil
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
it 'sets worn_quantity to 1 for worn items regardless of quantity' do
|
|
66
|
+
result.categories.each do |category|
|
|
67
|
+
category.items.each do |item|
|
|
68
|
+
if item.worn
|
|
69
|
+
expect(item.worn_quantity).to eq(1),
|
|
70
|
+
"Worn item #{item.name} should have worn_quantity=1, " \
|
|
71
|
+
"got #{item.worn_quantity}"
|
|
72
|
+
expect(item.total_worn_weight).to eq(item.weight * 1),
|
|
73
|
+
"Worn item #{item.name} should have total_worn_weight = weight * 1"
|
|
74
|
+
else
|
|
75
|
+
expect(item.worn_quantity).to be_nil, "Non-worn item #{item.name} should have worn_quantity=nil"
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
39
79
|
end
|
|
40
80
|
end
|
|
41
81
|
|
|
@@ -44,11 +84,11 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
44
84
|
let(:result) { described_class.new(html: html).parse }
|
|
45
85
|
|
|
46
86
|
it 'extracts the list name' do
|
|
47
|
-
expect(result
|
|
87
|
+
expect(result.name).to be_truthy
|
|
48
88
|
end
|
|
49
89
|
|
|
50
90
|
it 'extracts categories as an array' do
|
|
51
|
-
expect(result
|
|
91
|
+
expect(result.categories).to be_a(Array)
|
|
52
92
|
end
|
|
53
93
|
end
|
|
54
94
|
|
|
@@ -57,11 +97,11 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
57
97
|
let(:result) { described_class.new(html: html).parse }
|
|
58
98
|
|
|
59
99
|
it 'extracts the list name' do
|
|
60
|
-
expect(result
|
|
100
|
+
expect(result.name).to be_truthy
|
|
61
101
|
end
|
|
62
102
|
|
|
63
103
|
it 'extracts categories as an array' do
|
|
64
|
-
expect(result
|
|
104
|
+
expect(result.categories).to be_a(Array)
|
|
65
105
|
end
|
|
66
106
|
end
|
|
67
107
|
end
|
|
@@ -71,11 +111,11 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
71
111
|
let(:result) { described_class.new(html: html).parse }
|
|
72
112
|
|
|
73
113
|
it 'converts weights to grams correctly' do
|
|
74
|
-
result
|
|
75
|
-
category
|
|
76
|
-
if item
|
|
77
|
-
expect(item
|
|
78
|
-
expect(item
|
|
114
|
+
result.categories.each do |category|
|
|
115
|
+
category.items.each do |item|
|
|
116
|
+
if item.weight > 0
|
|
117
|
+
expect(item.weight).to be > 0, "Item #{item.name} should have weight > 0"
|
|
118
|
+
expect(item.weight).to be < 1_000_000, "Item #{item.name} weight seems too large: #{item.weight}"
|
|
79
119
|
end
|
|
80
120
|
end
|
|
81
121
|
end
|
|
@@ -87,9 +127,9 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
87
127
|
let(:result) { described_class.new(html: html).parse }
|
|
88
128
|
|
|
89
129
|
it 'extracts consumable flag as boolean for all items' do
|
|
90
|
-
result
|
|
91
|
-
category
|
|
92
|
-
expect([true, false]).to include(item
|
|
130
|
+
result.categories.each do |category|
|
|
131
|
+
category.items.each do |item|
|
|
132
|
+
expect([true, false]).to include(item.consumable), "Consumable should be boolean for #{item.name}"
|
|
93
133
|
end
|
|
94
134
|
end
|
|
95
135
|
end
|
|
@@ -100,9 +140,9 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
100
140
|
let(:result) { described_class.new(html: html).parse }
|
|
101
141
|
|
|
102
142
|
it 'extracts worn flag as boolean for all items' do
|
|
103
|
-
result
|
|
104
|
-
category
|
|
105
|
-
expect([true, false]).to include(item
|
|
143
|
+
result.categories.each do |category|
|
|
144
|
+
category.items.each do |item|
|
|
145
|
+
expect([true, false]).to include(item.worn), "Worn should be boolean for #{item.name}"
|
|
106
146
|
end
|
|
107
147
|
end
|
|
108
148
|
end
|
|
@@ -111,62 +151,64 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
111
151
|
describe 'worn flag correctness for h23rxt.html' do
|
|
112
152
|
let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
|
|
113
153
|
let(:result) { described_class.new(html: html).parse }
|
|
114
|
-
let(:all_items) { result
|
|
154
|
+
let(:all_items) { result.categories.flat_map(&:items) }
|
|
115
155
|
|
|
116
156
|
it 'correctly identifies Sea to Summit Ultrasil as worn' do
|
|
117
|
-
ultrasil = all_items.find { |item| item
|
|
157
|
+
ultrasil = all_items.find { |item| item.name&.include?('Sea to Summit Ultrasil') }
|
|
118
158
|
expect(ultrasil).to be_truthy, 'Should find Sea to Summit Ultrasil item'
|
|
119
|
-
expect(ultrasil
|
|
120
|
-
expect(ultrasil
|
|
159
|
+
expect(ultrasil.worn).to be(true), 'Sea to Summit Ultrasil should be worn'
|
|
160
|
+
expect(ultrasil.consumable).to be(false), 'Sea to Summit Ultrasil should NOT be consumable'
|
|
121
161
|
end
|
|
122
162
|
|
|
123
163
|
it 'correctly identifies MacBook Pro as not worn' do
|
|
124
|
-
macbook = all_items.find { |item| item
|
|
164
|
+
macbook = all_items.find { |item| item.name&.include?('MacBook Pro') }
|
|
125
165
|
expect(macbook).to be_truthy, 'Should find MacBook Pro item'
|
|
126
|
-
expect(macbook
|
|
127
|
-
expect(macbook
|
|
166
|
+
expect(macbook.worn).to be(false), 'MacBook Pro should NOT be worn'
|
|
167
|
+
expect(macbook.consumable).to be(false), 'MacBook Pro should NOT be consumable'
|
|
128
168
|
end
|
|
129
169
|
end
|
|
130
170
|
|
|
131
171
|
describe 'consumable flag correctness for h23rxt.html' do
|
|
132
172
|
let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
|
|
133
173
|
let(:result) { described_class.new(html: html).parse }
|
|
134
|
-
let(:all_items) { result
|
|
174
|
+
let(:all_items) { result.categories.flat_map(&:items) }
|
|
135
175
|
|
|
136
176
|
it 'correctly identifies Tandkräm as consumable' do
|
|
137
|
-
tandkram = all_items.find { |item| item
|
|
177
|
+
tandkram = all_items.find { |item| item.name&.include?('Tandkräm (innehåll)') }
|
|
138
178
|
expect(tandkram).to be_truthy, 'Should find Tandkräm item'
|
|
139
|
-
expect(tandkram
|
|
140
|
-
expect(tandkram
|
|
179
|
+
expect(tandkram.consumable).to be(true), 'Tandkräm should be consumable'
|
|
180
|
+
expect(tandkram.worn).to be(false), 'Tandkräm should NOT be worn'
|
|
141
181
|
end
|
|
142
182
|
|
|
143
183
|
it 'correctly identifies Dushtvål/Shampoo as consumable' do
|
|
144
|
-
shampoo = all_items.find { |item| item
|
|
184
|
+
shampoo = all_items.find { |item| item.name&.include?('Dushtvål') || item.name&.include?('Shampoo') }
|
|
145
185
|
expect(shampoo).to be_truthy, 'Should find Dushtvål/Shampoo item'
|
|
146
|
-
expect(shampoo
|
|
147
|
-
expect(shampoo
|
|
186
|
+
expect(shampoo.consumable).to be(true), 'Dushtvål/Shampoo should be consumable'
|
|
187
|
+
expect(shampoo.worn).to be(false), 'Dushtvål/Shampoo should NOT be worn'
|
|
148
188
|
end
|
|
149
189
|
|
|
150
190
|
it 'correctly identifies MacBook Pro as not consumable' do
|
|
151
|
-
macbook = all_items.find { |item| item
|
|
191
|
+
macbook = all_items.find { |item| item.name&.include?('MacBook Pro') }
|
|
152
192
|
expect(macbook).to be_truthy, 'Should find MacBook Pro item'
|
|
153
|
-
expect(macbook
|
|
193
|
+
expect(macbook.consumable).to be(false), 'MacBook Pro should NOT be consumable'
|
|
154
194
|
end
|
|
155
195
|
end
|
|
156
196
|
|
|
157
197
|
describe 'worn and consumable counts for h23rxt.html' do
|
|
158
198
|
let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
|
|
159
199
|
let(:result) { described_class.new(html: html).parse }
|
|
160
|
-
let(:all_items) { result
|
|
200
|
+
let(:all_items) { result.categories.flat_map(&:items) }
|
|
161
201
|
let(:total_items) { all_items.length }
|
|
162
|
-
let(:worn_count) { all_items.count
|
|
163
|
-
let(:consumable_count) { all_items.count
|
|
202
|
+
let(:worn_count) { all_items.count(&:worn) }
|
|
203
|
+
let(:consumable_count) { all_items.count(&:consumable) }
|
|
164
204
|
|
|
165
205
|
it 'has reasonable counts of worn and consumable items' do
|
|
166
206
|
expect(worn_count).to be >= 1, "Should have at least 1 worn item, got #{worn_count}"
|
|
167
207
|
expect(worn_count).to be <= 5, "Should have at most 5 worn items (most items are not worn), got #{worn_count}"
|
|
168
208
|
expect(consumable_count).to be >= 2, "Should have at least 2 consumable items, got #{consumable_count}"
|
|
169
|
-
expect(consumable_count).to be <= 5,
|
|
209
|
+
expect(consumable_count).to be <= 5,
|
|
210
|
+
'Should have at most 5 consumable items ' \
|
|
211
|
+
"(most items are not consumable), got #{consumable_count}"
|
|
170
212
|
expect(total_items).to be > 10, "Should have many items total, got #{total_items}"
|
|
171
213
|
end
|
|
172
214
|
end
|
|
@@ -176,10 +218,10 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
176
218
|
let(:result) { described_class.new(html: html).parse }
|
|
177
219
|
|
|
178
220
|
it 'extracts quantities as positive integers' do
|
|
179
|
-
result
|
|
180
|
-
category
|
|
181
|
-
expect(item
|
|
182
|
-
expect(item
|
|
221
|
+
result.categories.each do |category|
|
|
222
|
+
category.items.each do |item|
|
|
223
|
+
expect(item.quantity).to be_a(Integer), "Quantity should be integer for #{item.name}"
|
|
224
|
+
expect(item.quantity).to be > 0, "Quantity should be > 0 for #{item.name}"
|
|
183
225
|
end
|
|
184
226
|
end
|
|
185
227
|
end
|
|
@@ -191,10 +233,10 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
191
233
|
|
|
192
234
|
it 'extracts image URLs correctly' do
|
|
193
235
|
items_with_images = 0
|
|
194
|
-
result
|
|
195
|
-
category
|
|
196
|
-
if item
|
|
197
|
-
expect(item
|
|
236
|
+
result.categories.each do |category|
|
|
237
|
+
category.items.each do |item|
|
|
238
|
+
if item.image_url
|
|
239
|
+
expect(item.image_url).to start_with('http'), "Image URL should start with http for #{item.name}"
|
|
198
240
|
items_with_images += 1
|
|
199
241
|
end
|
|
200
242
|
end
|
|
@@ -209,9 +251,9 @@ RSpec.describe LighterpackParser::Parser do
|
|
|
209
251
|
let(:result) { described_class.new(html: html).parse }
|
|
210
252
|
|
|
211
253
|
it 'extracts category descriptions when available' do
|
|
212
|
-
result
|
|
213
|
-
expect(category
|
|
214
|
-
|
|
254
|
+
result.categories.each do |category|
|
|
255
|
+
expect(category.description).to be_nil.or(be_a(String)),
|
|
256
|
+
"Description should be nil or string for category #{category.name}"
|
|
215
257
|
end
|
|
216
258
|
end
|
|
217
259
|
end
|
metadata
CHANGED
|
@@ -1,43 +1,43 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: lighterpack-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 1.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Packlista Team
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-01-02 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
|
-
name:
|
|
14
|
+
name: httparty
|
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
|
16
16
|
requirements:
|
|
17
17
|
- - "~>"
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '
|
|
19
|
+
version: '0.21'
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - "~>"
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '
|
|
26
|
+
version: '0.21'
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
|
-
name:
|
|
28
|
+
name: nokogiri
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - "~>"
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '
|
|
33
|
+
version: '1.15'
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - "~>"
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '
|
|
40
|
+
version: '1.15'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: rspec
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -62,6 +62,13 @@ extra_rdoc_files: []
|
|
|
62
62
|
files:
|
|
63
63
|
- README.md
|
|
64
64
|
- lib/lighterpack_parser.rb
|
|
65
|
+
- lib/lighterpack_parser/category.rb
|
|
66
|
+
- lib/lighterpack_parser/category_parser.rb
|
|
67
|
+
- lib/lighterpack_parser/gram_converter.rb
|
|
68
|
+
- lib/lighterpack_parser/item.rb
|
|
69
|
+
- lib/lighterpack_parser/item_parser.rb
|
|
70
|
+
- lib/lighterpack_parser/list.rb
|
|
71
|
+
- lib/lighterpack_parser/list_parser.rb
|
|
65
72
|
- lib/lighterpack_parser/parser.rb
|
|
66
73
|
- lib/lighterpack_parser/version.rb
|
|
67
74
|
- lighterpack-parser.gemspec
|
|
@@ -82,7 +89,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
82
89
|
requirements:
|
|
83
90
|
- - ">="
|
|
84
91
|
- !ruby/object:Gem::Version
|
|
85
|
-
version: '0'
|
|
92
|
+
version: '3.0'
|
|
86
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
94
|
requirements:
|
|
88
95
|
- - ">="
|