lighterpack-parser 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 443e65a776623c5e587889eecf74da63064015eae8a3d1882b3e0a5ad5864675
4
- data.tar.gz: 25cb7cf2b5cea5deeadcc82a53665ac999c69895ee3754a3174929a6b43f3264
3
+ metadata.gz: 85fed15af6ad0ccfbba49c9960cf80e47fee71cbdce5166746e5788df489127d
4
+ data.tar.gz: dbf4da779516e9ba651846a1457fcdfa52142313643235f5cbc7500778d9baec
5
5
  SHA512:
6
- metadata.gz: 95e5525bd874996437de92e6a486a530ca23677e035c640bb7b4d7b2a7662189a67ea97a7cfa19913973e7ae875822517b05e73a66c0e0060dc1c28d27e06128
7
- data.tar.gz: 14af29e67a8f67ed1552e7e9dc2801005a214d800ab788dea0fc1add26478c7df4e780262278271a3f33945d6866f2df42fa511770df083e9c8f4383ff530bb1
6
+ metadata.gz: 1e35ae9d5211815cef892b469d26e4e79163698465e2b213e8f394a0d2f9546ddc007e7a182684513aff24579281fe719581d3156c3fc6323134f48af2d6067f
7
+ data.tar.gz: c90687504af99ace152a1bf2749362d70d004e37e3cd9baf50aee6adbe0fbce4869ba57ea7b6b7b74b617a6bac7ece5954dc4fe1c2e365377ffc110711efb8bb
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# A named group of items inside a Lighterpack list.
#
# @attr_reader [String] name The name of the category
# @attr_reader [String, nil] description Optional description of the category
# @attr_reader [Array<Item>] items Array of items in this category
class Category
  attr_reader :name, :description, :items

  # Build a category from already-parsed parts.
  #
  # @param name [String] The name of the category
  # @param description [String, nil] Optional description
  # @param items [Array<Item>] Array of items in this category
  def initialize(name:, description: nil, items: [])
    @items = items
    @description = description
    @name = name
  end

  # Hash form of the category; each item is converted through its own #to_h.
  #
  # @return [Hash] Hash with :name, :description and :items keys
  def to_h
    serialized_items = items.map { |item| item.to_h }

    { name: name, description: description, items: serialized_items }
  end
end
31
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# Parser for extracting category data from Lighterpack HTML documents.
class CategoryParser
  # Matches the first parenthesised fragment of a category name,
  # e.g. "Big 3 (Pack, Tent, Sleep System)" captures "Pack, Tent, Sleep System".
  DESCRIPTION_PATTERN = /\(([^)]+)\)/

  # Parse all categories from a Lighterpack HTML document.
  #
  # Elements for which #parse returns nil (no usable name) are left out.
  #
  # @param doc [Nokogiri::HTML::Document] The parsed HTML document
  # @param item_parser [ItemParser] The parser to use for extracting items
  # @return [Array<Category>] Array of extracted categories
  def parse_all(doc, item_parser:)
    # Lighterpack structure: ul.lpCategories > li.lpCategory
    doc.css('ul.lpCategories > li.lpCategory').filter_map do |category_element|
      parse(category_element, item_parser: item_parser)
    end
  end

  # Parse a single category element.
  #
  # @param category_element [Nokogiri::XML::Element] The category HTML element
  # @param item_parser [ItemParser] The parser to use for extracting items
  # @return [Category, nil] The parsed category, or nil if the h2.lpCategoryName
  #   header is absent or blank
  def parse(category_element, item_parser:)
    category_name = category_element.at_css('h2.lpCategoryName')&.text&.strip
    return nil if category_name.nil? || category_name.empty?

    # The name is kept verbatim; the description is only a copy of the
    # parenthesised part of it.
    Category.new(
      name: category_name,
      description: extract_description(category_name),
      items: extract_items(category_element, item_parser: item_parser)
    )
  end

  private

  # Collect the items found under the category's ul.lpItems list.
  #
  # @return [Array<Item>] Parsed items; entries the item parser rejects
  #   (returns nil for) are dropped. Empty when there is no ul.lpItems.
  def extract_items(category_element, item_parser:)
    items_list = category_element.at_css('ul.lpItems')
    return [] unless items_list

    # Only li.lpItem children are visited, so other rows are never parsed.
    items_list.css('li.lpItem').filter_map { |item_element| item_parser.parse(item_element) }
  end

  # Pull the description out of the category name.
  #
  # @return [String, nil] Text inside the first pair of parentheses, or nil
  def extract_description(category_name)
    category_name[DESCRIPTION_PATTERN, 1]
  end
end
76
+ end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# Simple converter for weight units to grams.
class GramConverter
  # Grams per one unit, keyed by lower-case unit abbreviation.
  CONVERSION_FACTORS = {
    'oz' => 28.3495,
    'lb' => 453.592,
    'g' => 1.0,
    'kg' => 1000.0
  }.freeze

  # Initialize the converter with the source unit.
  #
  # @param source_unit [String, Symbol, nil] The unit to convert from. Case and
  #   surrounding whitespace are ignored.
  def initialize(source_unit:)
    @source_unit = source_unit
  end

  # Convert a value from the source unit to grams.
  #
  # A unit that is not in CONVERSION_FACTORS is treated as grams (factor 1.0).
  #
  # @param value [Numeric] The value to convert.
  # @return [Float] The converted value in grams.
  def convert(value)
    value * CONVERSION_FACTORS.fetch(normalized_unit, 1.0)
  end

  # Convert a value from a unit to grams.
  #
  # @param value [Numeric] The value to convert.
  # @param unit [String, Symbol, nil] The unit to convert from.
  # @return [Float] The converted value in grams.
  def self.to_grams(value, unit)
    new(source_unit: unit).convert(value)
  end

  private

  # Lookup key for CONVERSION_FACTORS: stringified, trimmed and lower-cased so
  # that inputs such as " OZ " or :kg still hit the table.
  def normalized_unit
    @source_unit.to_s.strip.downcase
  end
end
39
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# Represents a single item from a Lighterpack list.
#
# @attr_reader [String] name The name of the item
# @attr_reader [String, nil] description Optional description of the item
# @attr_reader [Float] weight Weight per item in grams
# @attr_reader [Float] total_weight Total weight (weight * quantity) in grams
# @attr_reader [Integer] quantity Number of items
# @attr_reader [String, nil] image_url Optional URL to item image
# @attr_reader [Boolean] consumable Whether the item is consumable
# @attr_reader [Float, nil] total_consumable_weight Total consumable weight
#   (weight * quantity) if consumable, nil otherwise
# @attr_reader [Boolean] worn Whether the item is worn
# @attr_reader [Integer, nil] worn_quantity Number of worn items (always 1 if worn, nil otherwise)
# @attr_reader [Float, nil] total_worn_weight Total worn weight (weight * 1) if worn, nil otherwise
class Item
  attr_reader :name, :description, :weight, :total_weight, :quantity, :image_url,
              :consumable, :total_consumable_weight, :worn, :worn_quantity, :total_worn_weight

  # The values are stored exactly as given; no totals are derived here.
  #
  # @param name [String] The name of the item
  # @param description [String, nil] Optional description
  # @param weight [Float] Weight per item in grams
  # @param total_weight [Float] Total weight (weight * quantity) in grams
  # @param quantity [Integer] Number of items
  # @param image_url [String, nil] Optional URL to item image
  # @param consumable [Boolean] Whether the item is consumable
  # @param total_consumable_weight [Float, nil] Total consumable weight if consumable
  # @param worn [Boolean] Whether the item is worn
  # @param worn_quantity [Integer, nil] Number of worn items (1 if worn)
  # @param total_worn_weight [Float, nil] Total worn weight if worn
  # rubocop:disable Metrics/ParameterLists, Metrics/MethodLength
  def initialize(name:, weight:, total_weight:, quantity:, description: nil,
                 image_url: nil, consumable: false, total_consumable_weight: nil,
                 worn: false, worn_quantity: nil, total_worn_weight: nil)
    @name = name
    @description = description
    @weight = weight
    @total_weight = total_weight
    @quantity = quantity
    @image_url = image_url
    @consumable = consumable
    @total_consumable_weight = total_consumable_weight
    @worn = worn
    @worn_quantity = worn_quantity
    @total_worn_weight = total_worn_weight
  end
  # rubocop:enable Metrics/ParameterLists, Metrics/MethodLength

  # Predicate form of #worn.
  #
  # @return [Boolean] Whether the item is worn
  def worn?
    worn
  end

  # Predicate form of #consumable.
  #
  # @return [Boolean] Whether the item is consumable
  def consumable?
    consumable
  end

  # Convert to hash
  #
  # @return [Hash] Hash representation of the item, one key per attribute
  def to_h
    {
      name: name, description: description,
      weight: weight, total_weight: total_weight,
      quantity: quantity,
      image_url: image_url,
      consumable: consumable,
      total_consumable_weight: total_consumable_weight,
      worn: worn, worn_quantity: worn_quantity,
      total_worn_weight: total_worn_weight
    }
  end
end
92
+ end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# Parser for extracting item data from Lighterpack HTML elements.
class ItemParser
  # Unit abbreviations accepted by the span.lpWeight fallback.
  KNOWN_UNITS = %w[oz lb g kg].freeze

  # Parse a single item element and return an Item object.
  #
  # @param element [Nokogiri::XML::Element] The item HTML element
  # @return [Item, nil] The parsed item, or nil if there is no span.lpName
  def parse(element)
    name = extract_name(element)
    return nil unless name

    weight_per_item = extract_weight(element)[:weight_grams]
    quantity = extract_quantity(element)
    consumable = extract_consumable_flag(element)
    worn = extract_worn_flag(element)
    total_weight = weight_per_item * quantity

    Item.new(
      name: name,
      description: extract_description(element),
      weight: weight_per_item,
      total_weight: total_weight,
      quantity: quantity,
      image_url: extract_image_url(element),
      consumable: consumable,
      # A consumable item counts with its full weight for every unit.
      total_consumable_weight: consumable ? total_weight : nil,
      worn: worn,
      # Only one unit of a worn item counts as worn, whatever the quantity.
      worn_quantity: worn ? 1 : nil,
      total_worn_weight: worn ? weight_per_item : nil
    )
  end

  private

  # @return [String, nil] Stripped text of span.lpName, or nil if it is absent
  def extract_name(element)
    element.at_css('span.lpName')&.text&.strip
  end

  # Read the per-item weight.
  #
  # The milligram value of input.lpMG wins when present; otherwise the text of
  # span.lpWeight is converted from the displayed unit.
  #
  # @return [Hash] { weight_grams: Float, original_unit: String }
  def extract_weight(element)
    mg_input = element.at_css('input.lpMG')
    if mg_input && mg_input['value']
      # Convert from milligrams to grams
      return { weight_grams: mg_input['value'].to_f / 1000.0, original_unit: 'g' }
    end

    weight_elem = element.at_css('span.lpWeight')
    return { weight_grams: 0.0, original_unit: 'g' } unless weight_elem

    unit = extract_unit(element)
    { weight_grams: GramConverter.to_grams(weight_elem.text.strip.to_f, unit), original_unit: unit }
  end

  # @return [String] The displayed unit if it is one of KNOWN_UNITS, else 'g'
  def extract_unit(element)
    unit_elem = element.at_css('span.lpDisplay, select.lpUnit option[selected]')
    unit_text = unit_elem&.text&.strip&.downcase
    KNOWN_UNITS.include?(unit_text) ? unit_text : 'g'
  end

  # Read the quantity from span.lpQtyCell, then the qty attribute, then
  # default to 1.
  #
  # @return [Integer]
  def extract_quantity(element)
    qty_text = element.at_css('span.lpQtyCell')&.text&.strip
    return qty_text.to_i if qty_text&.match?(/^\d+$/)

    element['qty']&.to_i || 1
  end

  # @return [String, nil] Stripped text of span.lpDescription, nil when blank
  def extract_description(element)
    text = element.at_css('span.lpDescription')&.text&.strip
    text unless text.nil? || text.empty?
  end

  # Read the image URL from img.lpItemImage, preferring src over href.
  #
  # @return [String, nil] The URL with &#x2F; and &#x3D; decoded, or nil
  def extract_image_url(element)
    img = element.at_css('img.lpItemImage')
    raw_url = img && (img['src'] || img['href'])
    return nil unless raw_url

    # Decode HTML entities
    raw_url.gsub('&#x2F;', '/').gsub('&#x3D;', '=')
  end

  # rubocop:disable Naming/PredicateMethod
  # @return [Boolean] Whether the consumable icon is active
  def extract_consumable_flag(element)
    active_icon?(element, 'lpConsumable')
  end

  # @return [Boolean] Whether the worn icon is active
  def extract_worn_flag(element)
    active_icon?(element, 'lpWorn')
  end
  # rubocop:enable Naming/PredicateMethod

  # Shared check for the consumable and worn sprites.
  #
  # @param icon_class [String] CSS class that identifies the sprite
  # @return [Boolean] true when an i.lpSprite icon of that class carries lpActive
  def active_icon?(element, icon_class)
    return true if element.at_css("i.lpSprite.#{icon_class}.lpActive")

    # Fallback: look for lpActive anywhere in the raw class attribute.
    icon = element.at_css("i.lpSprite.#{icon_class}")
    return false unless icon

    icon['class'].to_s.include?('lpActive')
  end
end
171
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# A parsed Lighterpack list: its name, optional description and categories.
#
# @attr_reader [String] name The name of the list
# @attr_reader [String, nil] description Optional description of the list
# @attr_reader [Array<Category>] categories Array of categories in this list
class List
  attr_reader :name, :description, :categories

  # Build a list from already-parsed parts.
  #
  # @param name [String] The name of the list
  # @param description [String, nil] Optional description
  # @param categories [Array<Category>] Array of categories in this list
  def initialize(name:, description: nil, categories: [])
    @categories = categories
    @description = description
    @name = name
  end

  # Hash form of the list; each category is converted through its own #to_h.
  #
  # @return [Hash] Hash with :name, :description and :categories keys
  def to_h
    serialized_categories = categories.map { |category| category.to_h }

    { name: name, description: description, categories: serialized_categories }
  end
end
31
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LighterpackParser
4
# Parser for extracting list data from Lighterpack HTML documents.
class ListParser
  # Selectors tried in order when looking for the list name:
  # the Lighterpack heading, any h1, then the page title.
  NAME_SELECTORS = ['h1.lpListName', 'h1', 'title'].freeze

  # Name used when none of NAME_SELECTORS yields any text.
  DEFAULT_NAME = 'Untitled List'

  # Parse a Lighterpack HTML document and return a List object.
  #
  # @param doc [Nokogiri::HTML::Document] The parsed HTML document
  # @param category_parser [CategoryParser] The parser to use for extracting categories
  # @param item_parser [ItemParser] The parser to use for extracting items
  # @return [List] The parsed list
  def parse(doc, category_parser:, item_parser:)
    List.new(
      name: extract_name(doc),
      description: extract_description(doc),
      categories: category_parser.parse_all(doc, item_parser: item_parser)
    )
  end

  private

  # Find the list name.
  #
  # A matching element whose text is blank is skipped so that the next
  # selector (and finally DEFAULT_NAME) still gets a chance.
  #
  # @return [String] The first non-blank stripped text, or DEFAULT_NAME
  def extract_name(doc)
    NAME_SELECTORS.each do |selector|
      text = doc.at_css(selector)&.text&.strip
      return text unless text.nil? || text.empty?
    end

    DEFAULT_NAME
  end

  # Read the list description from the description meta tag.
  #
  # @return [String, nil] Stripped content attribute, or nil when the tag or
  #   attribute is missing or blank
  def extract_description(doc)
    meta_desc = doc.at_css('meta[name="description"]')
    content = meta_desc && meta_desc['content']
    return nil unless content

    stripped = content.strip
    stripped.empty? ? nil : stripped
  end
end
46
+ end
@@ -4,6 +4,10 @@ require 'nokogiri'
4
4
  require 'httparty'
5
5
 
6
6
  module LighterpackParser
7
+ # Main parser for extracting data from Lighterpack list HTML pages.
8
+ #
9
+ # Orchestrates the parsing process by coordinating ListParser, CategoryParser,
10
+ # and ItemParser to extract structured data from Lighterpack HTML.
7
11
  class Parser
8
12
  def initialize(html: nil, url: nil)
9
13
  @html = if url
@@ -13,16 +17,14 @@ module LighterpackParser
13
17
  else
14
18
  raise ArgumentError, 'Either html or url must be provided'
15
19
  end
20
+ @item_parser = ItemParser.new
21
+ @category_parser = CategoryParser.new
22
+ @list_parser = ListParser.new
16
23
  end
17
24
 
18
25
  def parse
19
26
  doc = Nokogiri::HTML(@html)
20
-
21
- {
22
- name: extract_list_name(doc),
23
- description: extract_list_description(doc),
24
- categories: extract_categories(doc)
25
- }
27
+ @list_parser.parse(doc, category_parser: @category_parser, item_parser: @item_parser)
26
28
  end
27
29
 
28
30
  private
@@ -30,235 +32,8 @@ module LighterpackParser
30
32
  def fetch_html(url)
31
33
  response = HTTParty.get(url, timeout: 30)
32
34
  raise "Failed to fetch URL: #{response.code}" unless response.success?
33
- response.body
34
- end
35
-
36
- def extract_list_name(doc)
37
- # Lighterpack uses h1.lpListName
38
- h1 = doc.at_css('h1.lpListName')
39
- return h1.text.strip if h1
40
-
41
- # Fallback to regular h1
42
- h1 = doc.at_css('h1')
43
- return h1.text.strip if h1
44
-
45
- title = doc.at_css('title')
46
- return title.text.strip if title
47
-
48
- 'Untitled List'
49
- end
50
-
51
- def extract_list_description(doc)
52
- # Lighterpack doesn't seem to have a list description in the HTML
53
- # Could be in meta tags
54
- meta_desc = doc.at_css('meta[name="description"]')
55
- return meta_desc['content'] if meta_desc && meta_desc['content']
56
-
57
- nil
58
- end
59
-
60
- def extract_categories(doc)
61
- categories = []
62
-
63
- # Lighterpack structure: ul.lpCategories > li.lpCategory
64
- doc.css('ul.lpCategories > li.lpCategory').each do |category_element|
65
- # Category name is in h2.lpCategoryName
66
- category_header = category_element.at_css('h2.lpCategoryName')
67
- next unless category_header
68
-
69
- category_name = category_header.text.strip
70
- next if category_name.empty?
71
-
72
- # Description is typically in the category name itself (in parentheses)
73
- description = extract_category_description(category_name)
74
-
75
- # Find items in this category
76
- items = extract_items_for_category(category_element)
77
-
78
- categories << {
79
- name: category_name,
80
- description: description,
81
- items: items
82
- }
83
- end
84
-
85
- categories
86
- end
87
-
88
- def extract_category_description(category_name)
89
- # Description is often in parentheses in the category name
90
- # e.g., "Big 3 (Pack, Tent, Sleep System)"
91
- match = category_name.match(/\(([^)]+)\)/)
92
- return match[1] if match
93
-
94
- nil
95
- end
96
-
97
- def extract_items_for_category(category_element)
98
- items = []
99
-
100
- # Items are in ul.lpItems within the category
101
- items_list = category_element.at_css('ul.lpItems')
102
- return items unless items_list
103
-
104
- # Extract items (skip header row)
105
- items_list.css('li.lpItem').each do |item_element|
106
- item = extract_item(item_element)
107
- items << item if item && item[:name]
108
- end
109
-
110
- items
111
- end
112
-
113
- def extract_item(element)
114
- # Extract item data from the element
115
- # Lighterpack items have: name, weight, quantity, description, image
116
- name = extract_item_name(element)
117
- return nil unless name
118
-
119
- weight_data = extract_weight(element)
120
- quantity = extract_quantity(element)
121
- description = extract_item_description(element)
122
- image_url = extract_image_url(element)
123
- consumable = extract_consumable_flag(element)
124
- worn = extract_worn_flag(element)
125
-
126
- {
127
- name: name,
128
- description: description,
129
- weight: weight_data[:weight_grams],
130
- quantity: quantity,
131
- image_url: image_url,
132
- consumable: consumable,
133
- worn: worn
134
- }
135
- end
136
-
137
- def extract_item_name(element)
138
- # Item name is in span.lpName
139
- name_elem = element.at_css('span.lpName')
140
- return name_elem.text.strip if name_elem
141
35
 
142
- nil
143
- end
144
-
145
- def extract_weight(element)
146
- # Lighterpack stores weight in milligrams in input.lpMG
147
- mg_input = element.at_css('input.lpMG')
148
- if mg_input && mg_input['value']
149
- # Convert from milligrams to grams
150
- weight_grams = mg_input['value'].to_f / 1000.0
151
- return { weight_grams: weight_grams, original_unit: 'g' }
152
- end
153
-
154
- # Fallback: try to get from span.lpWeight and unit
155
- weight_elem = element.at_css('span.lpWeight')
156
- unit_elem = element.at_css('span.lpDisplay, select.lpUnit option[selected]')
157
-
158
- if weight_elem
159
- weight_value = weight_elem.text.strip.to_f
160
- unit = 'g' # default
161
-
162
- if unit_elem
163
- unit_text = unit_elem.text.strip.downcase
164
- unit = unit_text if ['oz', 'lb', 'g', 'kg'].include?(unit_text)
165
- end
166
-
167
- weight_grams = convert_to_grams(weight_value, unit)
168
- return { weight_grams: weight_grams, original_unit: unit }
169
- end
170
-
171
- { weight_grams: 0.0, original_unit: 'g' }
172
- end
173
-
174
- def convert_to_grams(value, unit)
175
- case unit.downcase
176
- when 'oz'
177
- value * 28.3495
178
- when 'lb'
179
- value * 453.592
180
- when 'g'
181
- value
182
- when 'kg'
183
- value * 1000
184
- else
185
- value # Default to assuming grams
186
- end
187
- end
188
-
189
- def extract_quantity(element)
190
- # Quantity is in span.lpQtyCell
191
- qty_elem = element.at_css('span.lpQtyCell')
192
- if qty_elem
193
- qty_text = qty_elem.text.strip
194
- return qty_text.to_i if qty_text.match?(/^\d+$/)
195
- end
196
-
197
- # Check qty attribute
198
- qty_attr = element['qty']
199
- return qty_attr.to_i if qty_attr
200
-
201
- 1 # Default quantity
202
- end
203
-
204
- def extract_item_description(element)
205
- # Description is in span.lpDescription
206
- desc_elem = element.at_css('span.lpDescription')
207
- return desc_elem.text.strip if desc_elem && !desc_elem.text.strip.empty?
208
-
209
- nil
210
- end
211
-
212
- def extract_image_url(element)
213
- # Image URL is in img.lpItemImage
214
- img = element.at_css('img.lpItemImage')
215
- if img && img['src']
216
- # Decode HTML entities
217
- url = img['src'].gsub('&#x2F;', '/').gsub('&#x3D;', '=')
218
- return url
219
- end
220
-
221
- # Also check href attribute
222
- if img && img['href']
223
- url = img['href'].gsub('&#x2F;', '/').gsub('&#x3D;', '=')
224
- return url
225
- end
226
-
227
- nil
228
- end
229
-
230
- def extract_consumable_flag(element)
231
- # Check for consumable icon with lpActive class (only active items have lpActive)
232
- # Try CSS selector first - Nokogiri should handle multiple classes
233
- consumable_active = element.at_css('i.lpSprite.lpConsumable.lpActive')
234
- return true if consumable_active
235
-
236
- # Fallback: check class attribute directly
237
- consumable_icon = element.at_css('i.lpSprite.lpConsumable')
238
- return false unless consumable_icon
239
-
240
- class_attr = consumable_icon['class'].to_s
241
- # Check if lpActive appears in the class string (handles extra spaces)
242
- return true if class_attr.include?('lpActive')
243
-
244
- false
245
- end
246
-
247
- def extract_worn_flag(element)
248
- # Check for worn icon with lpActive class (only active items have lpActive)
249
- # Try CSS selector first - Nokogiri should handle multiple classes
250
- worn_active = element.at_css('i.lpSprite.lpWorn.lpActive')
251
- return true if worn_active
252
-
253
- # Fallback: check class attribute directly
254
- worn_icon = element.at_css('i.lpSprite.lpWorn')
255
- return false unless worn_icon
256
-
257
- class_attr = worn_icon['class'].to_s
258
- # Check if lpActive appears in the class string (handles extra spaces)
259
- return true if class_attr.include?('lpActive')
260
-
261
- false
36
+ response.body
262
37
  end
263
38
  end
264
39
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LighterpackParser
4
- VERSION = '0.1.0'
4
+ VERSION = '1.0.0'
5
5
  end
@@ -1,8 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'lighterpack_parser/version'
4
+ require_relative 'lighterpack_parser/gram_converter'
5
+ require_relative 'lighterpack_parser/item'
6
+ require_relative 'lighterpack_parser/category'
7
+ require_relative 'lighterpack_parser/list'
8
+ require_relative 'lighterpack_parser/item_parser'
9
+ require_relative 'lighterpack_parser/category_parser'
10
+ require_relative 'lighterpack_parser/list_parser'
4
11
  require_relative 'lighterpack_parser/parser'
5
12
 
13
+ # Parser for extracting data from Lighterpack list HTML pages.
14
+ #
15
+ # Provides classes and methods to parse Lighterpack list HTML and extract
16
+ # structured data including list information, categories, and items with their
17
+ # properties (weight, quantity, consumable status, etc.).
6
18
  module LighterpackParser
7
19
  # Convenience method to parse a Lighterpack URL
8
20
  def self.parse_url(url)
@@ -13,11 +13,13 @@ Gem::Specification.new do |spec|
13
13
  spec.homepage = 'https://github.com/alex-ross/lighterpack-parser'
14
14
  spec.license = 'MIT'
15
15
 
16
+ spec.required_ruby_version = '>= 3.0'
17
+
16
18
  spec.files = Dir['lib/**/*', 'spec/**/*', '*.md', '*.gemspec']
17
19
  spec.require_paths = ['lib']
18
20
 
19
- spec.add_dependency 'nokogiri', '~> 1.15'
20
21
  spec.add_dependency 'httparty', '~> 0.21'
22
+ spec.add_dependency 'nokogiri', '~> 1.15'
21
23
 
22
24
  spec.add_development_dependency 'rspec', '~> 3.12'
23
25
  end
data/spec/parser_spec.rb CHANGED
@@ -11,31 +11,71 @@ RSpec.describe LighterpackParser::Parser do
11
11
  let(:result) { described_class.new(html: html).parse }
12
12
 
13
13
  it 'extracts the list name' do
14
- expect(result[:name]).to eq('Ultimate Hike 2025')
14
+ expect(result.name).to eq('Ultimate Hike 2025')
15
15
  end
16
16
 
17
17
  it 'extracts categories as an array' do
18
- expect(result[:categories]).to be_a(Array)
19
- expect(result[:categories].length).to be > 0
18
+ expect(result.categories).to be_a(Array)
19
+ expect(result.categories.length).to be > 0
20
20
  end
21
21
 
22
22
  it 'extracts the first category correctly' do
23
- first_category = result[:categories].first
24
- expect(first_category[:name]).to eq('Big 3 (Pack, Tent, Sleep System)')
25
- expect(first_category[:items]).to be_a(Array)
26
- expect(first_category[:items].length).to be > 0
23
+ first_category = result.categories.first
24
+ expect(first_category.name).to eq('Big 3 (Pack, Tent, Sleep System)')
25
+ expect(first_category.items).to be_a(Array)
26
+ expect(first_category.items.length).to be > 0
27
27
  end
28
28
 
29
29
  it 'extracts the first item correctly' do
30
- first_category = result[:categories].first
31
- first_item = first_category[:items].first
30
+ first_category = result.categories.first
31
+ first_item = first_category.items.first
32
+
33
+ expect(first_item.name).to eq('Bonfus Altus 38')
34
+ expect(first_item.description).to eq('With vest styled straps')
35
+ expect(first_item.weight).to be > 0
36
+ expect(first_item.quantity).to eq(1)
37
+ expect(first_item).to_not be_worn
38
+ expect(first_item).to_not be_consumable
39
+ end
40
+
41
+ it 'includes total weight fields' do
42
+ first_category = result.categories.first
43
+ first_item = first_category.items.first
44
+
45
+ expect(first_item.total_weight).to be > 0
46
+ expect(first_item.total_weight).to eq(first_item.weight * first_item.quantity)
47
+
48
+ if first_item.consumable
49
+ expect(first_item.total_consumable_weight).to be > 0
50
+ expect(first_item.total_consumable_weight).to eq(first_item.weight * first_item.quantity)
51
+ else
52
+ expect(first_item.total_consumable_weight).to be_nil
53
+ end
32
54
 
33
- expect(first_item[:name]).to eq('Bonfus Altus 38')
34
- expect(first_item[:description]).to eq('With vest styled straps')
35
- expect(first_item[:weight]).to be > 0
36
- expect(first_item[:quantity]).to eq(1)
37
- expect([true, false]).to include(first_item[:worn])
38
- expect([true, false]).to include(first_item[:consumable])
55
+ if first_item.worn
56
+ expect(first_item.worn_quantity).to eq(1)
57
+ expect(first_item.total_worn_weight).to be > 0
58
+ expect(first_item.total_worn_weight).to eq(first_item.weight * 1)
59
+ else
60
+ expect(first_item.worn_quantity).to be_nil
61
+ expect(first_item.total_worn_weight).to be_nil
62
+ end
63
+ end
64
+
65
+ it 'sets worn_quantity to 1 for worn items regardless of quantity' do
66
+ result.categories.each do |category|
67
+ category.items.each do |item|
68
+ if item.worn
69
+ expect(item.worn_quantity).to eq(1),
70
+ "Worn item #{item.name} should have worn_quantity=1, " \
71
+ "got #{item.worn_quantity}"
72
+ expect(item.total_worn_weight).to eq(item.weight * 1),
73
+ "Worn item #{item.name} should have total_worn_weight = weight * 1"
74
+ else
75
+ expect(item.worn_quantity).to be_nil, "Non-worn item #{item.name} should have worn_quantity=nil"
76
+ end
77
+ end
78
+ end
39
79
  end
40
80
  end
41
81
 
@@ -44,11 +84,11 @@ RSpec.describe LighterpackParser::Parser do
44
84
  let(:result) { described_class.new(html: html).parse }
45
85
 
46
86
  it 'extracts the list name' do
47
- expect(result[:name]).to be_truthy
87
+ expect(result.name).to be_truthy
48
88
  end
49
89
 
50
90
  it 'extracts categories as an array' do
51
- expect(result[:categories]).to be_a(Array)
91
+ expect(result.categories).to be_a(Array)
52
92
  end
53
93
  end
54
94
 
@@ -57,11 +97,11 @@ RSpec.describe LighterpackParser::Parser do
57
97
  let(:result) { described_class.new(html: html).parse }
58
98
 
59
99
  it 'extracts the list name' do
60
- expect(result[:name]).to be_truthy
100
+ expect(result.name).to be_truthy
61
101
  end
62
102
 
63
103
  it 'extracts categories as an array' do
64
- expect(result[:categories]).to be_a(Array)
104
+ expect(result.categories).to be_a(Array)
65
105
  end
66
106
  end
67
107
  end
@@ -71,11 +111,11 @@ RSpec.describe LighterpackParser::Parser do
71
111
  let(:result) { described_class.new(html: html).parse }
72
112
 
73
113
  it 'converts weights to grams correctly' do
74
- result[:categories].each do |category|
75
- category[:items].each do |item|
76
- if item[:weight] > 0
77
- expect(item[:weight]).to be > 0, "Item #{item[:name]} should have weight > 0"
78
- expect(item[:weight]).to be < 1_000_000, "Item #{item[:name]} weight seems too large: #{item[:weight]}"
114
+ result.categories.each do |category|
115
+ category.items.each do |item|
116
+ if item.weight > 0
117
+ expect(item.weight).to be > 0, "Item #{item.name} should have weight > 0"
118
+ expect(item.weight).to be < 1_000_000, "Item #{item.name} weight seems too large: #{item.weight}"
79
119
  end
80
120
  end
81
121
  end
@@ -87,9 +127,9 @@ RSpec.describe LighterpackParser::Parser do
87
127
  let(:result) { described_class.new(html: html).parse }
88
128
 
89
129
  it 'extracts consumable flag as boolean for all items' do
90
- result[:categories].each do |category|
91
- category[:items].each do |item|
92
- expect([true, false]).to include(item[:consumable]), "Consumable should be boolean for #{item[:name]}"
130
+ result.categories.each do |category|
131
+ category.items.each do |item|
132
+ expect([true, false]).to include(item.consumable), "Consumable should be boolean for #{item.name}"
93
133
  end
94
134
  end
95
135
  end
@@ -100,9 +140,9 @@ RSpec.describe LighterpackParser::Parser do
100
140
  let(:result) { described_class.new(html: html).parse }
101
141
 
102
142
  it 'extracts worn flag as boolean for all items' do
103
- result[:categories].each do |category|
104
- category[:items].each do |item|
105
- expect([true, false]).to include(item[:worn]), "Worn should be boolean for #{item[:name]}"
143
+ result.categories.each do |category|
144
+ category.items.each do |item|
145
+ expect([true, false]).to include(item.worn), "Worn should be boolean for #{item.name}"
106
146
  end
107
147
  end
108
148
  end
@@ -111,62 +151,64 @@ RSpec.describe LighterpackParser::Parser do
111
151
  describe 'worn flag correctness for h23rxt.html' do
112
152
  let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
113
153
  let(:result) { described_class.new(html: html).parse }
114
- let(:all_items) { result[:categories].flat_map { |cat| cat[:items] } }
154
+ let(:all_items) { result.categories.flat_map(&:items) }
115
155
 
116
156
  it 'correctly identifies Sea to Summit Ultrasil as worn' do
117
- ultrasil = all_items.find { |item| item[:name]&.include?('Sea to Summit Ultrasil') }
157
+ ultrasil = all_items.find { |item| item.name&.include?('Sea to Summit Ultrasil') }
118
158
  expect(ultrasil).to be_truthy, 'Should find Sea to Summit Ultrasil item'
119
- expect(ultrasil[:worn]).to eq(true), 'Sea to Summit Ultrasil should be worn'
120
- expect(ultrasil[:consumable]).to eq(false), 'Sea to Summit Ultrasil should NOT be consumable'
159
+ expect(ultrasil.worn).to be(true), 'Sea to Summit Ultrasil should be worn'
160
+ expect(ultrasil.consumable).to be(false), 'Sea to Summit Ultrasil should NOT be consumable'
121
161
  end
122
162
 
123
163
  it 'correctly identifies MacBook Pro as not worn' do
124
- macbook = all_items.find { |item| item[:name]&.include?('MacBook Pro') }
164
+ macbook = all_items.find { |item| item.name&.include?('MacBook Pro') }
125
165
  expect(macbook).to be_truthy, 'Should find MacBook Pro item'
126
- expect(macbook[:worn]).to eq(false), 'MacBook Pro should NOT be worn'
127
- expect(macbook[:consumable]).to eq(false), 'MacBook Pro should NOT be consumable'
166
+ expect(macbook.worn).to be(false), 'MacBook Pro should NOT be worn'
167
+ expect(macbook.consumable).to be(false), 'MacBook Pro should NOT be consumable'
128
168
  end
129
169
  end
130
170
 
131
171
  describe 'consumable flag correctness for h23rxt.html' do
132
172
  let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
133
173
  let(:result) { described_class.new(html: html).parse }
134
- let(:all_items) { result[:categories].flat_map { |cat| cat[:items] } }
174
+ let(:all_items) { result.categories.flat_map(&:items) }
135
175
 
136
176
  it 'correctly identifies Tandkräm as consumable' do
137
- tandkram = all_items.find { |item| item[:name]&.include?('Tandkräm (innehåll)') }
177
+ tandkram = all_items.find { |item| item.name&.include?('Tandkräm (innehåll)') }
138
178
  expect(tandkram).to be_truthy, 'Should find Tandkräm item'
139
- expect(tandkram[:consumable]).to eq(true), 'Tandkräm should be consumable'
140
- expect(tandkram[:worn]).to eq(false), 'Tandkräm should NOT be worn'
179
+ expect(tandkram.consumable).to be(true), 'Tandkräm should be consumable'
180
+ expect(tandkram.worn).to be(false), 'Tandkräm should NOT be worn'
141
181
  end
142
182
 
143
183
  it 'correctly identifies Dushtvål/Shampoo as consumable' do
144
- shampoo = all_items.find { |item| item[:name]&.include?('Dushtvål') || item[:name]&.include?('Shampoo') }
184
+ shampoo = all_items.find { |item| item.name&.include?('Dushtvål') || item.name&.include?('Shampoo') }
145
185
  expect(shampoo).to be_truthy, 'Should find Dushtvål/Shampoo item'
146
- expect(shampoo[:consumable]).to eq(true), 'Dushtvål/Shampoo should be consumable'
147
- expect(shampoo[:worn]).to eq(false), 'Dushtvål/Shampoo should NOT be worn'
186
+ expect(shampoo.consumable).to be(true), 'Dushtvål/Shampoo should be consumable'
187
+ expect(shampoo.worn).to be(false), 'Dushtvål/Shampoo should NOT be worn'
148
188
  end
149
189
 
150
190
  it 'correctly identifies MacBook Pro as not consumable' do
151
- macbook = all_items.find { |item| item[:name]&.include?('MacBook Pro') }
191
+ macbook = all_items.find { |item| item.name&.include?('MacBook Pro') }
152
192
  expect(macbook).to be_truthy, 'Should find MacBook Pro item'
153
- expect(macbook[:consumable]).to eq(false), 'MacBook Pro should NOT be consumable'
193
+ expect(macbook.consumable).to be(false), 'MacBook Pro should NOT be consumable'
154
194
  end
155
195
  end
156
196
 
157
197
  describe 'worn and consumable counts for h23rxt.html' do
158
198
  let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
159
199
  let(:result) { described_class.new(html: html).parse }
160
- let(:all_items) { result[:categories].flat_map { |cat| cat[:items] } }
200
+ let(:all_items) { result.categories.flat_map(&:items) }
161
201
  let(:total_items) { all_items.length }
162
- let(:worn_count) { all_items.count { |item| item[:worn] } }
163
- let(:consumable_count) { all_items.count { |item| item[:consumable] } }
202
+ let(:worn_count) { all_items.count(&:worn) }
203
+ let(:consumable_count) { all_items.count(&:consumable) }
164
204
 
165
205
  it 'has reasonable counts of worn and consumable items' do
166
206
  expect(worn_count).to be >= 1, "Should have at least 1 worn item, got #{worn_count}"
167
207
  expect(worn_count).to be <= 5, "Should have at most 5 worn items (most items are not worn), got #{worn_count}"
168
208
  expect(consumable_count).to be >= 2, "Should have at least 2 consumable items, got #{consumable_count}"
169
- expect(consumable_count).to be <= 5, "Should have at most 5 consumable items (most items are not consumable), got #{consumable_count}"
209
+ expect(consumable_count).to be <= 5,
210
+ 'Should have at most 5 consumable items ' \
211
+ "(most items are not consumable), got #{consumable_count}"
170
212
  expect(total_items).to be > 10, "Should have many items total, got #{total_items}"
171
213
  end
172
214
  end
@@ -176,10 +218,10 @@ RSpec.describe LighterpackParser::Parser do
176
218
  let(:result) { described_class.new(html: html).parse }
177
219
 
178
220
  it 'extracts quantities as positive integers' do
179
- result[:categories].each do |category|
180
- category[:items].each do |item|
181
- expect(item[:quantity]).to be_a(Integer), "Quantity should be integer for #{item[:name]}"
182
- expect(item[:quantity]).to be > 0, "Quantity should be > 0 for #{item[:name]}"
221
+ result.categories.each do |category|
222
+ category.items.each do |item|
223
+ expect(item.quantity).to be_a(Integer), "Quantity should be integer for #{item.name}"
224
+ expect(item.quantity).to be > 0, "Quantity should be > 0 for #{item.name}"
183
225
  end
184
226
  end
185
227
  end
@@ -191,10 +233,10 @@ RSpec.describe LighterpackParser::Parser do
191
233
 
192
234
  it 'extracts image URLs correctly' do
193
235
  items_with_images = 0
194
- result[:categories].each do |category|
195
- category[:items].each do |item|
196
- if item[:image_url]
197
- expect(item[:image_url]).to start_with('http'), "Image URL should start with http for #{item[:name]}"
236
+ result.categories.each do |category|
237
+ category.items.each do |item|
238
+ if item.image_url
239
+ expect(item.image_url).to start_with('http'), "Image URL should start with http for #{item.name}"
198
240
  items_with_images += 1
199
241
  end
200
242
  end
@@ -209,9 +251,9 @@ RSpec.describe LighterpackParser::Parser do
209
251
  let(:result) { described_class.new(html: html).parse }
210
252
 
211
253
  it 'extracts category descriptions when available' do
212
- result[:categories].each do |category|
213
- expect(category[:description]).to be_nil.or(be_a(String)),
214
- "Description should be nil or string for category #{category[:name]}"
254
+ result.categories.each do |category|
255
+ expect(category.description).to be_nil.or(be_a(String)),
256
+ "Description should be nil or string for category #{category.name}"
215
257
  end
216
258
  end
217
259
  end
metadata CHANGED
@@ -1,43 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lighterpack-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Packlista Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-11-29 00:00:00.000000000 Z
11
+ date: 2026-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: nokogiri
14
+ name: httparty
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.15'
19
+ version: '0.21'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.15'
26
+ version: '0.21'
27
27
  - !ruby/object:Gem::Dependency
28
- name: httparty
28
+ name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.21'
33
+ version: '1.15'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0.21'
40
+ version: '1.15'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -62,6 +62,13 @@ extra_rdoc_files: []
62
62
  files:
63
63
  - README.md
64
64
  - lib/lighterpack_parser.rb
65
+ - lib/lighterpack_parser/category.rb
66
+ - lib/lighterpack_parser/category_parser.rb
67
+ - lib/lighterpack_parser/gram_converter.rb
68
+ - lib/lighterpack_parser/item.rb
69
+ - lib/lighterpack_parser/item_parser.rb
70
+ - lib/lighterpack_parser/list.rb
71
+ - lib/lighterpack_parser/list_parser.rb
65
72
  - lib/lighterpack_parser/parser.rb
66
73
  - lib/lighterpack_parser/version.rb
67
74
  - lighterpack-parser.gemspec
@@ -82,7 +89,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
82
89
  requirements:
83
90
  - - ">="
84
91
  - !ruby/object:Gem::Version
85
- version: '0'
92
+ version: '3.0'
86
93
  required_rubygems_version: !ruby/object:Gem::Requirement
87
94
  requirements:
88
95
  - - ">="