lighterpack-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +108 -0
- data/lib/lighterpack_parser/parser.rb +264 -0
- data/lib/lighterpack_parser/version.rb +5 -0
- data/lib/lighterpack_parser.rb +11 -0
- data/lighterpack-parser.gemspec +23 -0
- data/spec/fixtures/adbf7c.html +2911 -0
- data/spec/fixtures/b6q1kr.html +2948 -0
- data/spec/fixtures/h23rxt.html +1660 -0
- data/spec/parser_spec.rb +218 -0
- data/spec/spec_helper.rb +15 -0
- metadata +96 -0
data/spec/parser_spec.rb
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe LighterpackParser::Parser do
|
|
6
|
+
let(:fixture_dir) { File.join(__dir__, 'fixtures') }
|
|
7
|
+
|
|
8
|
+
# Exercises Parser#parse end to end against each bundled Lighterpack fixture.
describe '#parse' do
  # Fixture with pinned expectations for the list name, first category and first item.
  context 'with b6q1kr.html' do
    let(:fixture_path) { File.join(fixture_dir, 'b6q1kr.html') }
    let(:html) { File.read(fixture_path) }
    let(:result) { described_class.new(html: html).parse }
    let(:leading_category) { result[:categories].first }
    let(:leading_item) { leading_category[:items].first }

    it 'extracts the list name' do
      expect(result[:name]).to eq('Ultimate Hike 2025')
    end

    it 'extracts categories as an array' do
      parsed_categories = result[:categories]

      expect(parsed_categories).to be_a(Array)
      expect(parsed_categories.length).to be_positive
    end

    it 'extracts the first category correctly' do
      expect(leading_category[:name]).to eq('Big 3 (Pack, Tent, Sleep System)')

      category_items = leading_category[:items]
      expect(category_items).to be_a(Array)
      expect(category_items.length).to be_positive
    end

    it 'extracts the first item correctly' do
      expect(leading_item[:name]).to eq('Bonfus Altus 38')
      expect(leading_item[:description]).to eq('With vest styled straps')
      expect(leading_item[:weight]).to be_positive
      expect(leading_item[:quantity]).to eq(1)

      # The flags only need to be real booleans here; their values are not pinned.
      %i[worn consumable].each do |flag|
        expect(leading_item[flag]).to be(true).or(be(false))
      end
    end
  end

  # The remaining fixtures only get smoke checks: a name is present and
  # categories come back as an array.
  %w[adbf7c.html h23rxt.html].each do |fixture_name|
    context "with #{fixture_name}" do
      let(:html) { File.read(File.join(fixture_dir, fixture_name)) }
      let(:result) { described_class.new(html: html).parse }

      it 'extracts the list name' do
        expect(result[:name]).to be_truthy
      end

      it 'extracts categories as an array' do
        expect(result[:categories]).to be_a(Array)
      end
    end
  end
end
|
|
68
|
+
|
|
69
|
+
# Sanity-checks every parsed item weight in the b6q1kr.html fixture.
describe 'weight conversion' do
  let(:html) { File.read(File.join(fixture_dir, 'b6q1kr.html')) }
  let(:result) { described_class.new(html: html).parse }
  let(:all_items) { result[:categories].flat_map { |category| category[:items] } }

  it 'converts weights to grams correctly' do
    # Guard against a vacuous pass when the parser returns no items at all.
    expect(all_items).not_to be_empty, 'Expected the fixture to yield at least one item'

    all_items.each do |item|
      weight = item[:weight]

      # The previous version asserted `weight > 0` only inside `if weight > 0`,
      # which could never fail. Zero stays tolerated (it was skipped before);
      # negative weights are now rejected instead of being silently ignored.
      expect(weight).to be >= 0, "Item #{item[:name]} should not have a negative weight: #{weight}"
      expect(weight).to be < 1_000_000, "Item #{item[:name]} weight seems too large: #{item[:weight]}"
    end
  end
end
|
|
84
|
+
|
|
85
|
+
# Checks that every item parsed from b6q1kr.html carries a boolean :consumable flag.
describe 'consumable flag extraction' do
  let(:html) { File.read(File.join(fixture_dir, 'b6q1kr.html')) }
  let(:result) { described_class.new(html: html).parse }

  it 'extracts consumable flag as boolean for all items' do
    parsed_items = result[:categories].flat_map { |category| category[:items] }

    parsed_items.each do |item|
      expect(item[:consumable]).to be(true).or(be(false)), "Consumable should be boolean for #{item[:name]}"
    end
  end
end
|
|
97
|
+
|
|
98
|
+
# Checks that every item parsed from b6q1kr.html carries a boolean :worn flag.
describe 'worn flag extraction' do
  let(:html) { File.read(File.join(fixture_dir, 'b6q1kr.html')) }
  let(:result) { described_class.new(html: html).parse }

  it 'extracts worn flag as boolean for all items' do
    parsed_items = result[:categories].flat_map { |category| category[:items] }

    parsed_items.each do |item|
      expect(item[:worn]).to be(true).or(be(false)), "Worn should be boolean for #{item[:name]}"
    end
  end
end
|
|
110
|
+
|
|
111
|
+
# Pins the :worn flag (and the matching :consumable flag) of two known items
# in the h23rxt.html fixture.
describe 'worn flag correctness for h23rxt.html' do
  let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
  let(:result) { described_class.new(html: html).parse }
  let(:all_items) { result[:categories].flat_map { |category| category[:items] } }

  # Returns the first parsed item whose name contains +fragment+, or nil.
  def item_named(fragment)
    all_items.find { |candidate| candidate[:name]&.include?(fragment) }
  end

  it 'correctly identifies Sea to Summit Ultrasil as worn' do
    ultrasil = item_named('Sea to Summit Ultrasil')

    expect(ultrasil).to be_truthy, 'Should find Sea to Summit Ultrasil item'
    expect(ultrasil[:worn]).to be(true), 'Sea to Summit Ultrasil should be worn'
    expect(ultrasil[:consumable]).to be(false), 'Sea to Summit Ultrasil should NOT be consumable'
  end

  it 'correctly identifies MacBook Pro as not worn' do
    macbook = item_named('MacBook Pro')

    expect(macbook).to be_truthy, 'Should find MacBook Pro item'
    expect(macbook[:worn]).to be(false), 'MacBook Pro should NOT be worn'
    expect(macbook[:consumable]).to be(false), 'MacBook Pro should NOT be consumable'
  end
end
|
|
130
|
+
|
|
131
|
+
# Pins the :consumable flag of known items in the h23rxt.html fixture.
describe 'consumable flag correctness for h23rxt.html' do
  let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
  let(:result) { described_class.new(html: html).parse }
  let(:all_items) { result[:categories].flat_map { |category| category[:items] } }

  # Returns the first parsed item whose name contains any of +fragments+, or nil.
  def item_matching(*fragments)
    all_items.find do |candidate|
      fragments.any? { |fragment| candidate[:name]&.include?(fragment) }
    end
  end

  it 'correctly identifies Tandkräm as consumable' do
    tandkram = item_matching('Tandkräm (innehåll)')

    expect(tandkram).to be_truthy, 'Should find Tandkräm item'
    expect(tandkram[:consumable]).to be(true), 'Tandkräm should be consumable'
    expect(tandkram[:worn]).to be(false), 'Tandkräm should NOT be worn'
  end

  it 'correctly identifies Dushtvål/Shampoo as consumable' do
    # Either spelling identifies the item, so both fragments are accepted.
    shampoo = item_matching('Dushtvål', 'Shampoo')

    expect(shampoo).to be_truthy, 'Should find Dushtvål/Shampoo item'
    expect(shampoo[:consumable]).to be(true), 'Dushtvål/Shampoo should be consumable'
    expect(shampoo[:worn]).to be(false), 'Dushtvål/Shampoo should NOT be worn'
  end

  it 'correctly identifies MacBook Pro as not consumable' do
    macbook = item_matching('MacBook Pro')

    expect(macbook).to be_truthy, 'Should find MacBook Pro item'
    expect(macbook[:consumable]).to be(false), 'MacBook Pro should NOT be consumable'
  end
end
|
|
156
|
+
|
|
157
|
+
# Range checks on how many items of the h23rxt.html fixture are flagged as
# worn or consumable, to catch a parser that flags everything or nothing.
describe 'worn and consumable counts for h23rxt.html' do
  let(:html) { File.read(File.join(fixture_dir, 'h23rxt.html')) }
  let(:result) { described_class.new(html: html).parse }
  let(:all_items) { result[:categories].flat_map { |category| category[:items] } }

  it 'has reasonable counts of worn and consumable items' do
    worn_count = all_items.count { |entry| entry[:worn] }
    consumable_count = all_items.count { |entry| entry[:consumable] }
    total_items = all_items.size

    expect(worn_count).to be >= 1, "Should have at least 1 worn item, got #{worn_count}"
    expect(worn_count).to be <= 5, "Should have at most 5 worn items (most items are not worn), got #{worn_count}"
    expect(consumable_count).to be >= 2, "Should have at least 2 consumable items, got #{consumable_count}"
    expect(consumable_count).to be <= 5,
                                "Should have at most 5 consumable items (most items are not consumable), got #{consumable_count}"
    expect(total_items).to be > 10, "Should have many items total, got #{total_items}"
  end
end
|
|
173
|
+
|
|
174
|
+
# Checks that every item parsed from b6q1kr.html has a positive Integer quantity.
describe 'quantity extraction' do
  let(:html) { File.read(File.join(fixture_dir, 'b6q1kr.html')) }
  let(:result) { described_class.new(html: html).parse }

  it 'extracts quantities as positive integers' do
    parsed_items = result[:categories].flat_map { |category| category[:items] }

    parsed_items.each do |item|
      quantity = item[:quantity]

      expect(quantity).to be_a(Integer), "Quantity should be integer for #{item[:name]}"
      expect(quantity).to be_positive, "Quantity should be > 0 for #{item[:name]}"
    end
  end
end
|
|
187
|
+
|
|
188
|
+
# Checks image URLs parsed from b6q1kr.html: every URL that is present must
# start with "http", and at least one item must have one.
describe 'image URL extraction' do
  let(:html) { File.read(File.join(fixture_dir, 'b6q1kr.html')) }
  let(:result) { described_class.new(html: html).parse }

  it 'extracts image URLs correctly' do
    items_with_images = result[:categories]
                        .flat_map { |category| category[:items] }
                        .select { |item| item[:image_url] }

    items_with_images.each do |item|
      expect(item[:image_url]).to start_with('http'), "Image URL should start with http for #{item[:name]}"
    end

    expect(items_with_images.size).to be_positive, 'At least some items should have image URLs'
  end
end
|
|
206
|
+
|
|
207
|
+
# Checks that each category parsed from b6q1kr.html exposes a :description
# that is either absent (nil) or a String.
describe 'category description extraction' do
  let(:html) { File.read(File.join(fixture_dir, 'b6q1kr.html')) }
  let(:result) { described_class.new(html: html).parse }

  it 'extracts category descriptions when available' do
    result[:categories].each do |category|
      failure_message = "Description should be nil or string for category #{category[:name]}"

      expect(category[:description]).to be_a(String).or(be_nil), failure_message
    end
  end
end
|
|
218
|
+
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../lib/lighterpack_parser'
|
|
4
|
+
|
|
5
|
+
# Shared RSpec configuration for the gem's spec suite.
RSpec.configure do |rspec|
  # Persist example statuses so --only-failures and --next-failure work.
  rspec.example_status_persistence_file_path = '.rspec_status'

  # Stop RSpec from exposing its methods globally on `Module` and `main`.
  rspec.disable_monkey_patching!

  # Use the `expect` assertion syntax.
  rspec.expect_with(:rspec) { |expectations| expectations.syntax = :expect }
end
|
metadata
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lighterpack-parser
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Packlista Team
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-11-29 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: nokogiri
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.15'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.15'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: httparty
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0.21'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0.21'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.12'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.12'
|
|
55
|
+
description: Parse Lighterpack HTML to extract list data including categories, items,
|
|
56
|
+
weights, and metadata
|
|
57
|
+
email:
|
|
58
|
+
- team@packlista.com
|
|
59
|
+
executables: []
|
|
60
|
+
extensions: []
|
|
61
|
+
extra_rdoc_files: []
|
|
62
|
+
files:
|
|
63
|
+
- README.md
|
|
64
|
+
- lib/lighterpack_parser.rb
|
|
65
|
+
- lib/lighterpack_parser/parser.rb
|
|
66
|
+
- lib/lighterpack_parser/version.rb
|
|
67
|
+
- lighterpack-parser.gemspec
|
|
68
|
+
- spec/fixtures/adbf7c.html
|
|
69
|
+
- spec/fixtures/b6q1kr.html
|
|
70
|
+
- spec/fixtures/h23rxt.html
|
|
71
|
+
- spec/parser_spec.rb
|
|
72
|
+
- spec/spec_helper.rb
|
|
73
|
+
homepage: https://github.com/alex-ross/lighterpack-parser
|
|
74
|
+
licenses:
|
|
75
|
+
- MIT
|
|
76
|
+
metadata: {}
|
|
77
|
+
post_install_message:
|
|
78
|
+
rdoc_options: []
|
|
79
|
+
require_paths:
|
|
80
|
+
- lib
|
|
81
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
82
|
+
requirements:
|
|
83
|
+
- - ">="
|
|
84
|
+
- !ruby/object:Gem::Version
|
|
85
|
+
version: '0'
|
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
87
|
+
requirements:
|
|
88
|
+
- - ">="
|
|
89
|
+
- !ruby/object:Gem::Version
|
|
90
|
+
version: '0'
|
|
91
|
+
requirements: []
|
|
92
|
+
rubygems_version: 3.5.22
|
|
93
|
+
signing_key:
|
|
94
|
+
specification_version: 4
|
|
95
|
+
summary: Parser for Lighterpack lists
|
|
96
|
+
test_files: []
|