lcbo 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.md +3 -3
- data/examples/crawlers/product_lists_crawler.rb +2 -2
- data/lcbo.gemspec +1 -0
- data/lib/lcbo/crawlkit.rb +2 -0
- data/lib/lcbo/crawlkit/tag_helper.rb +41 -0
- data/lib/lcbo/pages/product_page.rb +11 -0
- data/lib/lcbo/pages/store_page.rb +20 -10
- data/lib/lcbo/version.rb +1 -1
- data/spec/crawlkit/tag_helper_spec.rb +18 -0
- data/spec/pages/product_pages.yml +8 -0
- data/spec/pages/store_pages.yml +2 -0
- metadata +19 -3
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -26,9 +26,9 @@ store list pages directly from the [LCBO](http://lcbo.com) website.
|
|
26
26
|
## Crawlers
|
27
27
|
|
28
28
|
Some examples of crawlers exist
|
29
|
-
[here](
|
30
|
-
check out the
|
31
|
-
[crawler spec](
|
29
|
+
[here](https://github.com/heycarsten/lcbo/tree/master/examples/crawlers).
|
30
|
+
You can also check out the
|
31
|
+
[crawler spec](https://github.com/heycarsten/lcbo/blob/master/spec/crawlkit/crawler_spec.rb)
|
32
32
|
to see how to interact with them.
|
33
33
|
|
34
34
|
## Installation
|
@@ -1,6 +1,6 @@
|
|
1
1
|
class ProductListsCrawler
|
2
2
|
|
3
|
-
include CrawlKit::Crawler
|
3
|
+
include LCBO::CrawlKit::Crawler
|
4
4
|
|
5
5
|
def request(params)
|
6
6
|
LCBO.product_list(params[:next_page] || 1)
|
@@ -11,7 +11,7 @@ class ProductListsCrawler
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def reduce
|
14
|
-
|
14
|
+
responses.map { |params| params[:product_nos] }.flatten
|
15
15
|
end
|
16
16
|
|
17
17
|
end
|
data/lcbo.gemspec
CHANGED
data/lib/lcbo/crawlkit.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'typhoeus'
|
3
3
|
require 'unicode_utils'
|
4
|
+
require 'stringex'
|
4
5
|
require 'uri'
|
5
6
|
|
6
7
|
module LCBO
|
@@ -23,6 +24,7 @@ require 'lcbo/crawlkit/fastdate_helper'
|
|
23
24
|
require 'lcbo/crawlkit/page'
|
24
25
|
require 'lcbo/crawlkit/request'
|
25
26
|
require 'lcbo/crawlkit/response'
|
27
|
+
require 'lcbo/crawlkit/tag_helper'
|
26
28
|
require 'lcbo/crawlkit/request_prototype'
|
27
29
|
require 'lcbo/crawlkit/crawler'
|
28
30
|
require 'lcbo/crawlkit/titlecase_helper'
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module LCBO
|
2
|
+
module CrawlKit
|
3
|
+
module TagHelper
|
4
|
+
DELETION_RE = /\'|\"|\\|\/|\(|\)|\[|\]|\./
|
5
|
+
WHITESPACE_RE = /\*|\+|\&|\_|\,|\s/
|
6
|
+
|
7
|
+
def self.flatten(values)
|
8
|
+
TitleCaseHelper.downcase(values.flatten.join(' ')).
|
9
|
+
gsub(DELETION_RE, '').
|
10
|
+
gsub(WHITESPACE_RE, ' ').
|
11
|
+
strip
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.split(str)
|
15
|
+
[str, str.to_ascii].
|
16
|
+
join(' ').
|
17
|
+
split.
|
18
|
+
map { |word| stem(word) }.
|
19
|
+
flatten.
|
20
|
+
uniq
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.stem(word)
|
24
|
+
if word.include?('-')
|
25
|
+
parts = word.split('-')
|
26
|
+
a = parts.dup
|
27
|
+
a << parts.join
|
28
|
+
a
|
29
|
+
else
|
30
|
+
word
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.[](*values)
|
35
|
+
return [] if values.any? { |val| '' == val.to_s.strip }
|
36
|
+
split(flatten(values))
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -19,6 +19,17 @@ module LCBO
|
|
19
19
|
CrawlKit::TitleCaseHelper[product_details_form('itemName')]
|
20
20
|
end
|
21
21
|
|
22
|
+
emits :tags do
|
23
|
+
CrawlKit::TagHelper[
|
24
|
+
name,
|
25
|
+
primary_category,
|
26
|
+
secondary_category,
|
27
|
+
origin,
|
28
|
+
producer_name,
|
29
|
+
package_unit_type
|
30
|
+
]
|
31
|
+
end
|
32
|
+
|
22
33
|
emits :price_in_cents do
|
23
34
|
(product_details_form('price').to_f * 100).to_i
|
24
35
|
end
|
@@ -36,20 +36,20 @@ module LCBO
|
|
36
36
|
query_params[:store_no].to_i
|
37
37
|
end
|
38
38
|
|
39
|
-
DAY_NAMES.each do |day|
|
40
|
-
emits :"#{day}_open" do
|
41
|
-
time_open_close(day)[0]
|
42
|
-
end
|
43
|
-
|
44
|
-
emits :"#{day}_close" do
|
45
|
-
time_open_close(day)[1]
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
39
|
emits :name do
|
50
40
|
CrawlKit::TitleCaseHelper[info_nodes[1].content.strip]
|
51
41
|
end
|
52
42
|
|
43
|
+
emits :tags do
|
44
|
+
CrawlKit::TagHelper[
|
45
|
+
name,
|
46
|
+
address_line_1,
|
47
|
+
address_line_2,
|
48
|
+
city,
|
49
|
+
postal_code
|
50
|
+
]
|
51
|
+
end
|
52
|
+
|
53
53
|
emits :address_line_1 do
|
54
54
|
data = info_nodes[2].content.strip.split(',')[0]
|
55
55
|
unless data
|
@@ -99,6 +99,16 @@ module LCBO
|
|
99
99
|
location['longitude'][0].to_f
|
100
100
|
end
|
101
101
|
|
102
|
+
DAY_NAMES.each do |day|
|
103
|
+
emits :"#{day}_open" do
|
104
|
+
time_open_close(day)[0]
|
105
|
+
end
|
106
|
+
|
107
|
+
emits :"#{day}_close" do
|
108
|
+
time_open_close(day)[1]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
102
112
|
DETAIL_FIELDS.keys.each do |field|
|
103
113
|
emits(field) { details[field] }
|
104
114
|
end
|
data/lib/lcbo/version.rb
CHANGED
@@ -0,0 +1,18 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe LCBO::CrawlKit::TagHelper do
|
5
|
+
@expectations = {
|
6
|
+
['Hello World'] => %w[hello world],
|
7
|
+
['Éve Picard'] => %w[éve picard eve],
|
8
|
+
['Hello Hello World'] => %w[hello world],
|
9
|
+
['Hello', 'Éve Picard'] => %w[hello éve picard eve],
|
10
|
+
['Hello', 'Éve-Picard'] => %w[hello éve picard évepicard eve evepicard]
|
11
|
+
}
|
12
|
+
|
13
|
+
@expectations.each_pair do |input, expectation|
|
14
|
+
it "should tagify: #{input.inspect} to: #{expectation.inspect}" do
|
15
|
+
LCBO::CrawlKit::TagHelper[*input].must_equal expectation
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -7,6 +7,7 @@
|
|
7
7
|
:expectation:
|
8
8
|
:product_no: 122408
|
9
9
|
:name: Pascual Toso Malbec Rose
|
10
|
+
:tags: []
|
10
11
|
:price_in_cents: 1195
|
11
12
|
:regular_price_in_cents: 1195
|
12
13
|
:limited_time_offer_savings_in_cents: 0
|
@@ -47,6 +48,7 @@
|
|
47
48
|
:expectation:
|
48
49
|
:product_no: 18
|
49
50
|
:name: Heineken Lager
|
51
|
+
:tags: ["heineken", "lager", "beer", "netherlands", "heinekens", "brouwerijen", "nederland", "bv", "bottle"]
|
50
52
|
:price_in_cents: 1250
|
51
53
|
:regular_price_in_cents: 1350
|
52
54
|
:limited_time_offer_savings_in_cents: 100
|
@@ -87,6 +89,7 @@
|
|
87
89
|
:expectation:
|
88
90
|
:product_no: 280461
|
89
91
|
:name: Dom Pérignon Brut Champagne
|
92
|
+
:tags: ["dom", "pérignon", "brut", "champagne", "wine", "france", "moet", "chandon", "sa", "bottle", "perignon"]
|
90
93
|
:price_in_cents: 21995
|
91
94
|
:regular_price_in_cents: 21995
|
92
95
|
:limited_time_offer_savings_in_cents: 0
|
@@ -127,6 +130,7 @@
|
|
127
130
|
:expectation:
|
128
131
|
:product_no: 479154
|
129
132
|
:name: Floris Ninkeberry Gardenbeer
|
133
|
+
:tags: ["floris", "ninkeberry", "gardenbeer", "beer", "ale", "belgium", "brouwerij", "huyghe", "bottle"]
|
130
134
|
:price_in_cents: 250
|
131
135
|
:regular_price_in_cents: 250
|
132
136
|
:limited_time_offer_savings_in_cents: 0
|
@@ -167,6 +171,7 @@
|
|
167
171
|
:expectation:
|
168
172
|
:product_no: 148528
|
169
173
|
:name: Bodegas Ruconia Rey Don Garcia El de Najera Crianza 2005
|
174
|
+
:tags: ["bodegas", "ruconia", "rey", "don", "garcia", "el", "de", "najera", "crianza", "2005", "wine", "red", "rioja", "spain", "sl", "bottle"]
|
170
175
|
:price_in_cents: 1595
|
171
176
|
:regular_price_in_cents: 1595
|
172
177
|
:limited_time_offer_savings_in_cents: 0
|
@@ -207,6 +212,7 @@
|
|
207
212
|
:expectation:
|
208
213
|
:product_no: 582973
|
209
214
|
:name: Bombay Sapphire
|
215
|
+
:tags: ["bombay", "sapphire", "spirits", "gin", "england", "united", "kingdom", "company", "ltd", "bottle"]
|
210
216
|
:price_in_cents: 5545
|
211
217
|
:regular_price_in_cents: 5545
|
212
218
|
:limited_time_offer_savings_in_cents: 0
|
@@ -247,6 +253,7 @@
|
|
247
253
|
:expectation:
|
248
254
|
:product_no: 2183079
|
249
255
|
:name: Lake & River Series Cabernet Franc VQA
|
256
|
+
:tags: []
|
250
257
|
:price_in_cents: 1095
|
251
258
|
:regular_price_in_cents: 1095
|
252
259
|
:limited_time_offer_savings_in_cents: 0
|
@@ -287,6 +294,7 @@
|
|
287
294
|
:expectation:
|
288
295
|
:product_no: 114173
|
289
296
|
:name: 05 Sauv Blanc Weinstock Cellar K
|
297
|
+
:tags: []
|
290
298
|
:price_in_cents: 2295
|
291
299
|
:regular_price_in_cents: 2295
|
292
300
|
:limited_time_offer_savings_in_cents: 0
|
data/spec/pages/store_pages.yml
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
:expectation:
|
8
8
|
:store_no: 444
|
9
9
|
:name: Kennedy & Sheppard
|
10
|
+
:tags: ["kennedy", "sheppard", "2356", "road", "agincourt", "mall", "toronto", "scarborough", "torontoscarborough", "m1t3h1"]
|
10
11
|
:address_line_1: 2356 Kennedy Road
|
11
12
|
:address_line_2: Agincourt Mall
|
12
13
|
:city: Toronto-Scarborough
|
@@ -61,6 +62,7 @@
|
|
61
62
|
:sunday_open: 720
|
62
63
|
:sunday_close: 960
|
63
64
|
:name: Flora & Clinton Street
|
65
|
+
:tags: ["flora", "clinton", "street", "1", "industrial", "road", "unit", "b", "teeswater", "n0g2s0"]
|
64
66
|
:address_line_1: 1 Industrial Road
|
65
67
|
:address_line_2: Unit B
|
66
68
|
:city: Teeswater
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 10
|
8
|
-
- 0
|
9
|
-
version: 0.10.0
|
8
|
+
- 1
|
9
|
+
version: 0.10.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Carsten Nielsen
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-30 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -56,6 +56,19 @@ dependencies:
|
|
56
56
|
version: "0"
|
57
57
|
type: :runtime
|
58
58
|
version_requirements: *id003
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: stringex
|
61
|
+
prerelease: false
|
62
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
type: :runtime
|
71
|
+
version_requirements: *id004
|
59
72
|
description: Request and parse product, store, inventory, and product search pages directly from the official LCBO website.
|
60
73
|
email:
|
61
74
|
- heycarsten@gmail.com
|
@@ -89,6 +102,7 @@ files:
|
|
89
102
|
- lib/lcbo/crawlkit/request.rb
|
90
103
|
- lib/lcbo/crawlkit/request_prototype.rb
|
91
104
|
- lib/lcbo/crawlkit/response.rb
|
105
|
+
- lib/lcbo/crawlkit/tag_helper.rb
|
92
106
|
- lib/lcbo/crawlkit/titlecase_helper.rb
|
93
107
|
- lib/lcbo/crawlkit/volume_helper.rb
|
94
108
|
- lib/lcbo/ext.rb
|
@@ -107,6 +121,7 @@ files:
|
|
107
121
|
- spec/crawlkit/request_prototype_spec.rb
|
108
122
|
- spec/crawlkit/request_spec.rb
|
109
123
|
- spec/crawlkit/response_spec.rb
|
124
|
+
- spec/crawlkit/tag_helper_spec.rb
|
110
125
|
- spec/crawlkit/titlecase_helper_spec.rb
|
111
126
|
- spec/crawlkit/volume_helper_spec.rb
|
112
127
|
- spec/crawlkit_spec.rb
|
@@ -174,6 +189,7 @@ test_files:
|
|
174
189
|
- spec/crawlkit/request_prototype_spec.rb
|
175
190
|
- spec/crawlkit/request_spec.rb
|
176
191
|
- spec/crawlkit/response_spec.rb
|
192
|
+
- spec/crawlkit/tag_helper_spec.rb
|
177
193
|
- spec/crawlkit/titlecase_helper_spec.rb
|
178
194
|
- spec/crawlkit/volume_helper_spec.rb
|
179
195
|
- spec/crawlkit_spec.rb
|