lcbo 0.10.0 → 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +5 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.md +3 -3
- data/examples/crawlers/product_lists_crawler.rb +2 -2
- data/lcbo.gemspec +1 -0
- data/lib/lcbo/crawlkit.rb +2 -0
- data/lib/lcbo/crawlkit/tag_helper.rb +41 -0
- data/lib/lcbo/pages/product_page.rb +11 -0
- data/lib/lcbo/pages/store_page.rb +20 -10
- data/lib/lcbo/version.rb +1 -1
- data/spec/crawlkit/tag_helper_spec.rb +18 -0
- data/spec/pages/product_pages.yml +8 -0
- data/spec/pages/store_pages.yml +2 -0
- metadata +19 -3
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -26,9 +26,9 @@ store list pages directly from the [LCBO](http://lcbo.com) website.
|
|
26
26
|
## Crawlers
|
27
27
|
|
28
28
|
Some examples of crawlers exist
|
29
|
-
[here](
|
30
|
-
check out the
|
31
|
-
[crawler spec](
|
29
|
+
[here](https://github.com/heycarsten/lcbo/tree/master/examples/crawlers).
|
30
|
+
You can also check out the
|
31
|
+
[crawler spec](https://github.com/heycarsten/lcbo/blob/master/spec/crawlkit/crawler_spec.rb)
|
32
32
|
to see how to interact with them.
|
33
33
|
|
34
34
|
## Installation
|
data/examples/crawlers/product_lists_crawler.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class ProductListsCrawler
|
2
2
|
|
3
|
-
include CrawlKit::Crawler
|
3
|
+
include LCBO::CrawlKit::Crawler
|
4
4
|
|
5
5
|
def request(params)
|
6
6
|
LCBO.product_list(params[:next_page] || 1)
|
@@ -11,7 +11,7 @@ class ProductListsCrawler
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def reduce
|
14
|
-
|
14
|
+
responses.map { |params| params[:product_nos] }.flatten
|
15
15
|
end
|
16
16
|
|
17
17
|
end
|
data/lcbo.gemspec
CHANGED
data/lib/lcbo/crawlkit.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'typhoeus'
|
3
3
|
require 'unicode_utils'
|
4
|
+
require 'stringex'
|
4
5
|
require 'uri'
|
5
6
|
|
6
7
|
module LCBO
|
@@ -23,6 +24,7 @@ require 'lcbo/crawlkit/fastdate_helper'
|
|
23
24
|
require 'lcbo/crawlkit/page'
|
24
25
|
require 'lcbo/crawlkit/request'
|
25
26
|
require 'lcbo/crawlkit/response'
|
27
|
+
require 'lcbo/crawlkit/tag_helper'
|
26
28
|
require 'lcbo/crawlkit/request_prototype'
|
27
29
|
require 'lcbo/crawlkit/crawler'
|
28
30
|
require 'lcbo/crawlkit/titlecase_helper'
|
data/lib/lcbo/crawlkit/tag_helper.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
module LCBO
|
2
|
+
module CrawlKit
|
3
|
+
module TagHelper
|
4
|
+
DELETION_RE = /\'|\"|\\|\/|\(|\)|\[|\]|\./
|
5
|
+
WHITESPACE_RE = /\*|\+|\&|\_|\,|\s/
|
6
|
+
|
7
|
+
def self.flatten(values)
|
8
|
+
TitleCaseHelper.downcase(values.flatten.join(' ')).
|
9
|
+
gsub(DELETION_RE, '').
|
10
|
+
gsub(WHITESPACE_RE, ' ').
|
11
|
+
strip
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.split(str)
|
15
|
+
[str, str.to_ascii].
|
16
|
+
join(' ').
|
17
|
+
split.
|
18
|
+
map { |word| stem(word) }.
|
19
|
+
flatten.
|
20
|
+
uniq
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.stem(word)
|
24
|
+
if word.include?('-')
|
25
|
+
parts = word.split('-')
|
26
|
+
a = parts.dup
|
27
|
+
a << parts.join
|
28
|
+
a
|
29
|
+
else
|
30
|
+
word
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.[](*values)
|
35
|
+
return [] if values.any? { |val| '' == val.to_s.strip }
|
36
|
+
split(flatten(values))
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/lcbo/pages/product_page.rb
CHANGED
@@ -19,6 +19,17 @@ module LCBO
|
|
19
19
|
CrawlKit::TitleCaseHelper[product_details_form('itemName')]
|
20
20
|
end
|
21
21
|
|
22
|
+
emits :tags do
|
23
|
+
CrawlKit::TagHelper[
|
24
|
+
name,
|
25
|
+
primary_category,
|
26
|
+
secondary_category,
|
27
|
+
origin,
|
28
|
+
producer_name,
|
29
|
+
package_unit_type
|
30
|
+
]
|
31
|
+
end
|
32
|
+
|
22
33
|
emits :price_in_cents do
|
23
34
|
(product_details_form('price').to_f * 100).to_i
|
24
35
|
end
|
data/lib/lcbo/pages/store_page.rb
CHANGED
@@ -36,20 +36,20 @@ module LCBO
|
|
36
36
|
query_params[:store_no].to_i
|
37
37
|
end
|
38
38
|
|
39
|
-
DAY_NAMES.each do |day|
|
40
|
-
emits :"#{day}_open" do
|
41
|
-
time_open_close(day)[0]
|
42
|
-
end
|
43
|
-
|
44
|
-
emits :"#{day}_close" do
|
45
|
-
time_open_close(day)[1]
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
39
|
emits :name do
|
50
40
|
CrawlKit::TitleCaseHelper[info_nodes[1].content.strip]
|
51
41
|
end
|
52
42
|
|
43
|
+
emits :tags do
|
44
|
+
CrawlKit::TagHelper[
|
45
|
+
name,
|
46
|
+
address_line_1,
|
47
|
+
address_line_2,
|
48
|
+
city,
|
49
|
+
postal_code
|
50
|
+
]
|
51
|
+
end
|
52
|
+
|
53
53
|
emits :address_line_1 do
|
54
54
|
data = info_nodes[2].content.strip.split(',')[0]
|
55
55
|
unless data
|
@@ -99,6 +99,16 @@ module LCBO
|
|
99
99
|
location['longitude'][0].to_f
|
100
100
|
end
|
101
101
|
|
102
|
+
DAY_NAMES.each do |day|
|
103
|
+
emits :"#{day}_open" do
|
104
|
+
time_open_close(day)[0]
|
105
|
+
end
|
106
|
+
|
107
|
+
emits :"#{day}_close" do
|
108
|
+
time_open_close(day)[1]
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
102
112
|
DETAIL_FIELDS.keys.each do |field|
|
103
113
|
emits(field) { details[field] }
|
104
114
|
end
|
data/lib/lcbo/version.rb
CHANGED
data/spec/crawlkit/tag_helper_spec.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe LCBO::CrawlKit::TagHelper do
|
5
|
+
@expectations = {
|
6
|
+
['Hello World'] => %w[hello world],
|
7
|
+
['Éve Picard'] => %w[éve picard eve],
|
8
|
+
['Hello Hello World'] => %w[hello world],
|
9
|
+
['Hello', 'Éve Picard'] => %w[hello éve picard eve],
|
10
|
+
['Hello', 'Éve-Picard'] => %w[hello éve picard évepicard eve evepicard]
|
11
|
+
}
|
12
|
+
|
13
|
+
@expectations.each_pair do |input, expectation|
|
14
|
+
it "should tagify: #{input.inspect} to: #{expectation.inspect}" do
|
15
|
+
LCBO::CrawlKit::TagHelper[*input].must_equal expectation
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/spec/pages/product_pages.yml
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
:expectation:
|
8
8
|
:product_no: 122408
|
9
9
|
:name: Pascual Toso Malbec Rose
|
10
|
+
:tags: []
|
10
11
|
:price_in_cents: 1195
|
11
12
|
:regular_price_in_cents: 1195
|
12
13
|
:limited_time_offer_savings_in_cents: 0
|
@@ -47,6 +48,7 @@
|
|
47
48
|
:expectation:
|
48
49
|
:product_no: 18
|
49
50
|
:name: Heineken Lager
|
51
|
+
:tags: ["heineken", "lager", "beer", "netherlands", "heinekens", "brouwerijen", "nederland", "bv", "bottle"]
|
50
52
|
:price_in_cents: 1250
|
51
53
|
:regular_price_in_cents: 1350
|
52
54
|
:limited_time_offer_savings_in_cents: 100
|
@@ -87,6 +89,7 @@
|
|
87
89
|
:expectation:
|
88
90
|
:product_no: 280461
|
89
91
|
:name: Dom Pérignon Brut Champagne
|
92
|
+
:tags: ["dom", "pérignon", "brut", "champagne", "wine", "france", "moet", "chandon", "sa", "bottle", "perignon"]
|
90
93
|
:price_in_cents: 21995
|
91
94
|
:regular_price_in_cents: 21995
|
92
95
|
:limited_time_offer_savings_in_cents: 0
|
@@ -127,6 +130,7 @@
|
|
127
130
|
:expectation:
|
128
131
|
:product_no: 479154
|
129
132
|
:name: Floris Ninkeberry Gardenbeer
|
133
|
+
:tags: ["floris", "ninkeberry", "gardenbeer", "beer", "ale", "belgium", "brouwerij", "huyghe", "bottle"]
|
130
134
|
:price_in_cents: 250
|
131
135
|
:regular_price_in_cents: 250
|
132
136
|
:limited_time_offer_savings_in_cents: 0
|
@@ -167,6 +171,7 @@
|
|
167
171
|
:expectation:
|
168
172
|
:product_no: 148528
|
169
173
|
:name: Bodegas Ruconia Rey Don Garcia El de Najera Crianza 2005
|
174
|
+
:tags: ["bodegas", "ruconia", "rey", "don", "garcia", "el", "de", "najera", "crianza", "2005", "wine", "red", "rioja", "spain", "sl", "bottle"]
|
170
175
|
:price_in_cents: 1595
|
171
176
|
:regular_price_in_cents: 1595
|
172
177
|
:limited_time_offer_savings_in_cents: 0
|
@@ -207,6 +212,7 @@
|
|
207
212
|
:expectation:
|
208
213
|
:product_no: 582973
|
209
214
|
:name: Bombay Sapphire
|
215
|
+
:tags: ["bombay", "sapphire", "spirits", "gin", "england", "united", "kingdom", "company", "ltd", "bottle"]
|
210
216
|
:price_in_cents: 5545
|
211
217
|
:regular_price_in_cents: 5545
|
212
218
|
:limited_time_offer_savings_in_cents: 0
|
@@ -247,6 +253,7 @@
|
|
247
253
|
:expectation:
|
248
254
|
:product_no: 2183079
|
249
255
|
:name: Lake & River Series Cabernet Franc VQA
|
256
|
+
:tags: []
|
250
257
|
:price_in_cents: 1095
|
251
258
|
:regular_price_in_cents: 1095
|
252
259
|
:limited_time_offer_savings_in_cents: 0
|
@@ -287,6 +294,7 @@
|
|
287
294
|
:expectation:
|
288
295
|
:product_no: 114173
|
289
296
|
:name: 05 Sauv Blanc Weinstock Cellar K
|
297
|
+
:tags: []
|
290
298
|
:price_in_cents: 2295
|
291
299
|
:regular_price_in_cents: 2295
|
292
300
|
:limited_time_offer_savings_in_cents: 0
|
data/spec/pages/store_pages.yml
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
:expectation:
|
8
8
|
:store_no: 444
|
9
9
|
:name: Kennedy & Sheppard
|
10
|
+
:tags: ["kennedy", "sheppard", "2356", "road", "agincourt", "mall", "toronto", "scarborough", "torontoscarborough", "m1t3h1"]
|
10
11
|
:address_line_1: 2356 Kennedy Road
|
11
12
|
:address_line_2: Agincourt Mall
|
12
13
|
:city: Toronto-Scarborough
|
@@ -61,6 +62,7 @@
|
|
61
62
|
:sunday_open: 720
|
62
63
|
:sunday_close: 960
|
63
64
|
:name: Flora & Clinton Street
|
65
|
+
:tags: ["flora", "clinton", "street", "1", "industrial", "road", "unit", "b", "teeswater", "n0g2s0"]
|
64
66
|
:address_line_1: 1 Industrial Road
|
65
67
|
:address_line_2: Unit B
|
66
68
|
:city: Teeswater
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 10
|
8
|
-
- 0
|
9
|
-
version: 0.10.0
|
8
|
+
- 1
|
9
|
+
version: 0.10.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Carsten Nielsen
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-12-
|
17
|
+
date: 2010-12-30 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -56,6 +56,19 @@ dependencies:
|
|
56
56
|
version: "0"
|
57
57
|
type: :runtime
|
58
58
|
version_requirements: *id003
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: stringex
|
61
|
+
prerelease: false
|
62
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
type: :runtime
|
71
|
+
version_requirements: *id004
|
59
72
|
description: Request and parse product, store, inventory, and product search pages directly from the official LCBO website.
|
60
73
|
email:
|
61
74
|
- heycarsten@gmail.com
|
@@ -89,6 +102,7 @@ files:
|
|
89
102
|
- lib/lcbo/crawlkit/request.rb
|
90
103
|
- lib/lcbo/crawlkit/request_prototype.rb
|
91
104
|
- lib/lcbo/crawlkit/response.rb
|
105
|
+
- lib/lcbo/crawlkit/tag_helper.rb
|
92
106
|
- lib/lcbo/crawlkit/titlecase_helper.rb
|
93
107
|
- lib/lcbo/crawlkit/volume_helper.rb
|
94
108
|
- lib/lcbo/ext.rb
|
@@ -107,6 +121,7 @@ files:
|
|
107
121
|
- spec/crawlkit/request_prototype_spec.rb
|
108
122
|
- spec/crawlkit/request_spec.rb
|
109
123
|
- spec/crawlkit/response_spec.rb
|
124
|
+
- spec/crawlkit/tag_helper_spec.rb
|
110
125
|
- spec/crawlkit/titlecase_helper_spec.rb
|
111
126
|
- spec/crawlkit/volume_helper_spec.rb
|
112
127
|
- spec/crawlkit_spec.rb
|
@@ -174,6 +189,7 @@ test_files:
|
|
174
189
|
- spec/crawlkit/request_prototype_spec.rb
|
175
190
|
- spec/crawlkit/request_spec.rb
|
176
191
|
- spec/crawlkit/response_spec.rb
|
192
|
+
- spec/crawlkit/tag_helper_spec.rb
|
177
193
|
- spec/crawlkit/titlecase_helper_spec.rb
|
178
194
|
- spec/crawlkit/volume_helper_spec.rb
|
179
195
|
- spec/crawlkit_spec.rb
|