wedding_registry_scraper 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +22 -0
- data/README.md +53 -0
- data/lib/wedding_registry_scraper.rb +3 -0
- data/lib/wedding_registry_scraper/registries.rb +28 -0
- data/lib/wedding_registry_scraper/registries/crate_and_barrel.rb +45 -0
- data/lib/wedding_registry_scraper/registries/heath_ceramics.rb +90 -0
- data/lib/wedding_registry_scraper/registries/rei.rb +49 -0
- data/lib/wedding_registry_scraper/registries/williams_sonoma.rb +42 -0
- data/lib/wedding_registry_scraper/registries/zola.rb +55 -0
- data/lib/wedding_registry_scraper/registry.rb +120 -0
- data/lib/wedding_registry_scraper/registry_scraper.rb +21 -0
- data/lib/wedding_registry_scraper/version.rb +7 -0
- metadata +15 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d95783063ce46d1a28af98b6c3330bd8d33ba95
|
4
|
+
data.tar.gz: 56298d31eba3630e7a1f28bfd4565f655f110708
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f82473b27cf53ccdd6d000b2543256fe72ae7d35322e06a4eef9b71cf2b30bf780cf2a00d169adb7bbc38b0dc8e2a771f904768e02b85ac3caa125c1c433d14
|
7
|
+
data.tar.gz: b29414b56a8faab31165d8fece59f57b2a5bc568aae8ce9809ad8cfadd14149d98befab78b7fa8d388d58ad58ef0ab55bfcdf798af0783a1a63e5a776b6eeebd
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Andrew Pariser
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Wedding Registry Scraper
|
2
|
+
|
3
|
+
## Why?
|
4
|
+
|
5
|
+
Want to consolidate multiple registries onto one website?
|
6
|
+
|
7
|
+
Want to regularly update your fulfillment counts?
|
8
|
+
|
9
|
+
This (work in progress) is for you!
|
10
|
+
|
11
|
+
## Which Registries?
|
12
|
+
|
13
|
+
Right now, it works with:
|
14
|
+
|
15
|
+
* Zola — https://www.zola.com
|
16
|
+
* REI — http://www.rei.com
|
17
|
+
* Heath Ceramics — http://www.heathceramics.com
|
18
|
+
* Crate & Barrel — http://www.crateandbarrel.com
|
19
|
+
* Williams Sonoma — http://www.williams-sonoma.com
|
20
|
+
|
21
|
+
I intend to get this working with:
|
22
|
+
|
23
|
+
* Sur la Table
|
24
|
+
* Pottery Barn
|
25
|
+
|
26
|
+
## Usage
|
27
|
+
|
28
|
+
Add to your Gemfile
|
29
|
+
|
30
|
+
gem 'wedding_registry_scraper'
|
31
|
+
|
32
|
+
Use `WeddingRegistryScraper.scrape`:
|
33
|
+
|
34
|
+
require 'wedding_registry_scraper'
|
35
|
+
|
36
|
+
WeddingRegistryScraper.scrape([
|
37
|
+
"http://www.heathceramics.com/giftregistry/view/index/id/YOUR_UNIQUE_REGISTRY_ID",
|
38
|
+
"http://www.crateandbarrel.com/Gift-Registry/YOUR_NAMES/YOUR_UNIQUE_REGISTRY_ID",
|
39
|
+
"https://www.zola.com/registry/YOUR_REGISTRY_SLUG",
|
40
|
+
"http://www.rei.com/GiftRegistryDetails/YOUR_UNIQUE_REGISTRY_ID",
|
41
|
+
"https://secure.williams-sonoma.com/registry/YOUR_UNIQUE_REGISTRY_ID/registry-list.html",
|
42
|
+
])
|
43
|
+
|
44
|
+
|
45
|
+
## Want to Help?
|
46
|
+
|
47
|
+
It should be simple to add another registry by copying one of the existing `Registry` subclasses inside `lib/wedding_registry_scraper/registries`.
|
48
|
+
|
49
|
+
## Like this project and planning on registering?
|
50
|
+
|
51
|
+
Why don't you register for Zola using my invite code: https://www.zola.com/invite/pariser
|
52
|
+
|
53
|
+
Thanks!
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module WeddingRegistryScraper::Registries
|
2
|
+
class << self
|
3
|
+
def registry_classes
|
4
|
+
@registry_classes ||= begin
|
5
|
+
self.constants.map do |const|
|
6
|
+
obj = self.const_get(const)
|
7
|
+
obj if obj.is_a?(Class) && obj < WeddingRegistryScraper::Registry
|
8
|
+
end.compact
|
9
|
+
end
|
10
|
+
@registry_classes
|
11
|
+
end
|
12
|
+
|
13
|
+
def registry_class_from_url(url)
|
14
|
+
registry_classes.detect do |klass|
|
15
|
+
/^https?:\/\/[^\/]*#{klass.domain}/.match(url)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def initialize_registry(url, options={})
|
20
|
+
klass = registry_class_from_url(url)
|
21
|
+
klass ? klass.new(url, options) : nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Load all registries
|
27
|
+
|
28
|
+
Dir[File.dirname(__FILE__) + '/registries/*.rb'].each { |file| require file }
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::CrateAndBarrel < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "Crate & Barrel"
|
3
|
+
@domain = "crateandbarrel.com"
|
4
|
+
private
|
5
|
+
def get_products(doc)
|
6
|
+
doc.css('.jsItemRow:not(.emptyCategoryRow)')
|
7
|
+
end
|
8
|
+
|
9
|
+
def get_name(product)
|
10
|
+
product.css('.itemTitle').text.strip
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_sku(product)
|
14
|
+
sku = product.css('.skuNum').text.strip.match(/SKU (\S+)/)[1]
|
15
|
+
"c&b:#{sku}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_url(product)
|
19
|
+
# TODO pop up modal? set proper anchor (they're not unique!)
|
20
|
+
@url
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_image_url(product)
|
24
|
+
thumb_src = product.css('img')[0]['src']
|
25
|
+
thumb_src.sub(/\$web_itembasket\$/, '&$web_popup_zoom$&wid=379&hei=379')
|
26
|
+
end
|
27
|
+
|
28
|
+
def get_remaining(product)
|
29
|
+
desired = product.css('td')[4].css('.itemHas').text.strip.to_i
|
30
|
+
fulfilled = product.css('td')[5].css('.itemHas').text.strip.to_i
|
31
|
+
desired - fulfilled
|
32
|
+
end
|
33
|
+
|
34
|
+
def get_desired(product)
|
35
|
+
product.css('td')[4].css('.itemHas').text.strip.to_i
|
36
|
+
end
|
37
|
+
|
38
|
+
def get_price(product)
|
39
|
+
if (sale_price = product.css('.salePrice')).any?
|
40
|
+
/\$?(\d+(\.\d+)?)/.match(sale_price.text) { |match| match[1].to_f }
|
41
|
+
elsif (regular_price = product.css('.regPrice')).any?
|
42
|
+
/\$?(\d+(\.\d+)?)/.match(regular_price.text) { |match| match[1].to_f }
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
ActiveSupport::Inflector.inflections(:en) do |inflector|
|
4
|
+
inflector.uncountable 'heath_ceramics'
|
5
|
+
end
|
6
|
+
|
7
|
+
class WeddingRegistryScraper::Registries::HeathCeramics < WeddingRegistryScraper::Registry
|
8
|
+
@display_name = "Heath Ceramics"
|
9
|
+
@domain = "heathceramics.com"
|
10
|
+
private
|
11
|
+
def get_products(doc)
|
12
|
+
doc.css('table#shopping-cart-table tbody tr')
|
13
|
+
end
|
14
|
+
|
15
|
+
def get_name(product)
|
16
|
+
product.css('.attentionText').text.strip
|
17
|
+
end
|
18
|
+
|
19
|
+
def get_sku(product)
|
20
|
+
text = product.css('.ctxProductCol .tinyText').map(&:text).detect { |t| t =~ /SKU/ }
|
21
|
+
sku = text.match(/SKU:\s+(\S+)/)[1]
|
22
|
+
"heath:#{sku}"
|
23
|
+
end
|
24
|
+
|
25
|
+
def get_url(product)
|
26
|
+
@url
|
27
|
+
end
|
28
|
+
|
29
|
+
def get_image_url(product)
|
30
|
+
details_url = product.css('[data-url]')[0]['data-url']
|
31
|
+
|
32
|
+
request_params = {
|
33
|
+
:headers => {
|
34
|
+
'X-Requested-With' => 'XMLHttpRequest'
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
result = with_lax_heath_uri_parsing do
|
39
|
+
puts "GET #{details_url.inspect} with params #{request_params.to_json}" if @debug
|
40
|
+
Unirest.get(details_url, request_params)
|
41
|
+
end
|
42
|
+
|
43
|
+
doc = Nokogiri::HTML(result.body)
|
44
|
+
|
45
|
+
if doc.css('ul.mixMatchList').any?
|
46
|
+
doc.css('ul.mixMatchList li:last-child img[src]')[0]['src']
|
47
|
+
else
|
48
|
+
doc.css('.imagesScrollable img[src]')[0]['src']
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def get_remaining(product)
|
53
|
+
fulfillment = product.css('.fulfilled').text.strip
|
54
|
+
fulfillment.match(/^(\d+)/)[0].to_i
|
55
|
+
end
|
56
|
+
|
57
|
+
def get_desired(product)
|
58
|
+
fulfillment = product.css('.fulfilled').text.strip
|
59
|
+
fulfillment.match(/(\d+)$/)[0].to_i
|
60
|
+
end
|
61
|
+
|
62
|
+
def get_price(product)
|
63
|
+
product.css('.price').text.strip.gsub(/[^\d\.]/, '').to_f
|
64
|
+
end
|
65
|
+
|
66
|
+
def with_lax_heath_uri_parsing(&block)
|
67
|
+
URI::RFC3986_Parser.class_eval do
|
68
|
+
alias_method :old_split, :split
|
69
|
+
|
70
|
+
def split(uri)
|
71
|
+
# scheme, userinfo, host, port, registry, path, opaque, query, fragment
|
72
|
+
[ "http", nil, "www.heathceramics.com", nil, nil, uri.sub(/^https?:\/\/www.heathceramics.com/, ''), nil, nil, nil ]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
exception = nil
|
77
|
+
begin
|
78
|
+
result = yield
|
79
|
+
rescue Exception => e
|
80
|
+
exception = e
|
81
|
+
ensure
|
82
|
+
URI::RFC3986_Parser.class_eval do
|
83
|
+
alias_method :split, :old_split
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
raise exception if exception
|
88
|
+
result
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::Rei < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "REI"
|
3
|
+
@domain = "rei.com"
|
4
|
+
private
|
5
|
+
def get_product_details_url(product)
|
6
|
+
href = product.css('a[name=prod]')[0]['href'].sub(/^\/?/, '')
|
7
|
+
"https://www.rei.com/#{href.sub(/^\//, '')}"
|
8
|
+
end
|
9
|
+
|
10
|
+
def get_products(doc)
|
11
|
+
doc.css('table.registryList').first.css('tr.tr0')
|
12
|
+
end
|
13
|
+
|
14
|
+
def get_name(product)
|
15
|
+
product.css('a[name=prod]').text.strip
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_sku(product)
|
19
|
+
sku = get_product_details_url(product).match(/\/product\/(\d+)/)[1].to_i
|
20
|
+
"rei-#{sku}"
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_url(product)
|
24
|
+
@url
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_image_url(product)
|
28
|
+
details_url = get_product_details_url(product)
|
29
|
+
|
30
|
+
puts "GET #{details_url.inspect}" if @debug
|
31
|
+
result = Unirest.get(details_url)
|
32
|
+
doc = Nokogiri::HTML(result.body)
|
33
|
+
|
34
|
+
image_url = doc.css('#zoomLink')[0]['href']
|
35
|
+
"https://www.rei.com/#{image_url.sub(/^\//,'')}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def get_remaining(product)
|
39
|
+
product.css('td')[5].text.strip.to_i
|
40
|
+
end
|
41
|
+
|
42
|
+
def get_desired(product)
|
43
|
+
product.css('td')[4].text.strip.to_i
|
44
|
+
end
|
45
|
+
|
46
|
+
def get_price(product)
|
47
|
+
product.css('td')[3].text.strip.sub('$','').to_f
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::WilliamsSonoma < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "Williams-Sonoma"
|
3
|
+
@domain = "williams-sonoma.com"
|
4
|
+
private
|
5
|
+
def get_products(doc)
|
6
|
+
doc.css('table.registry-category-list tbody tr')
|
7
|
+
end
|
8
|
+
|
9
|
+
def get_name(product)
|
10
|
+
product.css('.product-detail .product-info .title a').text.strip
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_sku(product)
|
14
|
+
sku = product.css('.product-detail .product-info .item-number').text.strip.match(/: (\d+)/)[1]
|
15
|
+
"williams-sonoma:#{sku}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_url(product)
|
19
|
+
# product.css('.product-detail .product-info .title a')[0]['href']
|
20
|
+
@url
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_image_url(product)
|
24
|
+
product.css('img')[0]['src'].sub(/f\.jpg$/, 'c.jpg')
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_remaining(product)
|
28
|
+
product.css('td.still-needs').text.strip.to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
def get_desired(product)
|
32
|
+
product.css('td.requested').text.strip.to_i
|
33
|
+
end
|
34
|
+
|
35
|
+
def get_price(product)
|
36
|
+
if (sale_price = product.css('td.price .price-state.price-special')).any?
|
37
|
+
sale_price.css('.currencyUSD .price-amount').text.strip.to_f
|
38
|
+
else
|
39
|
+
product.css('td.price .price-state.price-standard .currencyUSD .price-amount').text.strip.to_f
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::Zola < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "Zola"
|
3
|
+
@domain = "zola.com"
|
4
|
+
private
|
5
|
+
def get_products(doc)
|
6
|
+
doc.css('#all-panel .product-tile')
|
7
|
+
end
|
8
|
+
|
9
|
+
def get_name(product)
|
10
|
+
product.css('.single-product-name').text.strip
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_sku(product)
|
14
|
+
sku = product.css('.single-product')[0]['id']
|
15
|
+
"zola:#{sku}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_url(product)
|
19
|
+
href = product.css('.content a')[0]['href'].sub(/^\/?/, '')
|
20
|
+
"https://www.zola.com/#{href}"
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_image_url(product)
|
24
|
+
product.css('[data-image-url]')[0]['data-image-url']
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_remaining(product)
|
28
|
+
product.css('.needed').text.strip.gsub(/[^\d]+/, '').to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
def get_desired(product)
|
32
|
+
end
|
33
|
+
|
34
|
+
def get_price(product)
|
35
|
+
product.css('[data-price]')[0]['data-price'].gsub(/[$,]/, '').to_f
|
36
|
+
end
|
37
|
+
|
38
|
+
def price_type(product)
|
39
|
+
product_price = product.css('.product-price')
|
40
|
+
|
41
|
+
if product_price.length > 0 && product_price[0].text.strip == 'Contribute what you wish'
|
42
|
+
WeddingRegistryScraper::Registry::VARIABLE_PRICE
|
43
|
+
else
|
44
|
+
WeddingRegistryScraper::Registry::FIXED_PRICE
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def fulfilled?(product)
|
49
|
+
if price_type(product) == WeddingRegistryScraper::Registry::VARIABLE_PRICE
|
50
|
+
get_price(product) <= 0
|
51
|
+
else
|
52
|
+
get_remaining(product) <= 0
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'unirest'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
class WeddingRegistryScraper::Registry
|
5
|
+
PRICE_TYPES = [
|
6
|
+
FIXED_PRICE = 'Fixed price',
|
7
|
+
VARIABLE_PRICE = 'Variable price',
|
8
|
+
]
|
9
|
+
|
10
|
+
class << self
|
11
|
+
attr_reader :display_name
|
12
|
+
attr_reader :domain
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(url, params={})
|
16
|
+
params.symbolize_keys!
|
17
|
+
@url = url
|
18
|
+
@debug = params[:debug] == true
|
19
|
+
end
|
20
|
+
|
21
|
+
def get_items
|
22
|
+
doc = get_registry
|
23
|
+
|
24
|
+
get_products(doc).reduce({}) do |products, product|
|
25
|
+
sku = get_sku(product)
|
26
|
+
|
27
|
+
details = {
|
28
|
+
:name => get_name(product),
|
29
|
+
:remaining => get_remaining(product),
|
30
|
+
:desired => get_desired(product),
|
31
|
+
:url => get_url(product),
|
32
|
+
:image_url => get_image_url(product),
|
33
|
+
:registry_name => self.class.display_name,
|
34
|
+
:fulfilled => fulfilled?(product),
|
35
|
+
:price_type => price_type(product),
|
36
|
+
:price_value => get_price(product),
|
37
|
+
}
|
38
|
+
|
39
|
+
products.merge! sku => details
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def get_products(doc) ; raise NotImplementedError; end
|
46
|
+
|
47
|
+
def get_name(product) ; raise NotImplementedError; end
|
48
|
+
def get_sku(product) ; raise NotImplementedError; end
|
49
|
+
def get_url(product) ; raise NotImplementedError; end
|
50
|
+
def get_image_url(product) ; raise NotImplementedError; end
|
51
|
+
def get_remaining(product) ; raise NotImplementedError; end
|
52
|
+
def get_desired(product) ; raise NotImplementedError; end
|
53
|
+
def get_price(product) ; raise NotImplementedError; end
|
54
|
+
|
55
|
+
def price_type(product)
|
56
|
+
FIXED_PRICE
|
57
|
+
end
|
58
|
+
|
59
|
+
def fulfilled?(product)
|
60
|
+
get_remaining(product) == 0
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_registry
|
64
|
+
result, _ = make_request(:get, @url)
|
65
|
+
Nokogiri::HTML(result.body)
|
66
|
+
end
|
67
|
+
|
68
|
+
def make_request(method, url, params={})
|
69
|
+
json = params.delete(:json)
|
70
|
+
request_params = {}
|
71
|
+
|
72
|
+
if cookies = params.delete(:cookies)
|
73
|
+
request_params[:headers] = {
|
74
|
+
'Cookie' => dump_cookies(cookies)
|
75
|
+
}
|
76
|
+
end
|
77
|
+
|
78
|
+
if json
|
79
|
+
request_params[:headers] = {
|
80
|
+
'Accept' => "application/json"
|
81
|
+
}
|
82
|
+
end
|
83
|
+
|
84
|
+
request_params[:parameters] = json ? params.to_json : params
|
85
|
+
|
86
|
+
puts "#{method.to_s.upcase} #{url} with params #{request_params.to_json}" if @debug
|
87
|
+
result = Unirest.send(method, url, request_params)
|
88
|
+
|
89
|
+
puts "RESULT #{result.code} with headers #{result.headers.inspect}" if @debug
|
90
|
+
|
91
|
+
open_result_in_browser(result) if @debug
|
92
|
+
|
93
|
+
cookies = load_cookies(result.headers[:set_cookie])
|
94
|
+
|
95
|
+
[ result, cookies ]
|
96
|
+
end
|
97
|
+
|
98
|
+
def open_result_in_browser(result)
|
99
|
+
file = Tempfile.new(['weddding-scraper','.html'])
|
100
|
+
file << result.body
|
101
|
+
file.close
|
102
|
+
|
103
|
+
`open "#{file.path}"`
|
104
|
+
|
105
|
+
sleep 1
|
106
|
+
file.unlink
|
107
|
+
end
|
108
|
+
|
109
|
+
def dump_cookies(cookies)
|
110
|
+
cookies.map { |k, v| "#{k}=#{v}" }.join("; ")
|
111
|
+
end
|
112
|
+
|
113
|
+
def load_cookies(set_cookie)
|
114
|
+
set_cookie ||= []
|
115
|
+
set_cookie.reduce({}) do |cookies, cookie|
|
116
|
+
key, value = cookie.split(';')[0].split('=')
|
117
|
+
cookies.merge!(key => value || "")
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class WeddingRegistryScraper::RegistryScraper
|
2
|
+
class << self
|
3
|
+
def scrape(registry_urls=[], options={})
|
4
|
+
all_products = {}
|
5
|
+
|
6
|
+
registry_urls.each do |url|
|
7
|
+
unless registry = WeddingRegistryScraper::Registries.initialize_registry(url)
|
8
|
+
raise "Could not initialize registry from url #{url.inspect}"
|
9
|
+
end
|
10
|
+
|
11
|
+
puts "* Loading items from #{registry.class.name.demodulize}..." if options[:log_messages]
|
12
|
+
products = registry.get_items
|
13
|
+
puts " Loaded %d items\n" % products.count if options[:log_messages]
|
14
|
+
|
15
|
+
all_products.merge!(products)
|
16
|
+
end
|
17
|
+
|
18
|
+
all_products
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wedding_registry_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Pariser
|
@@ -44,48 +44,20 @@ dependencies:
|
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
46
|
version: '1.6'
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: colored
|
49
|
-
requirement: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
51
|
-
- - "~>"
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '1.2'
|
54
|
-
type: :runtime
|
55
|
-
prerelease: false
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
requirements:
|
58
|
-
- - "~>"
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
version: '1.2'
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
|
-
name: safe_yaml
|
63
|
-
requirement: !ruby/object:Gem::Requirement
|
64
|
-
requirements:
|
65
|
-
- - "~>"
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: '1.0'
|
68
|
-
type: :runtime
|
69
|
-
prerelease: false
|
70
|
-
version_requirements: !ruby/object:Gem::Requirement
|
71
|
-
requirements:
|
72
|
-
- - "~>"
|
73
|
-
- !ruby/object:Gem::Version
|
74
|
-
version: '1.0'
|
75
47
|
- !ruby/object:Gem::Dependency
|
76
48
|
name: activesupport
|
77
49
|
requirement: !ruby/object:Gem::Requirement
|
78
50
|
requirements:
|
79
51
|
- - "~>"
|
80
52
|
- !ruby/object:Gem::Version
|
81
|
-
version: '4.
|
53
|
+
version: '4.2'
|
82
54
|
type: :runtime
|
83
55
|
prerelease: false
|
84
56
|
version_requirements: !ruby/object:Gem::Requirement
|
85
57
|
requirements:
|
86
58
|
- - "~>"
|
87
59
|
- !ruby/object:Gem::Version
|
88
|
-
version: '4.
|
60
|
+
version: '4.2'
|
89
61
|
description: Look at a bunch of various retailers' wedding registries and consolidate
|
90
62
|
information in one place.
|
91
63
|
email: pariser@gmail.com
|
@@ -93,7 +65,18 @@ executables: []
|
|
93
65
|
extensions: []
|
94
66
|
extra_rdoc_files: []
|
95
67
|
files:
|
68
|
+
- LICENSE
|
69
|
+
- README.md
|
96
70
|
- lib/wedding_registry_scraper.rb
|
71
|
+
- lib/wedding_registry_scraper/registries.rb
|
72
|
+
- lib/wedding_registry_scraper/registries/crate_and_barrel.rb
|
73
|
+
- lib/wedding_registry_scraper/registries/heath_ceramics.rb
|
74
|
+
- lib/wedding_registry_scraper/registries/rei.rb
|
75
|
+
- lib/wedding_registry_scraper/registries/williams_sonoma.rb
|
76
|
+
- lib/wedding_registry_scraper/registries/zola.rb
|
77
|
+
- lib/wedding_registry_scraper/registry.rb
|
78
|
+
- lib/wedding_registry_scraper/registry_scraper.rb
|
79
|
+
- lib/wedding_registry_scraper/version.rb
|
97
80
|
homepage: https://github.com/pariser/wedding-registry-scraper
|
98
81
|
licenses:
|
99
82
|
- MIT
|
@@ -106,7 +89,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
106
89
|
requirements:
|
107
90
|
- - ">="
|
108
91
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
92
|
+
version: '2.2'
|
110
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
94
|
requirements:
|
112
95
|
- - ">="
|