wedding_registry_scraper 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +22 -0
- data/README.md +53 -0
- data/lib/wedding_registry_scraper.rb +3 -0
- data/lib/wedding_registry_scraper/registries.rb +28 -0
- data/lib/wedding_registry_scraper/registries/crate_and_barrel.rb +45 -0
- data/lib/wedding_registry_scraper/registries/heath_ceramics.rb +90 -0
- data/lib/wedding_registry_scraper/registries/rei.rb +49 -0
- data/lib/wedding_registry_scraper/registries/williams_sonoma.rb +42 -0
- data/lib/wedding_registry_scraper/registries/zola.rb +55 -0
- data/lib/wedding_registry_scraper/registry.rb +120 -0
- data/lib/wedding_registry_scraper/registry_scraper.rb +21 -0
- data/lib/wedding_registry_scraper/version.rb +7 -0
- metadata +15 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d95783063ce46d1a28af98b6c3330bd8d33ba95
|
4
|
+
data.tar.gz: 56298d31eba3630e7a1f28bfd4565f655f110708
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0f82473b27cf53ccdd6d000b2543256fe72ae7d35322e06a4eef9b71cf2b30bf780cf2a00d169adb7bbc38b0dc8e2a771f904768e02b85ac3caa125c1c433d14
|
7
|
+
data.tar.gz: b29414b56a8faab31165d8fece59f57b2a5bc568aae8ce9809ad8cfadd14149d98befab78b7fa8d388d58ad58ef0ab55bfcdf798af0783a1a63e5a776b6eeebd
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Andrew Pariser
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
22
|
+
|
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Wedding Registry Scraper
|
2
|
+
|
3
|
+
## Why?
|
4
|
+
|
5
|
+
Want to consolidate multiple registries onto one website?
|
6
|
+
|
7
|
+
Want to regularly update your fulfillment counts?
|
8
|
+
|
9
|
+
This (work in progress) is for you!
|
10
|
+
|
11
|
+
## Which Registries?
|
12
|
+
|
13
|
+
Right now, works with:
|
14
|
+
|
15
|
+
* Zola — https://www.zola.com
|
16
|
+
* REI — http://www.rei.com
|
17
|
+
* Heath Ceramics — http://www.heathceramics.com
|
18
|
+
* Crate & Barrel — http://www.crateandbarrel.com
|
19
|
+
* Williams Sonoma — http://www.williams-sonoma.com
|
20
|
+
|
21
|
+
I intend to get this working with:
|
22
|
+
|
23
|
+
* Sur la Table
|
24
|
+
* Pottery Barn
|
25
|
+
|
26
|
+
## Usage
|
27
|
+
|
28
|
+
Add to your Gemfile
|
29
|
+
|
30
|
+
gem 'wedding_registry_scraper'
|
31
|
+
|
32
|
+
Use `WeddingRegistryScraper.scrape`:
|
33
|
+
|
34
|
+
require 'wedding_registry_scraper'
|
35
|
+
|
36
|
+
WeddingRegistryScraper.scrape([
|
37
|
+
"http://www.heathceramics.com/giftregistry/view/index/id/YOUR_UNIQUE_REGISTRY_ID",
|
38
|
+
"http://www.crateandbarrel.com/Gift-Registry/YOUR_NAMES/YOUR_UNIQUE_REGISTRY_ID",
|
39
|
+
"https://www.zola.com/registry/YOUR_REGISTRY_SLUG",
|
40
|
+
"http://www.rei.com/GiftRegistryDetails/YOUR_UNIQUE_REGISTRY_ID",
|
41
|
+
"https://secure.williams-sonoma.com/registry/YOUR_UNIQUE_REGISTRY_ID/registry-list.html",
|
42
|
+
])
|
43
|
+
|
44
|
+
|
45
|
+
## Want to Help?
|
46
|
+
|
47
|
+
It should be simple to add another registry by cloning a `Registry` inside `lib/wedding_registry_scraper/registries`.
|
48
|
+
|
49
|
+
## Like this project and planning on registering?
|
50
|
+
|
51
|
+
Why don't you register for Zola using my invite code: https://www.zola.com/invite/pariser
|
52
|
+
|
53
|
+
Thanks!
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module WeddingRegistryScraper::Registries
|
2
|
+
class << self
|
3
|
+
def registry_classes
|
4
|
+
@registry_classes ||= begin
|
5
|
+
self.constants.map do |const|
|
6
|
+
obj = self.const_get(const)
|
7
|
+
obj if obj.is_a?(Class) && obj < WeddingRegistryScraper::Registry
|
8
|
+
end.compact
|
9
|
+
end
|
10
|
+
@registry_classes
|
11
|
+
end
|
12
|
+
|
13
|
+
def registry_class_from_url(url)
|
14
|
+
registry_classes.detect do |klass|
|
15
|
+
/^https?:\/\/[^\/]*#{klass.domain}/.match(url)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def initialize_registry(url, options={})
|
20
|
+
klass = registry_class_from_url(url)
|
21
|
+
klass ? klass.new(url, options) : nil
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Load all registries
|
27
|
+
|
28
|
+
Dir[File.dirname(__FILE__) + '/registries/*.rb'].each { |file| require file }
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::CrateAndBarrel < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "Crate & Barrel"
|
3
|
+
@domain = "crateandbarrel.com"
|
4
|
+
private
|
5
|
+
def get_products(doc)
|
6
|
+
doc.css('.jsItemRow:not(.emptyCategoryRow)')
|
7
|
+
end
|
8
|
+
|
9
|
+
def get_name(product)
|
10
|
+
product.css('.itemTitle').text.strip
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_sku(product)
|
14
|
+
sku = product.css('.skuNum').text.strip.match(/SKU (\S+)/)[1]
|
15
|
+
"c&b:#{sku}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_url(product)
|
19
|
+
# TODO pop up modal? set proper anchor (they're not unique!)
|
20
|
+
@url
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_image_url(product)
|
24
|
+
thumb_src = product.css('img')[0]['src']
|
25
|
+
thumb_src.sub(/\$web_itembasket\$/, '&$web_popup_zoom$&wid=379&hei=379')
|
26
|
+
end
|
27
|
+
|
28
|
+
def get_remaining(product)
|
29
|
+
desired = product.css('td')[4].css('.itemHas').text.strip.to_i
|
30
|
+
fulfilled = product.css('td')[5].css('.itemHas').text.strip.to_i
|
31
|
+
desired - fulfilled
|
32
|
+
end
|
33
|
+
|
34
|
+
def get_desired(product)
|
35
|
+
product.css('td')[4].css('.itemHas').text.strip.to_i
|
36
|
+
end
|
37
|
+
|
38
|
+
def get_price(product)
|
39
|
+
if (sale_price = product.css('.salePrice')).any?
|
40
|
+
/\$?(\d+(\.\d+)?)/.match(sale_price.text) { |match| match[1].to_f }
|
41
|
+
elsif (regular_price = product.css('.regPrice')).any?
|
42
|
+
/\$?(\d+(\.\d+)?)/.match(regular_price.text) { |match| match[1].to_f }
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
ActiveSupport::Inflector.inflections(:en) do |inflector|
|
4
|
+
inflector.uncountable 'heath_ceramics'
|
5
|
+
end
|
6
|
+
|
7
|
+
class WeddingRegistryScraper::Registries::HeathCeramics < WeddingRegistryScraper::Registry
|
8
|
+
@display_name = "Heath Ceramics"
|
9
|
+
@domain = "heathceramics.com"
|
10
|
+
private
|
11
|
+
def get_products(doc)
|
12
|
+
doc.css('table#shopping-cart-table tbody tr')
|
13
|
+
end
|
14
|
+
|
15
|
+
def get_name(product)
|
16
|
+
product.css('.attentionText').text.strip
|
17
|
+
end
|
18
|
+
|
19
|
+
def get_sku(product)
|
20
|
+
text = product.css('.ctxProductCol .tinyText').map(&:text).detect { |t| t =~ /SKU/ }
|
21
|
+
sku = text.match(/SKU:\s+(\S+)/)[1]
|
22
|
+
"heath:#{sku}"
|
23
|
+
end
|
24
|
+
|
25
|
+
def get_url(product)
|
26
|
+
@url
|
27
|
+
end
|
28
|
+
|
29
|
+
def get_image_url(product)
|
30
|
+
details_url = product.css('[data-url]')[0]['data-url']
|
31
|
+
|
32
|
+
request_params = {
|
33
|
+
:headers => {
|
34
|
+
'X-Requested-With' => 'XMLHttpRequest'
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
result = with_lax_heath_uri_parsing do
|
39
|
+
puts "GET #{details_url.inspect} with params #{request_params.to_json}" if @debug
|
40
|
+
Unirest.get(details_url, request_params)
|
41
|
+
end
|
42
|
+
|
43
|
+
doc = Nokogiri::HTML(result.body)
|
44
|
+
|
45
|
+
if doc.css('ul.mixMatchList').any?
|
46
|
+
doc.css('ul.mixMatchList li:last-child img[src]')[0]['src']
|
47
|
+
else
|
48
|
+
doc.css('.imagesScrollable img[src]')[0]['src']
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def get_remaining(product)
|
53
|
+
fulfillment = product.css('.fulfilled').text.strip
|
54
|
+
fulfillment.match(/^(\d+)/)[0].to_i
|
55
|
+
end
|
56
|
+
|
57
|
+
def get_desired(product)
|
58
|
+
fulfillment = product.css('.fulfilled').text.strip
|
59
|
+
fulfillment.match(/(\d+)$/)[0].to_i
|
60
|
+
end
|
61
|
+
|
62
|
+
def get_price(product)
|
63
|
+
product.css('.price').text.strip.gsub(/[^\d\.]/, '').to_f
|
64
|
+
end
|
65
|
+
|
66
|
+
def with_lax_heath_uri_parsing(&block)
|
67
|
+
URI::RFC3986_Parser.class_eval do
|
68
|
+
alias_method :old_split, :split
|
69
|
+
|
70
|
+
def split(uri)
|
71
|
+
# scheme, userinfo, host, port, registry, path, opaque, query, fragment
|
72
|
+
[ "http", nil, "www.heathceramics.com", nil, nil, uri.sub(/^https?:\/\/www.heathceramics.com/, ''), nil, nil, nil ]
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
exception = nil
|
77
|
+
begin
|
78
|
+
result = yield
|
79
|
+
rescue Exception => e
|
80
|
+
exception = e
|
81
|
+
ensure
|
82
|
+
URI::RFC3986_Parser.class_eval do
|
83
|
+
alias_method :split, :old_split
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
raise exception if exception
|
88
|
+
result
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::Rei < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "REI"
|
3
|
+
@domain = "rei.com"
|
4
|
+
private
|
5
|
+
def get_product_details_url(product)
|
6
|
+
href = product.css('a[name=prod]')[0]['href'].sub(/^\/?/, '')
|
7
|
+
"https://www.rei.com/#{href.sub(/^\//, '')}"
|
8
|
+
end
|
9
|
+
|
10
|
+
def get_products(doc)
|
11
|
+
doc.css('table.registryList').first.css('tr.tr0')
|
12
|
+
end
|
13
|
+
|
14
|
+
def get_name(product)
|
15
|
+
product.css('a[name=prod]').text.strip
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_sku(product)
|
19
|
+
sku = get_product_details_url(product).match(/\/product\/(\d+)/)[1].to_i
|
20
|
+
"rei-#{sku}"
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_url(product)
|
24
|
+
@url
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_image_url(product)
|
28
|
+
details_url = get_product_details_url(product)
|
29
|
+
|
30
|
+
puts "GET #{details_url.inspect}" if @debug
|
31
|
+
result = Unirest.get(details_url)
|
32
|
+
doc = Nokogiri::HTML(result.body)
|
33
|
+
|
34
|
+
image_url = doc.css('#zoomLink')[0]['href']
|
35
|
+
"https://www.rei.com/#{image_url.sub(/^\//,'')}"
|
36
|
+
end
|
37
|
+
|
38
|
+
def get_remaining(product)
|
39
|
+
product.css('td')[5].text.strip.to_i
|
40
|
+
end
|
41
|
+
|
42
|
+
def get_desired(product)
|
43
|
+
product.css('td')[4].text.strip.to_i
|
44
|
+
end
|
45
|
+
|
46
|
+
def get_price(product)
|
47
|
+
product.css('td')[3].text.strip.sub('$','').to_f
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::WilliamsSonoma < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "Williams-Sonoma"
|
3
|
+
@domain = "williams-sonoma.com"
|
4
|
+
private
|
5
|
+
def get_products(doc)
|
6
|
+
doc.css('table.registry-category-list tbody tr')
|
7
|
+
end
|
8
|
+
|
9
|
+
def get_name(product)
|
10
|
+
product.css('.product-detail .product-info .title a').text.strip
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_sku(product)
|
14
|
+
sku = product.css('.product-detail .product-info .item-number').text.strip.match(/: (\d+)/)[1]
|
15
|
+
"williams-sonoma:#{sku}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_url(product)
|
19
|
+
# product.css('.product-detail .product-info .title a')[0]['href']
|
20
|
+
@url
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_image_url(product)
|
24
|
+
product.css('img')[0]['src'].sub(/f\.jpg$/, 'c.jpg')
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_remaining(product)
|
28
|
+
product.css('td.still-needs').text.strip.to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
def get_desired(product)
|
32
|
+
product.css('td.requested').text.strip.to_i
|
33
|
+
end
|
34
|
+
|
35
|
+
def get_price(product)
|
36
|
+
if (sale_price = product.css('td.price .price-state.price-special')).any?
|
37
|
+
sale_price.css('.currencyUSD .price-amount').text.strip.to_f
|
38
|
+
else
|
39
|
+
product.css('td.price .price-state.price-standard .currencyUSD .price-amount').text.strip.to_f
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
class WeddingRegistryScraper::Registries::Zola < WeddingRegistryScraper::Registry
|
2
|
+
@display_name = "Zola"
|
3
|
+
@domain = "zola.com"
|
4
|
+
private
|
5
|
+
def get_products(doc)
|
6
|
+
doc.css('#all-panel .product-tile')
|
7
|
+
end
|
8
|
+
|
9
|
+
def get_name(product)
|
10
|
+
product.css('.single-product-name').text.strip
|
11
|
+
end
|
12
|
+
|
13
|
+
def get_sku(product)
|
14
|
+
sku = product.css('.single-product')[0]['id']
|
15
|
+
"zola:#{sku}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def get_url(product)
|
19
|
+
href = product.css('.content a')[0]['href'].sub(/^\/?/, '')
|
20
|
+
"https://www.zola.com/#{href}"
|
21
|
+
end
|
22
|
+
|
23
|
+
def get_image_url(product)
|
24
|
+
product.css('[data-image-url]')[0]['data-image-url']
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_remaining(product)
|
28
|
+
product.css('.needed').text.strip.gsub(/[^\d]+/, '').to_i
|
29
|
+
end
|
30
|
+
|
31
|
+
def get_desired(product)
|
32
|
+
end
|
33
|
+
|
34
|
+
def get_price(product)
|
35
|
+
product.css('[data-price]')[0]['data-price'].gsub(/[$,]/, '').to_f
|
36
|
+
end
|
37
|
+
|
38
|
+
def price_type(product)
|
39
|
+
product_price = product.css('.product-price')
|
40
|
+
|
41
|
+
if product_price.length > 0 && product_price[0].text.strip == 'Contribute what you wish'
|
42
|
+
WeddingRegistryScraper::Registry::VARIABLE_PRICE
|
43
|
+
else
|
44
|
+
WeddingRegistryScraper::Registry::FIXED_PRICE
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def fulfilled?(product)
|
49
|
+
if price_type(product) == WeddingRegistryScraper::Registry::VARIABLE_PRICE
|
50
|
+
get_price(product) <= 0
|
51
|
+
else
|
52
|
+
get_remaining(product) <= 0
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
require 'unirest'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
class WeddingRegistryScraper::Registry
|
5
|
+
PRICE_TYPES = [
|
6
|
+
FIXED_PRICE = 'Fixed price',
|
7
|
+
VARIABLE_PRICE = 'Variable price',
|
8
|
+
]
|
9
|
+
|
10
|
+
class << self
|
11
|
+
attr_reader :display_name
|
12
|
+
attr_reader :domain
|
13
|
+
end
|
14
|
+
|
15
|
+
def initialize(url, params={})
|
16
|
+
params.symbolize_keys!
|
17
|
+
@url = url
|
18
|
+
@debug = params[:debug] == true
|
19
|
+
end
|
20
|
+
|
21
|
+
def get_items
|
22
|
+
doc = get_registry
|
23
|
+
|
24
|
+
get_products(doc).reduce({}) do |products, product|
|
25
|
+
sku = get_sku(product)
|
26
|
+
|
27
|
+
details = {
|
28
|
+
:name => get_name(product),
|
29
|
+
:remaining => get_remaining(product),
|
30
|
+
:desired => get_desired(product),
|
31
|
+
:url => get_url(product),
|
32
|
+
:image_url => get_image_url(product),
|
33
|
+
:registry_name => self.class.display_name,
|
34
|
+
:fulfilled => fulfilled?(product),
|
35
|
+
:price_type => price_type(product),
|
36
|
+
:price_value => get_price(product),
|
37
|
+
}
|
38
|
+
|
39
|
+
products.merge! sku => details
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
private
|
44
|
+
|
45
|
+
def get_products(doc) ; raise NotImplementedError; end
|
46
|
+
|
47
|
+
def get_name(product) ; raise NotImplementedError; end
|
48
|
+
def get_sku(product) ; raise NotImplementedError; end
|
49
|
+
def get_url(product) ; raise NotImplementedError; end
|
50
|
+
def get_image_url(product) ; raise NotImplementedError; end
|
51
|
+
def get_remaining(product) ; raise NotImplementedError; end
|
52
|
+
def get_desired(product) ; raise NotImplementedError; end
|
53
|
+
def get_price(product) ; raise NotImplementedError; end
|
54
|
+
|
55
|
+
def price_type(product)
|
56
|
+
FIXED_PRICE
|
57
|
+
end
|
58
|
+
|
59
|
+
def fulfilled?(product)
|
60
|
+
get_remaining(product) == 0
|
61
|
+
end
|
62
|
+
|
63
|
+
def get_registry
|
64
|
+
result, _ = make_request(:get, @url)
|
65
|
+
Nokogiri::HTML(result.body)
|
66
|
+
end
|
67
|
+
|
68
|
+
def make_request(method, url, params={})
|
69
|
+
json = params.delete(:json)
|
70
|
+
request_params = {}
|
71
|
+
|
72
|
+
if cookies = params.delete(:cookies)
|
73
|
+
request_params[:headers] = {
|
74
|
+
'Cookie' => dump_cookies(cookies)
|
75
|
+
}
|
76
|
+
end
|
77
|
+
|
78
|
+
if json
|
79
|
+
request_params[:headers] = {
|
80
|
+
'Accept' => "application/json"
|
81
|
+
}
|
82
|
+
end
|
83
|
+
|
84
|
+
request_params[:parameters] = json ? params.to_json : params
|
85
|
+
|
86
|
+
puts "#{method.to_s.upcase} #{url} with params #{request_params.to_json}" if @debug
|
87
|
+
result = Unirest.send(method, url, request_params)
|
88
|
+
|
89
|
+
puts "RESULT #{result.code} with headers #{result.headers.inspect}" if @debug
|
90
|
+
|
91
|
+
open_result_in_browser(result) if @debug
|
92
|
+
|
93
|
+
cookies = load_cookies(result.headers[:set_cookie])
|
94
|
+
|
95
|
+
[ result, cookies ]
|
96
|
+
end
|
97
|
+
|
98
|
+
def open_result_in_browser(result)
|
99
|
+
file = Tempfile.new(['weddding-scraper','.html'])
|
100
|
+
file << result.body
|
101
|
+
file.close
|
102
|
+
|
103
|
+
`open "#{file.path}"`
|
104
|
+
|
105
|
+
sleep 1
|
106
|
+
file.unlink
|
107
|
+
end
|
108
|
+
|
109
|
+
def dump_cookies(cookies)
|
110
|
+
cookies.map { |k, v| "#{k}=#{v}" }.join("; ")
|
111
|
+
end
|
112
|
+
|
113
|
+
def load_cookies(set_cookie)
|
114
|
+
set_cookie ||= []
|
115
|
+
set_cookie.reduce({}) do |cookies, cookie|
|
116
|
+
key, value = cookie.split(';')[0].split('=')
|
117
|
+
cookies.merge!(key => value || "")
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class WeddingRegistryScraper::RegistryScraper
|
2
|
+
class << self
|
3
|
+
def scrape(registry_urls=[], options={})
|
4
|
+
all_products = {}
|
5
|
+
|
6
|
+
registry_urls.each do |url|
|
7
|
+
unless registry = WeddingRegistryScraper::Registries.initialize_registry(url)
|
8
|
+
raise "Could not initialize registry from url #{url.inspect}"
|
9
|
+
end
|
10
|
+
|
11
|
+
puts "* Loading items from #{registry.class.name.demodulize}..." if options[:log_messages]
|
12
|
+
products = registry.get_items
|
13
|
+
puts " Loaded %d items\n" % products.count if options[:log_messages]
|
14
|
+
|
15
|
+
all_products.merge!(products)
|
16
|
+
end
|
17
|
+
|
18
|
+
all_products
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wedding_registry_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Pariser
|
@@ -44,48 +44,20 @@ dependencies:
|
|
44
44
|
- - "~>"
|
45
45
|
- !ruby/object:Gem::Version
|
46
46
|
version: '1.6'
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: colored
|
49
|
-
requirement: !ruby/object:Gem::Requirement
|
50
|
-
requirements:
|
51
|
-
- - "~>"
|
52
|
-
- !ruby/object:Gem::Version
|
53
|
-
version: '1.2'
|
54
|
-
type: :runtime
|
55
|
-
prerelease: false
|
56
|
-
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
requirements:
|
58
|
-
- - "~>"
|
59
|
-
- !ruby/object:Gem::Version
|
60
|
-
version: '1.2'
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
|
-
name: safe_yaml
|
63
|
-
requirement: !ruby/object:Gem::Requirement
|
64
|
-
requirements:
|
65
|
-
- - "~>"
|
66
|
-
- !ruby/object:Gem::Version
|
67
|
-
version: '1.0'
|
68
|
-
type: :runtime
|
69
|
-
prerelease: false
|
70
|
-
version_requirements: !ruby/object:Gem::Requirement
|
71
|
-
requirements:
|
72
|
-
- - "~>"
|
73
|
-
- !ruby/object:Gem::Version
|
74
|
-
version: '1.0'
|
75
47
|
- !ruby/object:Gem::Dependency
|
76
48
|
name: activesupport
|
77
49
|
requirement: !ruby/object:Gem::Requirement
|
78
50
|
requirements:
|
79
51
|
- - "~>"
|
80
52
|
- !ruby/object:Gem::Version
|
81
|
-
version: '4.
|
53
|
+
version: '4.2'
|
82
54
|
type: :runtime
|
83
55
|
prerelease: false
|
84
56
|
version_requirements: !ruby/object:Gem::Requirement
|
85
57
|
requirements:
|
86
58
|
- - "~>"
|
87
59
|
- !ruby/object:Gem::Version
|
88
|
-
version: '4.
|
60
|
+
version: '4.2'
|
89
61
|
description: Look at a bunch of various retailers' wedding registries and consolidate
|
90
62
|
information in one place.
|
91
63
|
email: pariser@gmail.com
|
@@ -93,7 +65,18 @@ executables: []
|
|
93
65
|
extensions: []
|
94
66
|
extra_rdoc_files: []
|
95
67
|
files:
|
68
|
+
- LICENSE
|
69
|
+
- README.md
|
96
70
|
- lib/wedding_registry_scraper.rb
|
71
|
+
- lib/wedding_registry_scraper/registries.rb
|
72
|
+
- lib/wedding_registry_scraper/registries/crate_and_barrel.rb
|
73
|
+
- lib/wedding_registry_scraper/registries/heath_ceramics.rb
|
74
|
+
- lib/wedding_registry_scraper/registries/rei.rb
|
75
|
+
- lib/wedding_registry_scraper/registries/williams_sonoma.rb
|
76
|
+
- lib/wedding_registry_scraper/registries/zola.rb
|
77
|
+
- lib/wedding_registry_scraper/registry.rb
|
78
|
+
- lib/wedding_registry_scraper/registry_scraper.rb
|
79
|
+
- lib/wedding_registry_scraper/version.rb
|
97
80
|
homepage: https://github.com/pariser/wedding-registry-scraper
|
98
81
|
licenses:
|
99
82
|
- MIT
|
@@ -106,7 +89,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
106
89
|
requirements:
|
107
90
|
- - ">="
|
108
91
|
- !ruby/object:Gem::Version
|
109
|
-
version: '
|
92
|
+
version: '2.2'
|
110
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
94
|
requirements:
|
112
95
|
- - ">="
|