joyceshop 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f8dbce4f8ea05d23d63f3d9c49bc5819afeb7bd7
4
- data.tar.gz: ddc747fbfb484c3f4dc4f8d3e513153a243bd01c
3
+ metadata.gz: 2c4b143619c3fe55c48d39a9b419f39d6923fce0
4
+ data.tar.gz: 1b0fd41b0f9aff1067b680b43e2260a3f7f83cae
5
5
  SHA512:
6
- metadata.gz: ed50d49f299ae7bf30e8f86f3e9284d4d9dd0eaebe325b0a9f7f3b01664e652b57faf20b57b05b17ab1b26ba6ac0149dd43693927e49890ab1b8db57e0ef1bf4
7
- data.tar.gz: c7cd6da017475ef2f3b34d92774407f3fc7916f97c416ab9b0094488f605a302d38b554096f798dd6fda6f88aac53dc2ed82f53fffc0a546fa569caddfac6740
6
+ metadata.gz: 1b7810cbdc56d47096794d986417082dbe14dc84122e8d0539dbbc2e1f0e4c243f98ba451be45642fafb93fc0f053c0c258e8e4c6fcc6b288caded95c1b961c7
7
+ data.tar.gz: bd71e91f8adda7003cd73357e1eb2495c4e262d30e39169242bca461f4baaf0caffc5925849c0a5526f3a9fc3c438e2c05b3df9eb72170b1036e40ef063aa02f
@@ -2,5 +2,6 @@
2
2
  # require 'joyceshop' # for production
3
3
  require_relative '../lib/joyceshop.rb' # for testing
4
4
 
5
- scraper = JoyceShop::Scraper.new()
5
+ scraper = JoyceShop::Scraper.new
6
6
  puts scraper.search('紗針織衫', {price_boundary: [100, 443]})
7
+ puts scraper.scrape(:tops, 1)
@@ -1,10 +1,14 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'oga'
3
+ require 'uri'
3
4
  require 'open-uri'
4
5
 
5
6
  # scrape data
6
7
  module JoyceShop
7
8
  class Scraper
9
+ # Types
10
+ @@VALID_TYPES = [:tops, :popular, :pants, :pants, :accessories, :latest]
11
+
8
12
  # URI
9
13
  @@BASE_URI = 'https://www.joyce-shop.com'
10
14
  @@LATEST_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=&item2=&ya19=&keyword=&recommand=1412170001&ob=F"
@@ -12,11 +16,12 @@ module JoyceShop
12
16
  @@TOPS_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=110&item2=111&ya19=&keyword=&recommand=&ob=F"
13
17
  @@PANTS_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=120&item2=121&ya19=&keyword=&recommand=&ob=F"
14
18
  @@ACCESSORIES_URI = "#{@@BASE_URI}/PDList.asp?brand=01&item1=140&item2=141&ya19=&keyword=&recommand=&ob=F"
19
+ @@SEARCH_URI = "#{@@BASE_URI}/PDList.asp?"
15
20
 
16
21
  # Selectors
17
22
  @@ITEM_SELECTOR = "//div[contains(@class, 'NEW_shop_list')]/ul/li/div[contains(@class, 'NEW_shop_list_pic')]"
18
- @@LINK_SELECTOR = 'a'
19
- @@IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]"
23
+ @@LINK_SELECTOR = 'a[1]/@href'
24
+ @@IMAGE_SELECTOR = "a/img[contains(@class, 'lazyload')]/@src"
20
25
  @@ITEM_INFO_SELECTOR = "div[contains(@class, 'NEW_shop_list_info')]"
21
26
  @@TITLE_SELECTOR = "#{@@ITEM_INFO_SELECTOR}/div[1]"
22
27
  @@PRICE_SELECTOR = "#{@@ITEM_INFO_SELECTOR}/span"
@@ -58,6 +63,19 @@ module JoyceShop
58
63
  data = parse_html(body)
59
64
  filter(data, options)
60
65
  end
66
+
67
+ def search(keyword, options={})
68
+ uri = uri_with_search(keyword)
69
+ body = fetch_data(uri)
70
+ data = parse_html(body)
71
+ filter(data, options)
72
+ end
73
+
74
+ def scrape(type, page, options = {})
75
+ abort "only supports #{@@VALID_TYPES}" unless @@VALID_TYPES.include?(type.to_sym)
76
+
77
+ method = self.method(type)
78
+ method.call(page, options)
61
79
  end
62
80
 
63
81
  private
@@ -65,6 +83,10 @@ module JoyceShop
65
83
  "#{uri}&pageno=#{page}"
66
84
  end
67
85
 
86
+ def uri_with_search(keyword)
87
+ "#{@@SEARCH_URI}keyword=#{URI.escape(keyword)}"
88
+ end
89
+
68
90
  def fetch_data(uri)
69
91
  open(uri) { |file| file.read }
70
92
  end
@@ -116,13 +138,13 @@ module JoyceShop
116
138
  end
117
139
 
118
140
  def extract_images(item)
119
- image = item.xpath(@@IMAGE_SELECTOR).attribute(:src).first.value
141
+ image = item.xpath(@@IMAGE_SELECTOR).text
120
142
  image_hover = image.sub(/\.jpg/, '-h.jpg')
121
143
  ["#{@@BASE_URI}#{image}", "#{@@BASE_URI}#{image_hover}"]
122
144
  end
123
145
 
124
146
  def extract_link(item)
125
- "#{@@BASE_URI}/#{item.xpath(@@LINK_SELECTOR).attribute(:href).first.value}"
147
+ "#{@@BASE_URI}/#{item.xpath(@@LINK_SELECTOR).text}"
126
148
  end
127
149
  end
128
150
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: joyceshop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Even Chang
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2016-01-03 00:00:00.000000000 Z
14
+ date: 2016-01-04 00:00:00.000000000 Z
15
15
  dependencies: []
16
16
  description: This is a gem scraping joyceshop's website and returns the popular/latest
17
17
  items