stylemooncat 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/stylemooncat +9 -2
- data/lib/stylemooncat/scraper.rb +8 -6
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 15167c685cf84f8384b77774e8131bf5e2858221
|
4
|
+
data.tar.gz: e4b76b8567f8b3d98fa1b6835708d35f00378b9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01cceb18d13a90075f03e839437846befc18220e8a6d740b27f5d6cdf399527bcb15ba5d7d5b1991c91988b53052567a9dabdca8d3bae2039c53a65b5ad4a450
|
7
|
+
data.tar.gz: 53f473894440e01eb118911574554f33594a0c0aed19280f9f1dec5ab336980eaf00b6e0e7159236d48f00b91c146df8545185765c2ec6407c2d93297e55e0b0
|
data/bin/stylemooncat
CHANGED
@@ -13,6 +13,13 @@ price_boundary.push(ARGV[4])
|
|
13
13
|
options[:keyword]=ARGV[2]
|
14
14
|
options[:price_boundary]=price_boundary
|
15
15
|
|
16
|
-
|
17
|
-
|
16
|
+
puts @scraper.scrape(ARGV[0],ARGV[1],ARGV[2],ARGV[3],ARGV[4],ARGV[5])
|
17
|
+
if ARGV[5]=='color'
|
18
|
+
puts @scraper.scrape_contain_color(ARGV[0],options)
|
19
|
+
else
|
20
|
+
puts @scraper.scrape(ARGV[0],options)
|
21
|
+
end
|
22
|
+
|
18
23
|
#puts @scraper.scrape("shoes",{:keyword=>"none",:page_limit=>3,:price_boundary=>[0,600]})
|
24
|
+
#puts '-----------'
|
25
|
+
#puts @scraper.scrape("shoes",{:keyword=>"跟鞋",:page_limit=>1})
|
data/lib/stylemooncat/scraper.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
require 'oga'
|
3
3
|
require 'open-uri'
|
4
4
|
require 'open-uri-s3'
|
5
|
-
|
5
|
+
require 'iconv'
|
6
6
|
# scrape data
|
7
7
|
module StyleMoonCat
|
8
8
|
class Scraper
|
@@ -60,7 +60,7 @@ module StyleMoonCat
|
|
60
60
|
|
61
61
|
@@COLOR_ITEM_XPATH = "//option"
|
62
62
|
|
63
|
-
def scrape_contain_color(category,options)
|
63
|
+
def scrape_contain_color(category,options={})
|
64
64
|
@@IsScrapeColor=1
|
65
65
|
filter_results = scrape(category,options)
|
66
66
|
filter_results_with_color = filter_results.each do |x|
|
@@ -87,10 +87,12 @@ module StyleMoonCat
|
|
87
87
|
item.text.split(" ")[0].split(":")[1]
|
88
88
|
end
|
89
89
|
|
90
|
-
def scrape(category,options)
|
90
|
+
def scrape(category,options={})
|
91
|
+
options[:keyword]
|
92
|
+
|
93
|
+
# keyword= Iconv.conv('big5','utf-8',options[:keyword])
|
91
94
|
keyword= options[:keyword]
|
92
95
|
page_limit=options[:page_limit]
|
93
|
-
puts options
|
94
96
|
|
95
97
|
if options[:price_boundary]!= nil && options[:price_boundary].length ==2
|
96
98
|
if options[:price_boundary][0].to_i>options[:price_boundary][1].to_i
|
@@ -211,7 +213,7 @@ module StyleMoonCat
|
|
211
213
|
|
212
214
|
private
|
213
215
|
def uri_with_keyword(uri, keyword)
|
214
|
-
"#{uri}&keyword=#{keyword}"
|
216
|
+
"#{uri}&keyword=#{URI.escape(keyword)}"
|
215
217
|
end
|
216
218
|
|
217
219
|
def uri_with_page(uri, page)
|
@@ -266,7 +268,7 @@ module StyleMoonCat
|
|
266
268
|
|
267
269
|
def extract_images(item)
|
268
270
|
result=[]
|
269
|
-
result.push(item.xpath(@@IMAGE_XPATH).attribute(:src).first.value)
|
271
|
+
result.push('http://www.stylemooncat.com.tw'+item.xpath(@@IMAGE_XPATH).attribute(:src).first.value)
|
270
272
|
|
271
273
|
end
|
272
274
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stylemooncat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Even Chang
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-01-
|
14
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: This is a gem scraping StyleMoonCat's website.Input category name,page
|
17
17
|
limit,searcing keyword,and price range,and it will return the items with title,price,image,and
|