stylemooncat 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/stylemooncat +1 -0
- data/lib/stylemooncat/scraper.rb +142 -80
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37605c582c4d3a6f49efc9a54d55c85a477d82d4
|
4
|
+
data.tar.gz: a9c7415659c0a44728be90e95b5fc1d18a3ad50b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d9b67cb94c808f8109e4aafa78906137b91dfda2ecc4da2bf7067c47a5cb992d71867e6e79d62c4bf720eacff7f3fcdd962061c99bb8c4a8364db67d6a0cedb
|
7
|
+
data.tar.gz: 56d7d6788c4a08e0bf6bec75c63f1863ccf575624f0c815a915242c3e52e883b48e97d02ecfb952477357ad6c2cbaccbd805851f0c37322c22394841c1610999
|
data/bin/stylemooncat
CHANGED
data/lib/stylemooncat/scraper.rb
CHANGED
@@ -56,92 +56,155 @@ module StyleMoonCat
|
|
56
56
|
# Regular ?
|
57
57
|
# Captures a run of one or more characters that are each a literal '.',
# a Han (CJK) character, or an ASCII letter.
@@TITLE_REGEX = /([.\p{Han}[a-zA-Z]]+)/
|
58
58
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
uri = uri_with_page(@@BAG_URI, page)
|
104
|
-
when "accessories"
|
105
|
-
uri = uri_with_page(@@ACCESSORIES_URI, page)
|
106
|
-
when "accessories_watch"
|
107
|
-
uri = uri_with_page(@@ACCESSORIES_Watch_URI, page)
|
108
|
-
when "accessories_necklace"
|
109
|
-
uri = uri_with_page(@@ACCESSORIES_Necklace_URI, page)
|
110
|
-
when "accessories_ring"
|
111
|
-
uri = uri_with_page(@@ACCESSORIES_Ring_URI, page)
|
112
|
-
when "accessories_bracelet"
|
113
|
-
uri = uri_with_page(@@ACCESSORIES_Bracelet_URI, page)
|
114
|
-
when "accessories_earring"
|
115
|
-
uri = uri_with_page(@@ACCESSORIES_Earring_URI, page)
|
116
|
-
when "accessories_muffler"
|
117
|
-
uri = uri_with_page(@@ACCESSORIES_Muffler_URI, page)
|
118
|
-
when "accessories_belt"
|
119
|
-
uri = uri_with_page(@@ACCESSORIES_Belt_URI, page)
|
120
|
-
when "accessories_haircap"
|
121
|
-
uri = uri_with_page(@@ACCESSORIES_Haircap_URI, page)
|
122
|
-
when "accessories_glasses"
|
123
|
-
uri = uri_with_page(@@ACCESSORIES_Glasses_URI, page)
|
124
|
-
when "accessories_socks"
|
125
|
-
uri = uri_with_page(@@ACCESSORIES_Socks_URI, page)
|
126
|
-
when "accessories_underwear"
|
127
|
-
uri = uri_with_page(@@ACCESSORIES_Underwear_URI, page)
|
128
|
-
when "accessories_others"
|
129
|
-
uri = uri_with_page(@@ACCESSORIES_Others_URI, page)
|
59
|
+
# NOTE(review): this is spelled "IsScrpeColor", while scrape_contain_color
# assigns @@IsScrapeColor - presumably a typo, so this initial value is never
# the one that method updates. Confirm no other code reads this name before
# renaming it.
@@IsScrpeColor=0;
|
60
|
+
|
61
|
+
# XPath used by color_extract: selects every <option> element in the document.
@@COLOR_ITEM_XPATH = "//option"
|
62
|
+
|
63
|
+
# Scrapes a category exactly like #scrape and then attaches a :colors entry
# to every returned item.
#
# For each item, the page behind item[:link] is downloaded with #fetch_data
# and handed to #color_extract; the result is stored on the item under
# :colors. The items are modified in place.
#
# @param category [String] category name, forwarded unchanged to #scrape
# @param options [Hash] scraping options, forwarded unchanged to #scrape
# @return the collection returned by #scrape, with :colors set on each item
def scrape_contain_color(category,options)
  # Set to 1 on every call; nothing in the visible code reads it back.
  @@IsScrapeColor = 1

  items = scrape(category, options)
  items.each do |item|
    page_html = fetch_data(item[:link])
    item[:colors] = color_extract(page_html)
  end
  items
end
|
75
|
+
|
76
|
+
# Extracts the distinct colour values offered on a product page.
#
# Every <option> element (see @@COLOR_ITEM_XPATH) whose text is longer than
# four characters is passed to #color_parse. Options for which no colour
# value can be parsed (color_parse returns nil) are dropped, so callers never
# see nil entries in the list.
#
# @param raw [String] HTML source of a product page
# @return [Array<String>] unique colour values, in document order
def color_extract(raw)
  Oga.parse_html(raw)
     .xpath(@@COLOR_ITEM_XPATH)
     # NOTE(review): the length threshold presumably skips short placeholder
     # options - confirm against the live markup.
     .select { |item| item.text.length > 4 }
     .map { |item| color_parse(item) }
     .compact
     .uniq
end
|
85
|
+
|
86
|
+
# Pulls the colour value out of a single option node.
#
# The node text is split on whitespace; the first token is expected to look
# like "<label>:<value>" and the part after the first ":" is returned.
#
# @param item [#text] node-like object whose #text returns the option label
# @return [String, nil] the value after the first ":", or nil when the text
#   is blank or the first token contains no ":" value
def color_parse(item)
  first_token = item.text.split(" ").first
  # Blank / whitespace-only text yields no tokens; avoid calling split on nil.
  return nil if first_token.nil?

  first_token.split(":")[1]
end
|
89
|
+
|
90
|
+
# Scrapes up to options[:page_limit] listing pages of a category and returns
# the combined items, optionally narrowed by keyword and price range.
#
# Pages 1..page_limit are fetched in order and their filtered items are
# appended to a single result list.
#
# @param category [String] category key (e.g. "newarrival", "top_knit",
#   "accessories_ring"); any unrecognised value falls back to the
#   all-items listing
# @param options [Hash] scraping options
# @option options [String, nil] :keyword search keyword; nil or "none"
#   disables the keyword filter
# @option options [Integer, String, nil] :page_limit number of pages to
#   fetch; coerced with to_i and raised to at least 1
# @option options [Array, nil] :price_boundary two price bounds in either
#   order; ignored unless it has exactly two non-nil entries, or if either
#   bound is -1
# @return [Array] items produced by #filter for every fetched page; when a
#   valid price range is given, only items whose :price (as integer) lies
#   within the inclusive range
def scrape(category,options)
  keyword = options[:keyword]
  # Accept nil / String page limits instead of crashing in Integer#upto.
  page_limit = [options[:page_limit].to_i, 1].max

  # -1 is the sentinel for "no price filter".
  price_from = -1
  price_to = -1
  bounds = options[:price_boundary]
  if bounds && bounds.length == 2 && !bounds[0].nil? && !bounds[1].nil?
    # Bounds may be given in either order; normalise to low..high.
    price_from, price_to = [bounds[0].to_i, bounds[1].to_i].sort
  end

  # The category never changes between pages, so resolve its URI once.
  base_uri =
    case category
    when "newarrival"            then @@NEW_ARRIVALS_URI
    when "lastweek"              then @@LAST_WEEK_URI
    when "specialdiscount"       then @@SPECIAL_DISCOUNT_URI
    when "top"                   then @@TOP_URI
    when "top_clothes"           then @@TOPS_Clothes_URI
    when "top_Tshirt"            then @@TOPS_Tshirt_URI
    when "top_vest"              then @@TOPS_Vest_URI
    when "top_blouse"            then @@TOPS_Blouse_URI
    when "top_knit"              then @@TOPS_Knit_URI
    when "bottom"                then @@BOTTOM_URI
    when "bottom_pants"          then @@BOTTOM_Pants_URI
    when "bottom_skirts"         then @@BOTTOM_Skirts_URI
    when "outer"                 then @@OUTER_URI
    when "outer_coat"            then @@OUTER_Coat_URI
    when "outer_jacket"          then @@OUTER_Jacket_URI
    when "outer_knit"            then @@OUTER_Knit_URI
    when "outer_vest"            then @@OUTER_Vest_URI
    when "dress"                 then @@DRESS_URI
    when "shoes_and_bag"         then @@SHOES_AND_BAGS_URI
    when "shoes"                 then @@SHOES_URI
    when "bag"                   then @@BAG_URI
    when "accessories"           then @@ACCESSORIES_URI
    when "accessories_watch"     then @@ACCESSORIES_Watch_URI
    when "accessories_necklace"  then @@ACCESSORIES_Necklace_URI
    when "accessories_ring"      then @@ACCESSORIES_Ring_URI
    when "accessories_bracelet"  then @@ACCESSORIES_Bracelet_URI
    when "accessories_earring"   then @@ACCESSORIES_Earring_URI
    when "accessories_muffler"   then @@ACCESSORIES_Muffler_URI
    when "accessories_belt"      then @@ACCESSORIES_Belt_URI
    when "accessories_haircap"   then @@ACCESSORIES_Haircap_URI
    when "accessories_glasses"   then @@ACCESSORIES_Glasses_URI
    when "accessories_socks"     then @@ACCESSORIES_Socks_URI
    when "accessories_underwear" then @@ACCESSORIES_Underwear_URI
    when "accessories_others"    then @@ACCESSORIES_Others_URI
    else @@ALL_ITEMS_URI
    end

  use_keyword = !keyword.nil? && keyword != "none"

  # Use the loop's own page number: the previous hand-maintained counter was
  # only advanced once, so every page after the first was fetched as page 2,
  # and later pages were appended to an instance variable that was never set.
  combined_results = []
  1.upto(page_limit) do |page|
    uri = uri_with_page(base_uri, page)
    uri = uri_with_keyword(uri, keyword) if use_keyword
    combined_results.concat(filter(fetch_data(uri)))
  end

  # Filter by price only when a usable range was supplied.
  return combined_results if price_from == -1 || price_to == -1

  combined_results.select { |item| item[:price].to_i.between?(price_from, price_to) }
end
|
147
210
|
|
@@ -156,7 +219,6 @@ module StyleMoonCat
|
|
156
219
|
end
|
157
220
|
|
158
221
|
# Downloads the document at +uri+ and returns its body.
#
# Goes through OpenURI.open_uri rather than Kernel#open: Kernel#open treats
# a string starting with "|" as a command to run (unsafe for links scraped
# from a page), and since Ruby 3.0 it no longer opens URLs at all.
# NOTE(review): assumes the file already loads open-uri (the previous
# open(uri) call needed it to fetch URLs) - confirm the require is present.
#
# @param uri [String, URI] address of the page to download
# @return [String] the response body
def fetch_data(uri)
  OpenURI.open_uri(uri) { |file| file.read }
end
|
162
224
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stylemooncat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Even Chang
|
@@ -11,10 +11,10 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2016-01-
|
14
|
+
date: 2016-01-03 00:00:00.000000000 Z
|
15
15
|
dependencies: []
|
16
16
|
description: This is a gem scraping StyleMoonCat's website.Input category name,page
|
17
|
-
|
17
|
+
limit,searcing keyword,and price range,and it will return the items with title,price,image,and
|
18
18
|
link in the page of the category
|
19
19
|
email:
|
20
20
|
- kiki44552002@gmail.com
|