libcraigscrape 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/Rakefile +1 -1
- data/bin/craigwatch +1 -1
- data/lib/listings.rb +1 -1
- data/lib/posting.rb +18 -7
- data/lib/scraper.rb +13 -5
- data/test/post_samples/posting1796890756-061710.html +2318 -0
- data/test/post_samples/posting1808219423.html +2473 -0
- data/test/test_craigslist_posting.rb +56 -1
- metadata +43 -17
@@ -300,7 +300,62 @@ EOD
|
|
300
300
|
assert_equal [], sfbay_art_1223614914.images
|
301
301
|
assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
|
302
302
|
assert_equal [:pic], sfbay_art_1223614914.img_types
|
303
|
-
|
303
|
+
end
|
304
|
+
|
305
|
+
# This is actually a 'bug' in Hpricot itself that shows up when the ulimit is set too low.
|
306
|
+
# The easy fix is running "ulimit -s 16384" before the tests, but the better fix was
|
307
|
+
# to remove the userbody before sending these pages to be parsed by Hpricot.
|
308
|
+
def test_bugs_found061710
|
309
|
+
posting_061710 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1796890756-061710.html')
|
310
|
+
|
311
|
+
assert_equal false, posting_061710.deleted_by_author?
|
312
|
+
assert_equal true, posting_061710.downloaded?
|
313
|
+
assert_equal false, posting_061710.flagged_for_removal?
|
314
|
+
assert_equal ["south florida craigslist", "miami / dade", "for sale / wanted", "general for sale"], posting_061710.full_section
|
315
|
+
assert_equal false, posting_061710.has_img?
|
316
|
+
assert_equal false, posting_061710.has_pic?
|
317
|
+
assert_equal false, posting_061710.has_pic_or_img?
|
318
|
+
assert_equal "*****SOFTWARE**** (Dade/Broward)", posting_061710.header
|
319
|
+
assert_equal "*****SOFTWARE**** (Dade/Broward)", posting_061710.header_as_plain
|
320
|
+
assert_equal nil, posting_061710.href
|
321
|
+
assert_equal [], posting_061710.images
|
322
|
+
assert_equal [], posting_061710.img_types
|
323
|
+
assert_equal "*****SOFTWARE****", posting_061710.label
|
324
|
+
assert_equal "Dade/Broward", posting_061710.location
|
325
|
+
assert_equal [], posting_061710.pics
|
326
|
+
assert_equal [0, 0, 0, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_date.to_a
|
327
|
+
assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_time.to_a
|
328
|
+
assert_equal 1796890756, posting_061710.posting_id
|
329
|
+
assert_equal nil, posting_061710.price
|
330
|
+
assert_equal nil, posting_061710.reply_to
|
331
|
+
assert_equal "general for sale", posting_061710.section
|
332
|
+
assert_equal false, posting_061710.system_post?
|
333
|
+
assert_equal "*****SOFTWARE****", posting_061710.title
|
334
|
+
|
335
|
+
posting1808219423 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1808219423.html')
|
336
|
+
assert_equal false, posting1808219423.deleted_by_author?
|
337
|
+
assert_equal true, posting1808219423.downloaded?
|
338
|
+
assert_equal false, posting1808219423.flagged_for_removal?
|
339
|
+
assert_equal ["south florida craigslist", "miami / dade", "for sale / wanted", "general for sale"], posting1808219423.full_section
|
340
|
+
assert_equal true, posting1808219423.has_img?
|
341
|
+
assert_equal false, posting1808219423.has_pic?
|
342
|
+
assert_equal true, posting1808219423.has_pic_or_img?
|
343
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More (Dade/Broward)", posting1808219423.header
|
344
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More (Dade/Broward)", posting1808219423.header_as_plain
|
345
|
+
assert_equal nil, posting1808219423.href
|
346
|
+
assert_equal ["http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg"], posting1808219423.images
|
347
|
+
assert_equal [:img], posting1808219423.img_types
|
348
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.label
|
349
|
+
assert_equal "Dade/Broward", posting1808219423.location
|
350
|
+
assert_equal [], posting1808219423.pics
|
351
|
+
assert_equal [0, 0, 0, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_date.to_a
|
352
|
+
assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_time.to_a
|
353
|
+
assert_equal 1808219423, posting1808219423.posting_id
|
354
|
+
assert_equal nil, posting1808219423.price
|
355
|
+
assert_equal nil, posting1808219423.reply_to
|
356
|
+
assert_equal "general for sale", posting1808219423.section
|
357
|
+
assert_equal false, posting1808219423.system_post?
|
358
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.title
|
304
359
|
end
|
305
360
|
|
306
361
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libcraigscrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 57
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 8
|
9
|
+
- 3
|
10
|
+
version: 0.8.3
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Chris DeRose, DeRose Technologies, Inc.
|
@@ -9,39 +15,51 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-08-02 00:00:00 -04:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: hpricot
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
23
32
|
version: "0"
|
24
|
-
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
25
35
|
- !ruby/object:Gem::Dependency
|
26
36
|
name: htmlentities
|
27
|
-
|
28
|
-
|
29
|
-
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
30
40
|
requirements:
|
31
41
|
- - ">="
|
32
42
|
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
33
46
|
version: "0"
|
34
|
-
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
35
49
|
- !ruby/object:Gem::Dependency
|
36
50
|
name: activesupport
|
37
|
-
|
38
|
-
|
39
|
-
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
40
54
|
requirements:
|
41
55
|
- - ">="
|
42
56
|
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
43
60
|
version: "0"
|
44
|
-
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
45
63
|
description: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
|
46
64
|
email: cderose@derosetechnologies.com
|
47
65
|
executables:
|
@@ -93,7 +111,9 @@ files:
|
|
93
111
|
- test/post_samples/posting1.html
|
94
112
|
- test/post_samples/posting0.html
|
95
113
|
- test/post_samples/posting5.html
|
114
|
+
- test/post_samples/posting1796890756-061710.html
|
96
115
|
- test/post_samples/posting3.html
|
116
|
+
- test/post_samples/posting1808219423.html
|
97
117
|
- test/post_samples/sfbay_art_1223614914.html
|
98
118
|
- test/post_samples/this_post_has_been_deleted_by_its_author.html
|
99
119
|
- test/post_samples/1207457727.html
|
@@ -135,21 +155,27 @@ rdoc_options:
|
|
135
155
|
require_paths:
|
136
156
|
- lib
|
137
157
|
required_ruby_version: !ruby/object:Gem::Requirement
|
158
|
+
none: false
|
138
159
|
requirements:
|
139
160
|
- - ">="
|
140
161
|
- !ruby/object:Gem::Version
|
162
|
+
hash: 3
|
163
|
+
segments:
|
164
|
+
- 0
|
141
165
|
version: "0"
|
142
|
-
version:
|
143
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
|
+
none: false
|
144
168
|
requirements:
|
145
169
|
- - ">="
|
146
170
|
- !ruby/object:Gem::Version
|
171
|
+
hash: 3
|
172
|
+
segments:
|
173
|
+
- 0
|
147
174
|
version: "0"
|
148
|
-
version:
|
149
175
|
requirements: []
|
150
176
|
|
151
177
|
rubyforge_project: libcraigwatch
|
152
|
-
rubygems_version: 1.3.
|
178
|
+
rubygems_version: 1.3.7
|
153
179
|
signing_key:
|
154
180
|
specification_version: 3
|
155
181
|
summary: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
|