libcraigscrape 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/Rakefile +1 -1
- data/bin/craigwatch +1 -1
- data/lib/listings.rb +1 -1
- data/lib/posting.rb +18 -7
- data/lib/scraper.rb +13 -5
- data/test/post_samples/posting1796890756-061710.html +2318 -0
- data/test/post_samples/posting1808219423.html +2473 -0
- data/test/test_craigslist_posting.rb +56 -1
- metadata +43 -17
@@ -300,7 +300,62 @@ EOD
|
|
300
300
|
assert_equal [], sfbay_art_1223614914.images
|
301
301
|
assert_equal ["http://images.craigslist.org/3kf3o93laZZZZZZZZZ96fbc594a6ceb1f1025.jpg"], sfbay_art_1223614914.pics
|
302
302
|
assert_equal [:pic], sfbay_art_1223614914.img_types
|
303
|
-
|
303
|
+
end
|
304
|
+
|
305
|
+
# This is actually a 'bug' with hpricot itself when the ulimit is set too low.
|
306
|
+
# the Easy fix is running "ulimit -s 16384" before the tests. But the better fix was
|
307
|
+
# to remove the userbody sending these pages to be parsed by Hpricot
|
308
|
+
def test_bugs_found061710
|
309
|
+
posting_061710 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1796890756-061710.html')
|
310
|
+
|
311
|
+
assert_equal false, posting_061710.deleted_by_author?
|
312
|
+
assert_equal true, posting_061710.downloaded?
|
313
|
+
assert_equal false, posting_061710.flagged_for_removal?
|
314
|
+
assert_equal ["south florida craigslist", "miami / dade", "for sale / wanted", "general for sale"], posting_061710.full_section
|
315
|
+
assert_equal false, posting_061710.has_img?
|
316
|
+
assert_equal false, posting_061710.has_pic?
|
317
|
+
assert_equal false, posting_061710.has_pic_or_img?
|
318
|
+
assert_equal "*****SOFTWARE**** (Dade/Broward)", posting_061710.header
|
319
|
+
assert_equal "*****SOFTWARE**** (Dade/Broward)", posting_061710.header_as_plain
|
320
|
+
assert_equal nil, posting_061710.href
|
321
|
+
assert_equal [], posting_061710.images
|
322
|
+
assert_equal [], posting_061710.img_types
|
323
|
+
assert_equal "*****SOFTWARE****", posting_061710.label
|
324
|
+
assert_equal "Dade/Broward", posting_061710.location
|
325
|
+
assert_equal [], posting_061710.pics
|
326
|
+
assert_equal [0, 0, 0, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_date.to_a
|
327
|
+
assert_equal [0, 22, 13, 17, 6, 2010, 4, 168, true, "EDT"], posting_061710.post_time.to_a
|
328
|
+
assert_equal 1796890756, posting_061710.posting_id
|
329
|
+
assert_equal nil, posting_061710.price
|
330
|
+
assert_equal nil, posting_061710.reply_to
|
331
|
+
assert_equal "general for sale", posting_061710.section
|
332
|
+
assert_equal false, posting_061710.system_post?
|
333
|
+
assert_equal "*****SOFTWARE****", posting_061710.title
|
334
|
+
|
335
|
+
posting1808219423 = CraigScrape::Posting.new relative_uri_for('post_samples/posting1808219423.html')
|
336
|
+
assert_equal false, posting1808219423.deleted_by_author?
|
337
|
+
assert_equal true, posting1808219423.downloaded?
|
338
|
+
assert_equal false, posting1808219423.flagged_for_removal?
|
339
|
+
assert_equal ["south florida craigslist", "miami / dade", "for sale / wanted", "general for sale"], posting1808219423.full_section
|
340
|
+
assert_equal true, posting1808219423.has_img?
|
341
|
+
assert_equal false, posting1808219423.has_pic?
|
342
|
+
assert_equal true, posting1808219423.has_pic_or_img?
|
343
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More (Dade/Broward)", posting1808219423.header
|
344
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More (Dade/Broward)", posting1808219423.header_as_plain
|
345
|
+
assert_equal nil, posting1808219423.href
|
346
|
+
assert_equal ["http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg", "http://i800.photobucket.com/albums/yy287/todofull69/Programas/office-2010.jpg", "http://i844.photobucket.com/albums/ab10/fziqe/adobeblogcopy.jpg", "http://i31.photobucket.com/albums/c383/drapizan/RosettaStone.jpg", "http://i1002.photobucket.com/albums/af142/tagurtoast/Windows_7.jpg"], posting1808219423.images
|
347
|
+
assert_equal [:img], posting1808219423.img_types
|
348
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.label
|
349
|
+
assert_equal "Dade/Broward", posting1808219423.location
|
350
|
+
assert_equal [], posting1808219423.pics
|
351
|
+
assert_equal [0, 0, 0, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_date.to_a
|
352
|
+
assert_equal [0, 35, 7, 24, 6, 2010, 4, 175, true, "EDT"], posting1808219423.post_time.to_a
|
353
|
+
assert_equal 1808219423, posting1808219423.posting_id
|
354
|
+
assert_equal nil, posting1808219423.price
|
355
|
+
assert_equal nil, posting1808219423.reply_to
|
356
|
+
assert_equal "general for sale", posting1808219423.section
|
357
|
+
assert_equal false, posting1808219423.system_post?
|
358
|
+
assert_equal "*Software*AdobeCS5*RosettaStone*AutoCAD*Windows7*Office2010*&* More", posting1808219423.title
|
304
359
|
end
|
305
360
|
|
306
361
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libcraigscrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 57
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 8
|
9
|
+
- 3
|
10
|
+
version: 0.8.3
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Chris DeRose, DeRose Technologies, Inc.
|
@@ -9,39 +15,51 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-
|
18
|
+
date: 2010-08-02 00:00:00 -04:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: hpricot
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
23
32
|
version: "0"
|
24
|
-
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
25
35
|
- !ruby/object:Gem::Dependency
|
26
36
|
name: htmlentities
|
27
|
-
|
28
|
-
|
29
|
-
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
30
40
|
requirements:
|
31
41
|
- - ">="
|
32
42
|
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
33
46
|
version: "0"
|
34
|
-
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id002
|
35
49
|
- !ruby/object:Gem::Dependency
|
36
50
|
name: activesupport
|
37
|
-
|
38
|
-
|
39
|
-
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
40
54
|
requirements:
|
41
55
|
- - ">="
|
42
56
|
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
43
60
|
version: "0"
|
44
|
-
|
61
|
+
type: :runtime
|
62
|
+
version_requirements: *id003
|
45
63
|
description: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
|
46
64
|
email: cderose@derosetechnologies.com
|
47
65
|
executables:
|
@@ -93,7 +111,9 @@ files:
|
|
93
111
|
- test/post_samples/posting1.html
|
94
112
|
- test/post_samples/posting0.html
|
95
113
|
- test/post_samples/posting5.html
|
114
|
+
- test/post_samples/posting1796890756-061710.html
|
96
115
|
- test/post_samples/posting3.html
|
116
|
+
- test/post_samples/posting1808219423.html
|
97
117
|
- test/post_samples/sfbay_art_1223614914.html
|
98
118
|
- test/post_samples/this_post_has_been_deleted_by_its_author.html
|
99
119
|
- test/post_samples/1207457727.html
|
@@ -135,21 +155,27 @@ rdoc_options:
|
|
135
155
|
require_paths:
|
136
156
|
- lib
|
137
157
|
required_ruby_version: !ruby/object:Gem::Requirement
|
158
|
+
none: false
|
138
159
|
requirements:
|
139
160
|
- - ">="
|
140
161
|
- !ruby/object:Gem::Version
|
162
|
+
hash: 3
|
163
|
+
segments:
|
164
|
+
- 0
|
141
165
|
version: "0"
|
142
|
-
version:
|
143
166
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
|
+
none: false
|
144
168
|
requirements:
|
145
169
|
- - ">="
|
146
170
|
- !ruby/object:Gem::Version
|
171
|
+
hash: 3
|
172
|
+
segments:
|
173
|
+
- 0
|
147
174
|
version: "0"
|
148
|
-
version:
|
149
175
|
requirements: []
|
150
176
|
|
151
177
|
rubyforge_project: libcraigwatch
|
152
|
-
rubygems_version: 1.3.
|
178
|
+
rubygems_version: 1.3.7
|
153
179
|
signing_key:
|
154
180
|
specification_version: 3
|
155
181
|
summary: quick, easy, craigslist parsing library that takes the monotony out of working with craigslist posts and listings
|