olek-libcraigscrape 1.1.0 → 1.1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Sony Vaio</title>
5
+ <meta name="robots" content="NOARCHIVE,NOFOLLOW">
6
+ <meta name="viewport" content="user-scalable=1;">
7
+ <link type="text/css" rel="stylesheet" media="all" href="http://www.craigslist.org/styles/craigslist.css?v=9380f84aa49cda76299b65a80eaa7d6a">
8
+
9
+ <!--[if lt IE 9]>
10
+ <script type="text/javascript" src="http://www.craigslist.org/js/html5shiv.js?v=ed7af45dcbda983c8455631037ebcdda"></script>
11
+ <![endif]-->
12
+ </head>
13
+
14
+ <body class="posting">
15
+
16
+ <article class="container">
17
+ <header class="bchead">
18
+ <a id="ef" href="https://accounts.craigslist.org/eaf?postingID=3469913065&amp;token=U2FsdGVkX18yNjEwMTI2MZhHRuoI2F55LEBh16JPts9t3hqjcgg0Er5mp5WnvvKRFaAEFeAXHVyq9BaziP8ZUbRWhzfTPRil">email this posting to a friend</a> <a href="http://miami.craigslist.org/">south florida craigslist</a> &gt; <a href="http://miami.craigslist.org/mdc/">miami / dade</a> &gt; <a href="http://miami.craigslist.org/mdc/sss/">for sale / wanted</a> &gt; <a href="http://miami.craigslist.org/mdc/syd/">computers - by dealer</a>
19
+ </header>
20
+
21
+ <section class="body">
22
+ <div id="flags">
23
+ <div id="flagMsg">
24
+ please flag with care:
25
+ <a href="http://www.craigslist.org/about/help/flags_and_community_moderation">[?]</a>
26
+ </div>
27
+ <div id="flagChooser">
28
+ <a class="fl" id="flag16" href="/flag/?flagCode=16&amp;postingID=3469913065"
29
+ title="Wrong category, wrong site, discusses another post, or otherwise misplaced">
30
+ miscategorized</a>
31
+ <a class="fl" id="flag28" href="/flag/?flagCode=28&amp;postingID=3469913065"
32
+ title="Violates craigslist Terms Of Use or other posted guidelines">
33
+ prohibited</a>
34
+ <a class="fl" id="flag15" href="/flag/?flagCode=15&amp;postingID=3469913065"
35
+ title="Posted too frequently, in multiple cities/categories, or is too commercial">
36
+ spam/overpost</a>
37
+ <a class="fl" id="flag9" href="/flag/?flagCode=9&amp;postingID=3469913065"
38
+ title="Should be considered for inclusion in the Best-Of-Craigslist">
39
+ best of craigslist</a>
40
+ </div>
41
+ </div>
42
+
43
+ <div class="tsb">
44
+ <em>Avoid scams and fraud by dealing locally!</em>
45
+ Beware any deal involving Western Union, Moneygram, wire transfer, cashier check, money order, shipping, escrow, or any promise of transaction protection/certification/guarantee
46
+ <a href="http://www.craigslist.org/about/scams">More info</a>
47
+ </div>
48
+
49
+ <h2 class="postingtitle">Sony Vaio - $480 (orlando,florida)</h2>
50
+
51
+ <section class="dateReplyBar">
52
+ <hr>
53
+ <div class="postingdate">Date: <time>2012-12-10, 8:51PM EST</time></div>
54
+
55
+ <script type="text/javascript"><!--
56
+ var isPreview = "";
57
+ var postingTitle = "Sony Vaio - $480 (orlando,florida)";
58
+ var bestOf = "";
59
+ var postingURL = "http%3A%2F%2Fmiami.craigslist.org%2Fmdc%2Fsyd%2F3469913065.html";
60
+ var displayEmail = "9cxgv-3469913065@sale.craigslist.org";
61
+
62
+ --></script>
63
+
64
+
65
+ <button id="reply_button">Reply to this post</button>
66
+
67
+ <span id="replytext">Reply to:</span> <small><a href="mailto:9cxgv-3469913065@sale.craigslist.org?subject=Sony%20Vaio%20-%20%24480%20(orlando%2Cflorida)&amp;body=%0A%0Ahttp%3A%2F%2Fmiami.craigslist.org%2Fmdc%2Fsyd%2F3469913065.html%0A">9cxgv-3469913065@sale.craigslist.org</a></small><sup>[<a href="http://www.craigslist.org/about/help/replying_to_posts" target="_blank">?</a>]</sup>
68
+
69
+ <div id="returnemail"> </div>
70
+
71
+
72
+ <hr>
73
+ </section>
74
+
75
+
76
+ <section id="userbody">
77
+ <br>Sony Vaio for sale! Its in great condition but I no longer hard need for it. No low ball offers!!<br><br><br><br>
78
+ <script type="text/javascript">
79
+ <!--
80
+ imgList = ["http://images.craigslist.org/3Eb3Fc3M25Ne5Ed5J6cca593d0c806b3614c1.jpg","http://images.craigslist.org/3K73L43J45G25H55J1cca2d0db7d75fe11448.jpg","http://images.craigslist.org/3E53Gc3F85I95K25M1ccaf6c5790cbd541b57.jpg","http://images.craigslist.org/3G83Kc3F15L45E75J9cca7e4e7fbdfe981fd9.jpg","http://images.craigslist.org/3Kc3ma3N65Le5Kf5U3ccae67bd8aa8129140c.jpg"];
81
+ // -->
82
+ </script>
83
+ <div class="iw">
84
+ <div id="ci">
85
+ <span><!-- --></span>
86
+ <img id="iwi" src="http://images.craigslist.org/3Eb3Fc3M25Ne5Ed5J6cca593d0c806b3614c1.jpg" alt="">
87
+ </div>
88
+ <div id="iwt">
89
+
90
+ <div class="tn">
91
+ <a href="http://images.craigslist.org/3Eb3Fc3M25Ne5Ed5J6cca593d0c806b3614c1.jpg" title="1">
92
+ <img src="http://images.craigslist.org/thumb/3Eb3Fc3M25Ne5Ed5J6cca593d0c806b3614c1.jpg" alt="image 1">
93
+ </a>
94
+ </div>
95
+
96
+ <div class="tn">
97
+ <a href="http://images.craigslist.org/3K73L43J45G25H55J1cca2d0db7d75fe11448.jpg" title="2">
98
+ <img src="http://images.craigslist.org/thumb/3K73L43J45G25H55J1cca2d0db7d75fe11448.jpg" alt="image 2">
99
+ </a>
100
+ </div>
101
+
102
+ <div class="tn">
103
+ <a href="http://images.craigslist.org/3E53Gc3F85I95K25M1ccaf6c5790cbd541b57.jpg" title="3">
104
+ <img src="http://images.craigslist.org/thumb/3E53Gc3F85I95K25M1ccaf6c5790cbd541b57.jpg" alt="image 3">
105
+ </a>
106
+ </div>
107
+
108
+ <div class="tn">
109
+ <a href="http://images.craigslist.org/3G83Kc3F15L45E75J9cca7e4e7fbdfe981fd9.jpg" title="4">
110
+ <img src="http://images.craigslist.org/thumb/3G83Kc3F15L45E75J9cca7e4e7fbdfe981fd9.jpg" alt="image 4">
111
+ </a>
112
+ </div>
113
+
114
+ <div class="tn">
115
+ <a href="http://images.craigslist.org/3Kc3ma3N65Le5Kf5U3ccae67bd8aa8129140c.jpg" title="5">
116
+ <img src="http://images.craigslist.org/thumb/3Kc3ma3N65Le5Kf5U3ccae67bd8aa8129140c.jpg" alt="image 5">
117
+ </a>
118
+ </div>
119
+
120
+ </div>
121
+ </div>
122
+
123
+ <!-- START CLTAGS -->
124
+ <br>
125
+ <ul class="blurbs">
126
+ <li> <!-- CLTAG GeographicArea=orlando,florida -->Location: orlando,florida</li>
127
+ <li>it's NOT ok to contact this poster with services or other commercial interests</li></ul>
128
+ <!-- END CLTAGS -->
129
+ </section>
130
+
131
+ <p class="postingidtext">PostingID:3469913065</p>
132
+
133
+
134
+
135
+ <br class="c">
136
+ </section>
137
+ <footer>
138
+ <ul class="clfooter">
139
+ <li>Copyright &copy; 2012 craigslist, inc.</li>
140
+ <li><a href="http://www.craigslist.org/about/terms.of.use">terms of use</a></li>
141
+ <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
142
+ <li><a href="/forums/?forumID=8">feedback forum</a></li>
143
+ </ul>
144
+ </footer>
145
+
146
+ </article>
147
+
148
+
149
+ <script type="text/javascript"><!--
150
+ var pagetype = "posting";
151
+ var pID = "3469913065";
152
+ var wwwurl = "http://www.craigslist.org";
153
+
154
+ --></script>
155
+
156
+ <script type="text/javascript" src="http://www.craigslist.org/js/jquery-1.7.2.js?v=89700834f1601ac3ebc3e5fb3302c040"></script>
157
+ <script type="text/javascript" src="http://www.craigslist.org/js/postings.js?v=d32f9ab28089cba9f207317b8c264ebb"></script>
158
+ <script type="text/javascript" src="http://www.craigslist.org/js/formats.js?v=3e34df20530f6579488bbac70a1e2e1d"></script>
159
+
160
+
161
+
162
+
163
+ </body>
164
+ </html>
165
+
@@ -0,0 +1,48 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2
+ <html>
3
+ <head>
4
+ <title></title>
5
+ <meta name="robots" content="NOARCHIVE,NOFOLLOW">
6
+ <link type="text/css" rel="stylesheet" media="all" href="http://www.craigslist.org/styles/craigslist.css?v=8">
7
+ </head>
8
+
9
+ <body class="posting">
10
+
11
+
12
+ <div class="bchead">
13
+
14
+ <a href="http://charleston.craigslist.org/">charleston craigslist</a> &gt;
15
+
16
+ <a href="http://charleston.craigslist.org/sss/">for sale / wanted</a> &gt;
17
+ <a href="http://charleston.craigslist.org/cto/">cars &amp; trucks - by owner</a>
18
+ </div>
19
+
20
+
21
+
22
+
23
+
24
+ <hr>
25
+ <br>
26
+ <br>
27
+ <h2>This posting has expired.</h2>
28
+ <h5>(The title on the listings page will be removed in just a few minutes.)</h5>
29
+
30
+ <br><br>
31
+
32
+ <hr>
33
+ <ul class="clfooter">
34
+ <li>Copyright &copy; 2011 craigslist, inc.</li>
35
+ <li><a href="http://www.craigslist.org/about/terms.of.use.html">terms of use</a></li>
36
+ <li><a href="http://www.craigslist.org/about/privacy_policy">privacy policy</a></li>
37
+ <li><a href="/forums/?forumID=8">feedback forum</a></li>
38
+ </ul>
39
+
40
+ <script type="text/javascript" src="http://www.craigslist.org/js/jquery-1.4.2.js"></script>
41
+ <script type="text/javascript" src="http://www.craigslist.org/js/postings.js"></script>
42
+ <script type="text/javascript"><!--
43
+ pID = 1968731193;
44
+ -->
45
+ </script>
46
+ </body>
47
+ </html>
48
+
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ describe CraigScrape::GeoListings do
4
+ context "geolisting_iso_us_120412.html" do
5
+ subject{ described_class.new uri_for('geolisting_iso_us_120412.html') }
6
+
7
+ its(:location){should eq('united states') }
8
+ end
9
+ end
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe CraigScrape::Listings do
5
+ context "listing_cta_ftl_112612.html" do
6
+ subject { described_class.new( uri_for('listing_cta_ftl_112612.html') ) }
7
+ specify{ subject.posts.should have(100).items }
8
+ specify{ subject.posts.collect(&:post_date).uniq.should eq([Time.parse('2012-11-26 00:00:00 -0500')]) }
9
+ specify{ subject.next_page_href.should eq('index100.html') }
10
+ end
11
+
12
+ context 'listing_search_ppa_nyc_121212.html' do
13
+ subject { described_class.new( uri_for('listing_search_ppa_nyc_121212.html') ) }
14
+
15
+ specify{ subject.posts.should have(100).items }
16
+ specify{ subject.posts.collect(&:post_date).uniq.should eq(['2012-12-12 00:00:00 -0500',
17
+ '2012-12-11 00:00:00 -0500', '2012-12-10 00:00:00 -0500'].collect{|t| Time.parse(t) }) }
18
+ specify{ subject.next_page_href.should eq('http://newyork.craigslist.org/search/ppa?query=kenmore&srchType=A&s=100') }
19
+ end
20
+ end
@@ -0,0 +1,149 @@
1
+ # encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe CraigScrape::Posting do
5
+ context "this_post_has_expired_old.html" do
6
+ subject{ described_class.new uri_for('this_post_has_expired_old.html') }
7
+
8
+ its(:posting_has_expired?){ should be_true }
9
+ end
10
+
11
+ context "posting_page_not_found_120512.html" do
12
+ subject{ described_class.new uri_for('posting_page_not_found_120512.html') }
13
+
14
+ its(:system_post?){ should be_true }
15
+ end
16
+
17
+ context "posting_sya_121012.html" do
18
+ # This example was picked since it has pics
19
+ subject{ described_class.new uri_for('posting_sya_121012.html') }
20
+
21
+ its(:full_section) {should eq(["south florida craigslist", "miami / dade", "for sale / wanted", "computers - by dealer"])}
22
+ its(:header) {should eq("Sony Vaio - $480 (orlando,florida)")}
23
+ its(:label) {should eq("Sony Vaio - $480")}
24
+ its(:title) {should eq("Sony Vaio")}
25
+ its(:location) {should eq('orlando,florida')}
26
+ its(:posting_id) {should eq(3469913065)}
27
+ its(:reply_to) {should eq('9cxgv-3469913065@sale.craigslist.org')}
28
+ its(:post_time) {should eq(Time.parse('2012-12-10 20:51:00 -0500'))}
29
+ its(:price) {should eq(480)}
30
+ its(:images) {should eq([])}
31
+ its(:pics) do
32
+ pics_list = ['3Eb3Fc3M25Ne5Ed5J6cca593d0c806b3614c1',
33
+ '3K73L43J45G25H55J1cca2d0db7d75fe11448', '3E53Gc3F85I95K25M1ccaf6c5790cbd541b57',
34
+ '3G83Kc3F15L45E75J9cca7e4e7fbdfe981fd9', '3Kc3ma3N65Le5Kf5U3ccae67bd8aa8129140c'
35
+ ].collect{|src| ['http://images.craigslist.org/', src, '.jpg'].join }
36
+ should eq( pics_list )
37
+ end
38
+ its(:img_types) {should eq([:pic])}
39
+ its(:contents_as_plain) {should eq("Sony Vaio for sale! Its in great condition but I no longer hard need for it. No low ball offers!!")}
40
+ its(:contents) {should eq("<br>Sony Vaio for sale! Its in great condition but I no longer hard need for it. No low ball offers!!<br><br><br><br>")}
41
+ end
42
+
43
+ context "posting_sya_121012-2.html" do
44
+ # This example was picked since it has images and no text
45
+ subject{ described_class.new uri_for('posting_sya_121012-2.html') }
46
+
47
+ its(:full_section) {should eq(["south florida craigslist", "broward county", "for sale / wanted", "computers - by dealer"])}
48
+ its(:header) {should eq("METRO PCS ★ANDROID★SMARTPHONE★ Samsung SCH Admire Red Clean ES - $80 (BROWARD)")}
49
+ its(:label) {should eq("METRO PCS ★ANDROID★SMARTPHONE★ Samsung SCH Admire Red Clean ES - $80")}
50
+ its(:title) {should eq("METRO PCS ★ANDROID★SMARTPHONE★ Samsung SCH Admire Red Clean ES")}
51
+ its(:location) {should eq('BROWARD')}
52
+ its(:posting_id) {should eq(3469905497)}
53
+ its(:reply_to) {should eq('z7jmh-3469905497@sale.craigslist.org')}
54
+ its(:post_time) {should eq(Time.parse('2012-12-10 20:47:00 -0500'))}
55
+ its(:price) {should eq(80)}
56
+ its(:images) do
57
+ images_list = ["http://i1157.photobucket.com/albums/p590/emy123000/T2eC16NE9s2fp7dBQuCykypg60_12.jpg", "http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg", "http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg", "http://i1157.photobucket.com/albums/p590/emy123000/ScreenShot2012-06-25at60811AM.png", "http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZowFCp4FZqoWBQvsVUbFdw60_12.jpg"]
58
+
59
+ should eq(images_list)
60
+ end
61
+ its(:pics) {should eq([])}
62
+ its(:img_types) {should eq([:img])}
63
+ its(:contents_as_plain) {should eq("")}
64
+ its(:contents) {should eq("<a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view&current=T2eC16NE9s2fp7dBQuCykypg60_12.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/T2eC16NE9s2fp7dBQuCykypg60_12.jpg\" border=\"0\" alt=\"Photobucket\"></a><br><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view&current=KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" border=\"0\" alt=\"Photobucket\"></a><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view&current=KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZqwFCS4TIRoZBQKvdVJIQ60_57.jpg\" border=\"0\" alt=\"Photobucket\"></a><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view&current=ScreenShot2012-06-25at60811AM.png\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/ScreenShot2012-06-25at60811AM.png\" border=\"0\" alt=\"Photobucket\"></a><a href=\"http://s1157.photobucket.com/albums/p590/emy123000/?action=view&current=KGrHqZowFCp4FZqoWBQvsVUbFdw60_12.jpg\" target=\"_blank\" rel=\"nofollow\"><img src=\"http://i1157.photobucket.com/albums/p590/emy123000/KGrHqZowFCp4FZqoWBQvsVUbFdw60_12.jpg\" border=\"0\" alt=\"Photobucket\"></a>")}
65
+ end
66
+
67
+ context "posting_mdc_cto_ftl_112612.html" do
68
+ subject{ described_class.new uri_for('posting_mdc_cto_ftl_112612.html') }
69
+
70
+ its(:title) {should eq("1999 Mustang GT w/ '08 3 Valve Engine Swap")}
71
+ its(:contents) {should eq("I am selling my 1999 Mustang GT with a 2008 GT 3 Valve engine swap. The car is a 5 speed with 3.73 rear gears. It has a Diablo Sport chip with 2 tunes. The rear tires have plentyt of tread and are Michelin Pilot sports ($700 less than a year ago). The rims are staggered and are less than a year old. The car has a 2003 Mustang Cobra hood. It also has a 2003 Cobra front bumper. The paint on the car is less than a year old. The bad: The car will need 2 front tires soon. They are currently Nitto 555's. The A/C Compressor will need changing soon as it is making a little noise. It works but I dont use it just in case. The drivers side seat needs to be re-upholstered also. Minor problems considering the work that went into this car. The instrument cluster is from a Mach 1 and shows 120,000 miles, but the engine has around 70,000 miles. <br><br>\nA lot of time and money was spent on this car to do the swap right. It is my daily driver and has never given me a single problem. It's always had Mobil 1 Synthetic oil and it's plenty fast and a lot of fun to drive. If you are looking for something unique and you are a Mustang fan, you might want to consider this car.<br><br>\nThe seats are from a newer model Mustang GT. The car runs really good, and turns heads everywhere it goes, especially when I pop the hood and those in the know see the engine that doesnt belong there. : )<br><br>\nI am will consider all offers so please don't be shy, the worse that I can do is say no. I am interested in some specific cars as well that I willl consider on trade. BMW 530 or 540, Infinity G35, Lexus IS 300 or GS300 or 400. I respond better to texts or emails. Call with any quesrtions. 305-310-5993 or email me at Torresa76@aol.com<br><br>\nThanks for looking. : )")}
72
+ its(:contents_as_plain) {should eq("I am selling my 1999 Mustang GT with a 2008 GT 3 Valve engine swap. The car is a 5 speed with 3.73 rear gears. It has a Diablo Sport chip with 2 tunes. The rear tires have plentyt of tread and are Michelin Pilot sports ($700 less than a year ago). The rims are staggered and are less than a year old. The car has a 2003 Mustang Cobra hood. It also has a 2003 Cobra front bumper. The paint on the car is less than a year old. The bad: The car will need 2 front tires soon. They are currently Nitto 555's. The A/C Compressor will need changing soon as it is making a little noise. It works but I dont use it just in case. The drivers side seat needs to be re-upholstered also. Minor problems considering the work that went into this car. The instrument cluster is from a Mach 1 and shows 120,000 miles, but the engine has around 70,000 miles. \nA lot of time and money was spent on this car to do the swap right. It is my daily driver and has never given me a single problem. It's always had Mobil 1 Synthetic oil and it's plenty fast and a lot of fun to drive. If you are looking for something unique and you are a Mustang fan, you might want to consider this car.\nThe seats are from a newer model Mustang GT. The car runs really good, and turns heads everywhere it goes, especially when I pop the hood and those in the know see the engine that doesnt belong there. : )\nI am will consider all offers so please don't be shy, the worse that I can do is say no. I am interested in some specific cars as well that I willl consider on trade. BMW 530 or 540, Infinity G35, Lexus IS 300 or GS300 or 400. I respond better to texts or emails. Call with any quesrtions. 305-310-5993 or email me at Torresa76@aol.com\nThanks for looking. : )")}
73
+ its(:full_section) {should eq(["south florida craigslist", "miami / dade", "for sale / wanted", "cars & trucks - by owner"])}
74
+ its(:header) {should eq("1999 Mustang GT w/ '08 3 Valve Engine Swap - $8500 (Homestead)")}
75
+ its(:label) {should eq("1999 Mustang GT w/ '08 3 Valve Engine Swap - $8500")}
76
+ its(:location) {should eq('Homestead')}
77
+ its(:posting_id) {should eq(3437079882)}
78
+ its(:reply_to) {should eq(nil)}
79
+ its(:post_time) {should eq(Time.parse('2012-11-26 21:34:00 -0500'))}
80
+ its(:price) {should eq(8500)}
81
+ its(:images) {should eq([])}
82
+ its(:pics) do
83
+ pics_list = ['3M53of3H65N15E15M2cbqdd2e7af939c215a3',
84
+ '3G13F23Hd5I15Nb5T1cbqfb3e2605ddf31b8b', '3n13F23N25Lf5Y65Facbq05143722c4801267',
85
+ '3Ee3Ne3H85N85K15Hecbq79f17c0a2e03136e', '3me3pb3Nb5Le5Hd5Mdcbqe446ce3ce2ef1f80'
86
+ ].collect{|src| ['http://images.craigslist.org/', src, '.jpg'].join }
87
+ should eq( pics_list )
88
+ end
89
+ its(:img_types) {should eq([:pic])}
90
+ end
91
+
92
+ context "posting_daytona_art_120512.html" do
93
+ subject{ described_class.new uri_for('posting_daytona_art_120512.html') }
94
+
95
+ its(:full_section) {should eq(["daytona beach craigslist", "for sale / wanted", "arts & crafts - by owner"])}
96
+ its(:header) {should eq("METAL SCULPTURES GREAT Christmas gifts (ormond)")}
97
+ its(:label) {should eq("METAL SCULPTURES GREAT Christmas gifts")}
98
+ its(:title) {should eq("METAL SCULPTURES GREAT Christmas gifts")}
99
+ its(:location) {should eq('ormond')}
100
+ its(:posting_id) {should eq(3431080802)}
101
+ its(:reply_to) {should eq('rbwts-3431080802@sale.craigslist.org')}
102
+ its(:post_time) {should eq(Time.parse('2012-12-05 21:25:00 -0500'))}
103
+ its(:price) {should eq(nil)}
104
+ its(:images) {should eq([])}
105
+ its(:pics) {should eq(["http://images.craigslist.org/3Kb3M83I85Gc5Ea5H2cbo8eb0fb5e4af71968.jpg", "http://images.craigslist.org/3Lb3M33l35E85F35P0cbod80bd9115e311350.jpg", "http://images.craigslist.org/3Ef3Ib3H55L35K55J6cbof57b4d73878111d0.jpg"])}
106
+ its(:img_types) {should eq([:pic])}
107
+ its(:contents_as_plain) {should eq("Assorted Metal sculptures from local artist, call 386 235-4390")}
108
+ its(:contents) {should eq("Assorted Metal sculptures from local artist, call 386 235-4390")}
109
+ end
110
+
111
+ context "posting_daytona_art_120512-2.html" do
112
+ subject{ described_class.new uri_for('posting_daytona_art_120512-2.html') }
113
+
114
+ its(:full_section) {should eq(["daytona beach craigslist", "for sale / wanted", "arts & crafts - by owner"])}
115
+ its(:header) {should eq("Premier Bouquet Wrap - $2 (PALM COAST)")}
116
+ its(:label) {should eq("Premier Bouquet Wrap - $2")}
117
+ its(:title) {should eq("Premier Bouquet Wrap")}
118
+ its(:location) {should eq('PALM COAST')}
119
+ its(:posting_id) {should eq(3448282416)}
120
+ its(:reply_to) {should eq('nqmhm-3448282416@sale.craigslist.org')}
121
+ its(:post_time) {should eq(Time.parse('2012-12-01 15:02:00 -0500'))}
122
+ its(:price) {should eq(2)}
123
+ its(:images) {should eq([])}
124
+ its(:pics) {should eq(["http://images.craigslist.org/3I93pe3Hf5G75J55M2cc13e19b59314771029.jpg"])}
125
+ its(:img_types) {should eq([:pic])}
126
+ its(:contents_as_plain) {should eq("THESE ARE USED IN FLOWER / CRAFT SHOPS . ALL ARE NEW, BOXED AND AND VERY WELL MADE. I HAVE A CASE OF THESE I WILL SELL FOR ONE PRICE, OR WILL SELL BY THE PIECE. CASE PRICE IS FOR ABOUT 144 PIECES $75.00. \nPremier Bouquet Wrap\nFlower Bridal Bouquet Wrap White/Satin \nNew White Satin \nThe wraps are approximately 6 1/2\" Long \nThe bridal bouquet wrap is a creative alternative to tying a ribbon around your flowers. Just slide the wrap around flower stems\nThis wrap is perfect for covering/decorating the stems on \"Wedding\", Quinceañera\" or \"Prom\" bouquets. They can also be used with \"Wedding bouquet\" holder handles. These wraps are made with quality Satin material, easy to install and feels soft and smooth on the Bride's or Bridesmaid's hands. These wraps put the finishing touches on any Floral Wedding Bouquet.\nPLEASE CALL . . .\nAJ-518-858-2002")}
127
+ its(:contents) {should eq("THESE ARE USED IN FLOWER / CRAFT SHOPS . ALL ARE NEW, BOXED AND AND VERY WELL MADE. I HAVE A CASE OF THESE I WILL SELL FOR ONE PRICE, OR WILL SELL BY THE PIECE. CASE PRICE IS FOR ABOUT 144 PIECES $75.00. <br><br>\nPremier Bouquet Wrap<br><br>\nFlower Bridal Bouquet Wrap White/Satin <br><br>\nNew White Satin <br><br>\nThe wraps are approximately 6 1/2\" Long <br><br>\nThe bridal bouquet wrap is a creative alternative to tying a ribbon around your flowers. Just slide the wrap around flower stems<br><br>\nThis wrap is perfect for covering/decorating the stems on \"Wedding\", Quinceañera\" or \"Prom\" bouquets. They can also be used with \"Wedding bouquet\" holder handles. These wraps are made with quality Satin material, easy to install and feels soft and smooth on the Bride's or Bridesmaid's hands. These wraps put the finishing touches on any Floral Wedding Bouquet.<br><br>\nPLEASE CALL . . .<br><br>\nAJ-518-858-2002<br><br><br>")}
128
+ end
129
+
130
+ context "posting_mdc_reb_120612.html" do
131
+ subject{ described_class.new uri_for('posting_mdc_reb_120612.html') }
132
+ its(:system_post?){ should be_false }
133
+
134
+ its(:full_section) {should eq(["south florida craigslist", "miami / dade", "housing", "real estate - by broker"])}
135
+ its(:header) {should eq("$1149000 / 3br - 2000ft² - ✱✱✱BEAUTIFUL HOUSE FOR SALE IN FLORIDA KEYS (Florida Key Islamorada)")}
136
+ its(:label) {should eq("$1149000 / 3br - 2000ft² - ✱✱✱BEAUTIFUL HOUSE FOR SALE IN FLORIDA KEYS")}
137
+ its(:title) {should eq("✱✱✱BEAUTIFUL HOUSE FOR SALE IN FLORIDA KEYS ")}
138
+ its(:location) {should eq('Florida Key Islamorada')}
139
+ its(:posting_id) {should eq(3438004368)}
140
+ its(:reply_to) {should eq('p7h8m-3438004368@hous.craigslist.org')}
141
+ its(:post_time) {should eq(Time.parse('2012-12-05 12:46:00 -0500'))}
142
+ its(:price) {should eq(1149000)}
143
+ its(:images) {should eq([])}
144
+ its(:pics) {should eq(["http://images.craigslist.org/3M43Jb3ld5L55Z35M5cbr12a6ec99f72d18e2.jpg", "http://images.craigslist.org/3L73H63l45I55L35G4cbr8902484988f3112f.jpg", "http://images.craigslist.org/3Le3Ic3Hf5I75La5M1cbrdd1617f48d4c1f02.jpg"])}
145
+ its(:img_types) {should eq([:pic])}
146
+ its(:contents_as_plain) {should eq("\u0095 $1,149,000.00 \n\u0095 2000ft²\n\u0095 3-bedroom\n\u0095 3-full bath\nJUST REDUCED FOR A QUICK SALE!!!!\nThis great Three Story 3-bedroom 3 Full bath home in Islamorada, Florida Keys is the perfect get-away to relax, enjoy the fresh breezes, the sandy beach and watch spectacular sunsets. A spacious master suite bedroom upstairs has a private bath with whirlpool Jacuzzi tub and private patio. Downstairs you have access to full kitchen custom cabinets, granite countertops, stainless steel appliances, open living-dining room, Travertine marble throughout the whole house, two bedrooms and two full baths. Enjoy a concrete dock and davits with boat access to the Gulf and ocean in less than five minutes via the deep-water canal right outside your door. Enjoy access to a private community beach with picnic tables and tiki huts for fun barbecues; jet skiing; boat ramp and recreational boating and water skiing. Don't miss out on these .Located in a quiet neighborhood at Mile Marker 74 in Islamorada, this location is tastefully furnished, has a gourmet kitchen and is ideal for boaters.\n(hablamos español) \nCall for appointment 305.467.6348 / 786.484.0917\nMarisol Acosta\nLicensed, Realtor Associate\nAkoya Realty LLC\nwww.akoyarealty.com") }
147
+ its(:contents) {should eq("\u0095 $1,149,000.00 <br>\n\u0095 2000ft²<br>\n\u0095 3-bedroom<br>\n\u0095 3-full bath<br><br>\nJUST REDUCED FOR A QUICK SALE!!!!<br><br>\nThis great Three Story 3-bedroom 3 Full bath home in Islamorada, Florida Keys is the perfect get-away to relax, enjoy the fresh breezes, the sandy beach and watch spectacular sunsets. A spacious master suite bedroom upstairs has a private bath with whirlpool Jacuzzi tub and private patio. Downstairs you have access to full kitchen custom cabinets, granite countertops, stainless steel appliances, open living-dining room, Travertine marble throughout the whole house, two bedrooms and two full baths. Enjoy a concrete dock and davits with boat access to the Gulf and ocean in less than five minutes via the deep-water canal right outside your door. Enjoy access to a private community beach with picnic tables and tiki huts for fun barbecues; jet skiing; boat ramp and recreational boating and water skiing. Don't miss out on these .Located in a quiet neighborhood at Mile Marker 74 in Islamorada, this location is tastefully furnished, has a gourmet kitchen and is ideal for boaters.<br><br>\n(hablamos español) <br><br>\nCall for appointment 305.467.6348 / 786.484.0917<br>\nMarisol Acosta<br>\nLicensed, Realtor Associate<br>\nAkoya Realty LLC<br>\nwww.akoyarealty.com") }
148
+ end
149
+ end
@@ -0,0 +1,8 @@
1
+ require_relative '../lib/libcraigscrape'
2
+ # require_relative '../test/libcraigscrape_test_helpers'
3
+ # include LibcraigscrapeTestHelpers
4
+
5
+ def uri_for(filename)
6
+ 'file://%s' % [ File.dirname(File.expand_path(__FILE__)),
7
+ 'assets', filename].join('/')
8
+ end
metadata CHANGED
@@ -1,38 +1,213 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: olek-libcraigscrape
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
- - Chris DeRose, DeRose Technologies, Inc.
8
+ - Chris DeRose
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-18 00:00:00.000000000 Z
13
- dependencies: []
14
- description: quick, easy, craigslist parsing library that takes the monotony out of
15
- working with craigslist posts and listings
16
- email: cderose@derosetechnologies.com
17
- executables:
18
- - craigwatch
12
+ date: 2012-12-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: htmlentities
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '4.3'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '4.3'
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: 1.4.4
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: 1.4.4
46
+ - !ruby/object:Gem::Dependency
47
+ name: activerecord
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: 3.2.9
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 3.2.9
62
+ - !ruby/object:Gem::Dependency
63
+ name: activesupport
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: 3.2.9
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 3.2.9
78
+ - !ruby/object:Gem::Dependency
79
+ name: rspec
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: kwalify
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: '0.7'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: '0.7'
110
+ - !ruby/object:Gem::Dependency
111
+ name: actionmailer
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: 3.2.9
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: 3.2.9
126
+ - !ruby/object:Gem::Dependency
127
+ name: sqlite3
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: '1.3'
134
+ type: :runtime
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: '1.3'
142
+ - !ruby/object:Gem::Dependency
143
+ name: typhoeus
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: '0.5'
150
+ type: :runtime
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: '0.5'
158
+ - !ruby/object:Gem::Dependency
159
+ name: rake
160
+ requirement: !ruby/object:Gem::Requirement
161
+ none: false
162
+ requirements:
163
+ - - ! '>='
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ type: :runtime
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ none: false
170
+ requirements:
171
+ - - ! '>='
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ - !ruby/object:Gem::Dependency
175
+ name: rspec
176
+ requirement: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: 2.12.0
182
+ type: :development
183
+ prerelease: false
184
+ version_requirements: !ruby/object:Gem::Requirement
185
+ none: false
186
+ requirements:
187
+ - - ! '>='
188
+ - !ruby/object:Gem::Version
189
+ version: 2.12.0
190
+ description: An easy library to do the heavy lifting between you and Craigslist's
191
+ posting database. Given a URL, libcraigscrape will follow links, scrape fields,
192
+ and make ruby-sense out of the raw html from craigslist's servers. libcraigscrape
193
+ was primarily developed to support the included craigwatch script. See the included
194
+ craigwatch script for examples of libcraigscrape in action, and (hopefully) to serve
195
+ an immediate craigscraping need.
196
+ email: info@derosetechnologies.com
197
+ executables: []
19
198
  extensions: []
20
- extra_rdoc_files:
21
- - README
22
- - CHANGELOG
23
- - COPYING
24
- - COPYING.LESSER
25
- - bin/craigwatch
199
+ extra_rdoc_files: []
26
200
  files:
27
- - lib/libcraigscrape.rb
28
- - test/post_samples/1207457727.html
29
- - CHANGELOG
30
- - COPYING
31
- - COPYING.LESSER
32
- - bin/craigwatch
201
+ - Rakefile
33
202
  - bin/craig_report_schema.yml
203
+ - bin/craigwatch
34
204
  - bin/report_mailer/report.html.erb
35
205
  - bin/report_mailer/report.text.erb
206
+ - lib/geo_listings.rb
207
+ - lib/libcraigscrape.rb
208
+ - lib/listings.rb
209
+ - lib/posting.rb
210
+ - lib/scraper.rb
36
211
  - test/geolisting_samples/geo_listing_ca070209.html
37
212
  - test/geolisting_samples/geo_listing_ca_sk070209.html
38
213
  - test/geolisting_samples/geo_listing_cn070209.html
@@ -71,7 +246,7 @@ files:
71
246
  - test/listing_samples/miami_search_sss_rack.6.18.09/miami_search_sss_rack900.6.18.09.html
72
247
  - test/listing_samples/new_listing_span.4.17.10.html
73
248
  - test/listing_samples/short_search_output.html
74
- - README
249
+ - test/post_samples/1207457727.html
75
250
  - test/post_samples/brw_reb_1224008903.html
76
251
  - test/post_samples/posting0.html
77
252
  - test/post_samples/posting1.html
@@ -88,21 +263,30 @@ files:
88
263
  - test/test_craigslist_geolisting.rb
89
264
  - test/test_craigslist_listing.rb
90
265
  - test/test_craigslist_posting.rb
91
- - lib/geo_listings.rb
92
- - Rakefile
93
- - lib/listings.rb
94
- - lib/posting.rb
95
- - lib/scraper.rb
266
+ - spec/assets/geolisting_iso_us_120412.html
267
+ - spec/assets/listing_cta_ftl_112612.html
268
+ - spec/assets/listing_search_ppa_nyc_121212.html
269
+ - spec/assets/posting_daytona_art_120512-2.html
270
+ - spec/assets/posting_daytona_art_120512.html
271
+ - spec/assets/posting_mdc_cto_ftl_112612.html
272
+ - spec/assets/posting_mdc_reb_120612.html
273
+ - spec/assets/posting_page_not_found_120512.html
274
+ - spec/assets/posting_sya_121012-2.html
275
+ - spec/assets/posting_sya_121012.html
276
+ - spec/assets/this_post_has_expired_old.html
277
+ - spec/geolisting_spec.rb
278
+ - spec/listings_spec.rb
279
+ - spec/postings_spec.rb
280
+ - spec/spec_helper.rb
281
+ - Gemfile
282
+ - CHANGELOG
283
+ - COPYING
284
+ - COPYING.LESSER
285
+ - README
96
286
  homepage: http://www.derosetechnologies.com/community/libcraigscrape
97
287
  licenses: []
98
288
  post_install_message:
99
- rdoc_options:
100
- - --quiet
101
- - --title
102
- - The libcraigscrape Reference
103
- - --main
104
- - README
105
- - --inline-source
289
+ rdoc_options: []
106
290
  require_paths:
107
291
  - lib
108
292
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -111,26 +295,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
111
295
  - - ! '>='
112
296
  - !ruby/object:Gem::Version
113
297
  version: '0'
114
- segments:
115
- - 0
116
- hash: -1981985228483291129
117
298
  required_rubygems_version: !ruby/object:Gem::Requirement
118
299
  none: false
119
300
  requirements:
120
301
  - - ! '>='
121
302
  - !ruby/object:Gem::Version
122
303
  version: '0'
123
- segments:
124
- - 0
125
- hash: -1981985228483291129
126
304
  requirements: []
127
- rubyforge_project: libcraigwatch
305
+ rubyforge_project:
128
306
  rubygems_version: 1.8.23
129
307
  signing_key:
130
308
  specification_version: 3
131
309
  summary: quick, easy, craigslist parsing library that takes the monotony out of working
132
310
  with craigslist posts and listings
133
- test_files:
134
- - test/test_craigslist_geolisting.rb
135
- - test/test_craigslist_listing.rb
136
- - test/test_craigslist_posting.rb
311
+ test_files: []