libcraigscrape 1.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +12 -1
- data/Gemfile +12 -0
- data/Rakefile +1 -54
- data/bin/craig_report_schema.yml +4 -1
- data/bin/craigwatch +148 -146
- data/bin/report_mailer/report.html.erb +20 -0
- data/bin/report_mailer/{craigslist_report.plain.erb → report.text.erb} +7 -6
- data/lib/geo_listings.rb +1 -1
- data/lib/libcraigscrape.rb +52 -59
- data/lib/listings.rb +75 -39
- data/lib/posting.rb +120 -63
- data/lib/scraper.rb +43 -63
- data/spec/assets/geolisting_iso_us_120412.html +441 -0
- data/spec/assets/listing_cta_ftl_112612.html +1470 -0
- data/spec/assets/listing_rea_miami_123012.html +1397 -0
- data/spec/assets/listing_search_ppa_nyc_121212.html +1584 -0
- data/spec/assets/posting_daytona_art_120512-2.html +160 -0
- data/spec/assets/posting_daytona_art_120512.html +153 -0
- data/spec/assets/posting_mdc_cto_ftl_112612.html +170 -0
- data/spec/assets/posting_mdc_reb_120612.html +183 -0
- data/spec/assets/posting_sfbay_1226.html +157 -0
- data/spec/assets/posting_sya_121012-2.html +122 -0
- data/spec/assets/posting_sya_121012.html +165 -0
- data/spec/assets/this_post_has_expired_old.html +48 -0
- data/spec/geolisting_spec.rb +9 -0
- data/spec/listings_spec.rb +77 -0
- data/spec/postings_spec.rb +157 -0
- data/spec/spec_helper.rb +8 -0
- data/test/test_craigslist_geolisting.rb +5 -5
- data/test/test_craigslist_listing.rb +30 -30
- data/test/test_craigslist_posting.rb +25 -145
- metadata +200 -114
- data/bin/report_mailer/craigslist_report.html.erb +0 -17
data/CHANGELOG
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
== Change Log
|
2
2
|
|
3
|
+
=== Release 1.1
|
4
|
+
- ruby 1.9.3 support
|
5
|
+
- migrated from rails 2 gems to rails 3
|
6
|
+
- Replaced Net::HTTP with typhoeus
|
7
|
+
- Switched to the money gem for price storage
|
8
|
+
- fixed some new parsing bugs introduced by craigslist template changes
|
9
|
+
- added a tz parameter in the craigwatch report definition for overriding the
|
10
|
+
default timezone in your reports.
|
11
|
+
- added support for enable_starttls_auto and tls in craigwatch (Thanks olek!)
|
12
|
+
- added erb evaluation support to the craigwatch definition file (Thanks olek!)
|
13
|
+
|
3
14
|
=== Release 1.0
|
4
15
|
- Replaced hpricot dependency with Nokogiri. Nokogiri should be faster and more reliable. Whoo-hoo!
|
5
16
|
|
@@ -91,4 +102,4 @@
|
|
91
102
|
- Found an example of a screwy listing that was starting date h4's, but not actually listing the dates in them (see mia_fua_index8900.5.21.09.html test case). Added test case - handled appropriately. Seems like a bug in craigslist itself.
|
92
103
|
|
93
104
|
=== Release 0.5.0 (April 30, 2009)
|
94
|
-
- First release. Not much to say - hopefully someone else finds this useful.
|
105
|
+
- First release. Not much to say - hopefully someone else finds this useful.
|
data/Gemfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'htmlentities', '~>4.3'
|
4
|
+
gem 'nokogiri', '>= 1.4.4'
|
5
|
+
gem 'activerecord', '~>3.2.9'
|
6
|
+
gem 'activesupport', '~>3.2.9'
|
7
|
+
gem 'rspec'
|
8
|
+
gem 'kwalify', '~>0.7'
|
9
|
+
gem 'actionmailer', '~>3.2.9'
|
10
|
+
gem 'sqlite3', '~>1.3'
|
11
|
+
gem 'typhoeus', '~>0.5'
|
12
|
+
gem 'money', '~>5.0.0'
|
data/Rakefile
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'rake'
|
2
2
|
require 'rake/clean'
|
3
|
-
require 'rake/gempackagetask'
|
4
|
-
require 'rake/rdoctask'
|
3
|
+
require 'rdoc/task'
|
5
4
|
require 'rake/testtask'
|
6
5
|
require 'fileutils'
|
7
6
|
require 'tempfile'
|
@@ -37,10 +36,6 @@ SPEC =
|
|
37
36
|
s.files = PKG_FILES
|
38
37
|
s.require_paths = ["lib"]
|
39
38
|
s.test_files = FileList['test/test_*.rb']
|
40
|
-
s.add_dependency 'nokogiri', '>= 1.4.4'
|
41
|
-
s.add_dependency 'htmlentities', '>= 4.0.0'
|
42
|
-
s.add_dependency 'activesupport','>= 2.3.0', '< 3'
|
43
|
-
s.add_dependency 'activerecord', '>= 2.3.0', '< 3'
|
44
39
|
end
|
45
40
|
|
46
41
|
desc "Run all the tests"
|
@@ -58,14 +53,6 @@ Rake::RDocTask.new do |rdoc|
|
|
58
53
|
rdoc.rdoc_files.add RDOC_FILES+Dir.glob('lib/*.rb').sort_by{|a,b| (a == 'lib/libcraigscrape.rb') ? -1 : 0 }
|
59
54
|
end
|
60
55
|
|
61
|
-
Rake::GemPackageTask.new(SPEC) do |p|
|
62
|
-
p.need_tar = false
|
63
|
-
p.need_tar_gz = false
|
64
|
-
p.need_tar_bz2 = false
|
65
|
-
p.need_zip = false
|
66
|
-
p.gem_spec = SPEC
|
67
|
-
end
|
68
|
-
|
69
56
|
task "lib" do
|
70
57
|
directory "lib"
|
71
58
|
end
|
@@ -79,44 +66,4 @@ task :uninstall => [:clean] do
|
|
79
66
|
sh %{sudo gem uninstall #{NAME}}
|
80
67
|
end
|
81
68
|
|
82
|
-
require 'roodi'
|
83
|
-
require 'roodi_task'
|
84
|
-
|
85
|
-
namespace :code_tests do
|
86
|
-
desc "Analyze for code complexity"
|
87
|
-
task :flog do
|
88
|
-
require 'flog'
|
89
|
-
|
90
|
-
flog = Flog.new
|
91
|
-
flog.flog_files ['lib']
|
92
|
-
threshold = 105
|
93
|
-
|
94
|
-
bad_methods = flog.totals.select do |name, score|
|
95
|
-
score > threshold
|
96
|
-
end
|
97
|
-
|
98
|
-
bad_methods.sort { |a,b| a[1] <=> b[1] }.each do |name, score|
|
99
|
-
puts "%8.1f: %s" % [score, name]
|
100
|
-
end
|
101
|
-
|
102
|
-
puts "WARNING : #{bad_methods.size} methods have a flog complexity > #{threshold}" unless bad_methods.empty?
|
103
|
-
end
|
104
|
-
|
105
|
-
desc "Analyze for code duplication"
|
106
|
-
require 'flay'
|
107
|
-
task :flay do
|
108
|
-
threshold = 25
|
109
|
-
flay = Flay.new({:fuzzy => false, :verbose => false, :mass => threshold})
|
110
|
-
flay.process(*Flay.expand_dirs_to_files(['lib']))
|
111
|
-
|
112
|
-
flay.report
|
113
|
-
|
114
|
-
raise "#{flay.masses.size} chunks of code have a duplicate mass > #{threshold}" unless flay.masses.empty?
|
115
|
-
end
|
116
|
-
|
117
|
-
RoodiTask.new 'roodi', ['lib/*.rb'], 'roodi.yml'
|
118
|
-
end
|
119
|
-
|
120
|
-
desc "Run all code tests"
|
121
|
-
task :code_tests => %w(code_tests:flog code_tests:flay code_tests:roodi)
|
122
69
|
|
data/bin/craig_report_schema.yml
CHANGED
@@ -4,6 +4,7 @@ mapping:
|
|
4
4
|
"debug_database": { type: bool, required: no }
|
5
5
|
"debug_mailer": { type: bool, required: no }
|
6
6
|
"debug_craigscrape": { type: bool, required: no }
|
7
|
+
"tz": { type: str, required: no }
|
7
8
|
|
8
9
|
"report_name": { type: str, required: no }
|
9
10
|
"email_to": { type: str, required: yes }
|
@@ -18,6 +19,8 @@ mapping:
|
|
18
19
|
"domain": { type: str, required: no }
|
19
20
|
"password": { type: str, required: no }
|
20
21
|
"authentication": { type: str, required: no }
|
22
|
+
"enable_starttls_auto": { type: bool, required: no }
|
23
|
+
"tls": { type: bool, required: no }
|
21
24
|
"tracking_database":
|
22
25
|
type: map
|
23
26
|
mapping:
|
@@ -63,4 +66,4 @@ mapping:
|
|
63
66
|
"starting":
|
64
67
|
type: str
|
65
68
|
required: no
|
66
|
-
pattern: /^[\d]{1,2}\/[\d]{1,2}\/(?:[\d]{2}|[\d]{4})$/
|
69
|
+
pattern: /^[\d]{1,2}\/[\d]{1,2}\/(?:[\d]{2}|[\d]{4})$/
|