libcraigscrape 1.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +12 -1
- data/Gemfile +12 -0
- data/Rakefile +1 -54
- data/bin/craig_report_schema.yml +4 -1
- data/bin/craigwatch +148 -146
- data/bin/report_mailer/report.html.erb +20 -0
- data/bin/report_mailer/{craigslist_report.plain.erb → report.text.erb} +7 -6
- data/lib/geo_listings.rb +1 -1
- data/lib/libcraigscrape.rb +52 -59
- data/lib/listings.rb +75 -39
- data/lib/posting.rb +120 -63
- data/lib/scraper.rb +43 -63
- data/spec/assets/geolisting_iso_us_120412.html +441 -0
- data/spec/assets/listing_cta_ftl_112612.html +1470 -0
- data/spec/assets/listing_rea_miami_123012.html +1397 -0
- data/spec/assets/listing_search_ppa_nyc_121212.html +1584 -0
- data/spec/assets/posting_daytona_art_120512-2.html +160 -0
- data/spec/assets/posting_daytona_art_120512.html +153 -0
- data/spec/assets/posting_mdc_cto_ftl_112612.html +170 -0
- data/spec/assets/posting_mdc_reb_120612.html +183 -0
- data/spec/assets/posting_sfbay_1226.html +157 -0
- data/spec/assets/posting_sya_121012-2.html +122 -0
- data/spec/assets/posting_sya_121012.html +165 -0
- data/spec/assets/this_post_has_expired_old.html +48 -0
- data/spec/geolisting_spec.rb +9 -0
- data/spec/listings_spec.rb +77 -0
- data/spec/postings_spec.rb +157 -0
- data/spec/spec_helper.rb +8 -0
- data/test/test_craigslist_geolisting.rb +5 -5
- data/test/test_craigslist_listing.rb +30 -30
- data/test/test_craigslist_posting.rb +25 -145
- metadata +200 -114
- data/bin/report_mailer/craigslist_report.html.erb +0 -17
data/CHANGELOG
CHANGED
@@ -1,5 +1,16 @@
|
|
1
1
|
== Change Log
|
2
2
|
|
3
|
+
=== Release 1.1
|
4
|
+
- ruby 1.9.3 support
|
5
|
+
- migrated from rails 2 gems to rails 3
|
6
|
+
- Replaced Net::HTTP with typhoeus
|
7
|
+
- Switched to the money gem for price storage
|
8
|
+
- fixed some new parsing bugs introduced by craigslist template changes
|
9
|
+
- added a tz parameter in the craigwatch report definition for overriding the
|
10
|
+
default timezone in your reports.
|
11
|
+
- added support for enable_starttls_auto and tls in craigwatch (Thanks olek!)
|
12
|
+
- added erb evaluation support to the craigwatch definition file (Thanks olek!)
|
13
|
+
|
3
14
|
=== Release 1.0
|
4
15
|
- Replaced hpricot dependency with Nokogiri. Nokogiri should be faster and more reliable. Whoo-hoo!
|
5
16
|
|
@@ -91,4 +102,4 @@
|
|
91
102
|
- Found an example of a screwy listing that was starting date h4's, but not actually listing the dates in them (see mia_fua_index8900.5.21.09.html test case). Added test case - handled appropriately. Seems like a bug in craigslist itself.
|
92
103
|
|
93
104
|
=== Release 0.5.0 (April 30, 2009)
|
94
|
-
- First release. Not much to say - hopefully someone else finds this useful.
|
105
|
+
- First release. Not much to say - hopefully someone else finds this useful.
|
data/Gemfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
gem 'htmlentities', '~>4.3'
|
4
|
+
gem 'nokogiri', '>= 1.4.4'
|
5
|
+
gem 'activerecord', '~>3.2.9'
|
6
|
+
gem 'activesupport', '~>3.2.9'
|
7
|
+
gem 'rspec'
|
8
|
+
gem 'kwalify', '~>0.7'
|
9
|
+
gem 'actionmailer', '~>3.2.9'
|
10
|
+
gem 'sqlite3', '~>1.3'
|
11
|
+
gem 'typhoeus', '~>0.5'
|
12
|
+
gem 'money', '~>5.0.0'
|
data/Rakefile
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'rake'
|
2
2
|
require 'rake/clean'
|
3
|
-
require 'rake/gempackagetask'
|
4
|
-
require 'rake/rdoctask'
|
3
|
+
require 'rdoc/task'
|
5
4
|
require 'rake/testtask'
|
6
5
|
require 'fileutils'
|
7
6
|
require 'tempfile'
|
@@ -37,10 +36,6 @@ SPEC =
|
|
37
36
|
s.files = PKG_FILES
|
38
37
|
s.require_paths = ["lib"]
|
39
38
|
s.test_files = FileList['test/test_*.rb']
|
40
|
-
s.add_dependency 'nokogiri', '>= 1.4.4'
|
41
|
-
s.add_dependency 'htmlentities', '>= 4.0.0'
|
42
|
-
s.add_dependency 'activesupport','>= 2.3.0', '< 3'
|
43
|
-
s.add_dependency 'activerecord', '>= 2.3.0', '< 3'
|
44
39
|
end
|
45
40
|
|
46
41
|
desc "Run all the tests"
|
@@ -58,14 +53,6 @@ Rake::RDocTask.new do |rdoc|
|
|
58
53
|
rdoc.rdoc_files.add RDOC_FILES+Dir.glob('lib/*.rb').sort_by{|a,b| (a == 'lib/libcraigscrape.rb') ? -1 : 0 }
|
59
54
|
end
|
60
55
|
|
61
|
-
Rake::GemPackageTask.new(SPEC) do |p|
|
62
|
-
p.need_tar = false
|
63
|
-
p.need_tar_gz = false
|
64
|
-
p.need_tar_bz2 = false
|
65
|
-
p.need_zip = false
|
66
|
-
p.gem_spec = SPEC
|
67
|
-
end
|
68
|
-
|
69
56
|
task "lib" do
|
70
57
|
directory "lib"
|
71
58
|
end
|
@@ -79,44 +66,4 @@ task :uninstall => [:clean] do
|
|
79
66
|
sh %{sudo gem uninstall #{NAME}}
|
80
67
|
end
|
81
68
|
|
82
|
-
require 'roodi'
|
83
|
-
require 'roodi_task'
|
84
|
-
|
85
|
-
namespace :code_tests do
|
86
|
-
desc "Analyze for code complexity"
|
87
|
-
task :flog do
|
88
|
-
require 'flog'
|
89
|
-
|
90
|
-
flog = Flog.new
|
91
|
-
flog.flog_files ['lib']
|
92
|
-
threshold = 105
|
93
|
-
|
94
|
-
bad_methods = flog.totals.select do |name, score|
|
95
|
-
score > threshold
|
96
|
-
end
|
97
|
-
|
98
|
-
bad_methods.sort { |a,b| a[1] <=> b[1] }.each do |name, score|
|
99
|
-
puts "%8.1f: %s" % [score, name]
|
100
|
-
end
|
101
|
-
|
102
|
-
puts "WARNING : #{bad_methods.size} methods have a flog complexity > #{threshold}" unless bad_methods.empty?
|
103
|
-
end
|
104
|
-
|
105
|
-
desc "Analyze for code duplication"
|
106
|
-
require 'flay'
|
107
|
-
task :flay do
|
108
|
-
threshold = 25
|
109
|
-
flay = Flay.new({:fuzzy => false, :verbose => false, :mass => threshold})
|
110
|
-
flay.process(*Flay.expand_dirs_to_files(['lib']))
|
111
|
-
|
112
|
-
flay.report
|
113
|
-
|
114
|
-
raise "#{flay.masses.size} chunks of code have a duplicate mass > #{threshold}" unless flay.masses.empty?
|
115
|
-
end
|
116
|
-
|
117
|
-
RoodiTask.new 'roodi', ['lib/*.rb'], 'roodi.yml'
|
118
|
-
end
|
119
|
-
|
120
|
-
desc "Run all code tests"
|
121
|
-
task :code_tests => %w(code_tests:flog code_tests:flay code_tests:roodi)
|
122
69
|
|
data/bin/craig_report_schema.yml
CHANGED
@@ -4,6 +4,7 @@ mapping:
|
|
4
4
|
"debug_database": { type: bool, required: no }
|
5
5
|
"debug_mailer": { type: bool, required: no }
|
6
6
|
"debug_craigscrape": { type: bool, required: no }
|
7
|
+
"tz": { type: str, required: no }
|
7
8
|
|
8
9
|
"report_name": { type: str, required: no }
|
9
10
|
"email_to": { type: str, required: yes }
|
@@ -18,6 +19,8 @@ mapping:
|
|
18
19
|
"domain": { type: str, required: no }
|
19
20
|
"password": { type: str, required: no }
|
20
21
|
"authentication": { type: str, required: no }
|
22
|
+
"enable_starttls_auto": { type: bool, required: no }
|
23
|
+
"tls": { type: bool, required: no }
|
21
24
|
"tracking_database":
|
22
25
|
type: map
|
23
26
|
mapping:
|
@@ -63,4 +66,4 @@ mapping:
|
|
63
66
|
"starting":
|
64
67
|
type: str
|
65
68
|
required: no
|
66
|
-
pattern: /^[\d]{1,2}\/[\d]{1,2}\/(?:[\d]{2}|[\d]{4})$/
|
69
|
+
pattern: /^[\d]{1,2}\/[\d]{1,2}\/(?:[\d]{2}|[\d]{4})$/
|