classifieds_cli_app 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -3
- data/lib/classifieds/auto_scraper.rb +15 -15
- data/lib/classifieds/boat_scraper.rb +15 -15
- data/lib/classifieds/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c16f45478a5a574ffad8f21da75ab3c5d82be2ac
|
|
4
|
+
data.tar.gz: 33cb3156c226ffec9ccee433077c0f80a61758bc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7295e6ab313b0d86a611a3e0c43c140df3526bfd48b6411b2fb9ce1b40a0090b9cb0924c515cf17338dc51a20a541e3d85e8426efbff6df2a86e938162528efa
|
|
7
|
+
data.tar.gz: b653a96f8ae4be2341ac1979e08ec111d1798397e6a1b1909193f83904331bb9feefb0da345cf44b1d56d2e814b59dc7bd72ad05c4cdcaea5b2a17b65a8f30ad
|
data/README.md
CHANGED
|
@@ -3,6 +3,10 @@
|
|
|
3
3
|
Display classified ads from the current online edition of Newsday.com and BoatTrader.com.
|
|
4
4
|
This gem was created to meet the requirements of the learn.co CLI Data Gem Project.
|
|
5
5
|
|
|
6
|
+
0.1.1 Initial release.
|
|
7
|
+
0.1.2 Improved column display formatting.
|
|
8
|
+
0.1.3 Improved use of Nokogiri methods.
|
|
9
|
+
|
|
6
10
|
## Installation
|
|
7
11
|
|
|
8
12
|
Add this line to your application's Gemfile:
|
|
@@ -13,15 +17,15 @@ gem 'classifieds-cli-app'
|
|
|
13
17
|
|
|
14
18
|
And then execute:
|
|
15
19
|
|
|
16
|
-
|
|
20
|
+
`$ bundle`
|
|
17
21
|
|
|
18
22
|
Or install it yourself as:
|
|
19
23
|
|
|
20
|
-
|
|
24
|
+
`$ gem install classifieds-cli-app`
|
|
21
25
|
|
|
22
26
|
## Usage
|
|
23
27
|
|
|
24
|
-
|
|
28
|
+
`$ classifieds`
|
|
25
29
|
|
|
26
30
|
## Development
|
|
27
31
|
|
|
@@ -9,12 +9,12 @@ class Classifieds::AutoScraper # converts automobile classified listings into o
|
|
|
9
9
|
id = listing_id(result)
|
|
10
10
|
title_parts = title_parts(result) # => [year, make, model]
|
|
11
11
|
|
|
12
|
-
description_pod_div = result.
|
|
12
|
+
description_pod_div = result.at_css('.aiDescriptionPod')
|
|
13
13
|
start_date = start_date(description_pod_div)
|
|
14
14
|
mileage = mileage(description_pod_div)
|
|
15
15
|
sale_price = sale_price(description_pod_div)
|
|
16
16
|
|
|
17
|
-
contact_div = result.
|
|
17
|
+
contact_div = result.at_css('.contactLinks')
|
|
18
18
|
seller_name = seller_name(contact_div)
|
|
19
19
|
seller_location = seller_location(contact_div)
|
|
20
20
|
seller_phone = seller_phone(contact_div, id, results_url_file)
|
|
@@ -31,14 +31,14 @@ class Classifieds::AutoScraper # converts automobile classified listings into o
|
|
|
31
31
|
# Returns detail attributes and values in detail_values hash
|
|
32
32
|
def self.scrape_results_detail_page(detail_doc, item_condition, detail_values)
|
|
33
33
|
# Create some entries manually.
|
|
34
|
-
detail_values['Description'.to_sym] = detail_doc.
|
|
34
|
+
detail_values['Description'.to_sym] = detail_doc.at_css('.aiDetailsDescription h2').next.text.strip # Description must be first attribute.
|
|
35
35
|
detail_values['Condition'.to_sym] = item_condition
|
|
36
36
|
detail_values['Certified'.to_sym] = ''
|
|
37
37
|
# Create the rest from scraping the html's detail attribute/value table.
|
|
38
38
|
detail_cells = detail_cells(detail_doc)
|
|
39
39
|
index = 0
|
|
40
40
|
while index < detail_cells.size
|
|
41
|
-
if ("\u00A0" == detail_cells[index].text)
|
|
41
|
+
if ("\u00A0" == detail_cells[index].text) && detail_cells[index].at_css('span.aiCPOiconDetail')
|
|
42
42
|
detail_values[:Certified] = 'Yes' # The table row containing attribute Certified Icon does not have a value column.
|
|
43
43
|
index += 1
|
|
44
44
|
else # Grab the attribute and value from the html table.
|
|
@@ -66,43 +66,43 @@ private
|
|
|
66
66
|
# Returns a summary record's detail page url
|
|
67
67
|
def self.detail_url(url, doc) # detail link is given as relative to the summary page's domain
|
|
68
68
|
uri = URI.parse(url)
|
|
69
|
-
"#{uri.scheme}://#{uri.host}#{doc.
|
|
69
|
+
"#{uri.scheme}://#{uri.host}#{doc.at_css('.aiResultTitle h3 a')['href']}"
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
# Returns the listing's id
|
|
73
73
|
def self.listing_id(doc) # e.g. from "aiResultsMainDiv547967889"
|
|
74
|
-
doc.
|
|
74
|
+
doc.at_css('.aiResultsMainDiv')['id'].match(/\d+/).to_s
|
|
75
75
|
end
|
|
76
76
|
|
|
77
77
|
# Returns the listing's mileage value
|
|
78
78
|
def self.mileage(doc)
|
|
79
|
-
value = doc.
|
|
79
|
+
value = doc.at_css('.listingType').text # e.g. 'Mileage: xx,xxx'
|
|
80
80
|
(value.include? 'Available') ? 'NA' : value.match(/Mileage: (\d*,{,1}\d+)/)[1]
|
|
81
81
|
end
|
|
82
82
|
|
|
83
83
|
# Returns the listing's sale price
|
|
84
84
|
def self.sale_price(doc)
|
|
85
|
-
value = doc.
|
|
85
|
+
value = doc.at_css('.price').text
|
|
86
86
|
(value.include? 'Call') ? 'Call' : value
|
|
87
87
|
end
|
|
88
88
|
|
|
89
89
|
# Returns the seller's address
|
|
90
90
|
def self.seller_location(doc)
|
|
91
|
-
doc.
|
|
91
|
+
doc.at_css('strong')[1].text.strip
|
|
92
92
|
end
|
|
93
93
|
|
|
94
94
|
# Returns the seller's name
|
|
95
95
|
def self.seller_name(doc)
|
|
96
|
-
doc.
|
|
96
|
+
doc.at_css('strong')[0].text.strip
|
|
97
97
|
end
|
|
98
98
|
|
|
99
99
|
PHONE_PATTERN = '\(\d\d\d\) \d\d\d-\d\d\d\d'
|
|
100
100
|
|
|
101
101
|
# Returns the seller's phone number, if it exists
|
|
102
102
|
def self.seller_phone(doc, id, url_file)
|
|
103
|
-
span = doc.
|
|
104
|
-
if
|
|
105
|
-
match_data = span
|
|
103
|
+
span = doc.at_css('span')
|
|
104
|
+
if span
|
|
105
|
+
match_data = span.text.match(/#{PHONE_PATTERN}/)
|
|
106
106
|
match_data ? match_data.to_s : ''
|
|
107
107
|
else
|
|
108
108
|
seller_phone_private(url_file, id)
|
|
@@ -118,12 +118,12 @@ private
|
|
|
118
118
|
|
|
119
119
|
# Returns listing start date
|
|
120
120
|
def self.start_date(doc)
|
|
121
|
-
doc.
|
|
121
|
+
doc.at_css('.listingDate').text
|
|
122
122
|
end
|
|
123
123
|
|
|
124
124
|
# Returns the summary listing's title
|
|
125
125
|
def self.title(doc)
|
|
126
|
-
doc.
|
|
126
|
+
doc.at_css('.aiResultTitle h3').text # '2010 Ford Explorer XL'
|
|
127
127
|
end
|
|
128
128
|
|
|
129
129
|
# Returns the year, make, and model from the title string
|
|
@@ -9,7 +9,7 @@ class Classifieds::BoatScraper # converts Boattrader.com power boat classified
|
|
|
9
9
|
id = listing_id(result)
|
|
10
10
|
next if id.nil?
|
|
11
11
|
|
|
12
|
-
descr_div = result.
|
|
12
|
+
descr_div = result.at_css('.description')
|
|
13
13
|
title_parts = split_title(descr_div) # => [year, make, model]
|
|
14
14
|
start_date = '' # Listing start_date currently not available from web page.
|
|
15
15
|
sale_price = sale_price(descr_div)
|
|
@@ -53,14 +53,14 @@ private
|
|
|
53
53
|
# Returns a summary record's detail page url
|
|
54
54
|
def self.detail_url(url, doc) # detail link is given as relative to the summary page's domain
|
|
55
55
|
uri = URI.parse(url)
|
|
56
|
-
"#{uri.scheme}://#{uri.host}#{doc.
|
|
56
|
+
"#{uri.scheme}://#{uri.host}#{doc.at_css('.inner a')['href']}"
|
|
57
57
|
end
|
|
58
58
|
|
|
59
59
|
# Returns detail attributes and values in detail_values hash
|
|
60
60
|
def self.do_alt_processing(doc, item_condition, detail_values)
|
|
61
61
|
# Create some entries manually.
|
|
62
|
-
main_content = doc.
|
|
63
|
-
detail_values['Description'.to_sym] = main_content.
|
|
62
|
+
main_content = doc.at_css('#main-content')
|
|
63
|
+
detail_values['Description'.to_sym] = main_content.at_css('p').text.strip # Description must be first attribute.
|
|
64
64
|
detail_values['Condition'.to_sym] = item_condition
|
|
65
65
|
|
|
66
66
|
# Create the rest from scraping the html's detail attribute/value table.
|
|
@@ -80,7 +80,7 @@ private
|
|
|
80
80
|
# Returns detail attributes and values in detail_values hash
|
|
81
81
|
def self.do_normal_processing(doc, boat_doc, item_condition, detail_values)
|
|
82
82
|
# Create some entries manually.
|
|
83
|
-
detail_values['Description'.to_sym] = doc.
|
|
83
|
+
detail_values['Description'.to_sym] = doc.at_css('#main-details').text.strip
|
|
84
84
|
detail_values['Condition'.to_sym] = item_condition
|
|
85
85
|
|
|
86
86
|
# Create the rest from scraping the html's detail attribute/value table.
|
|
@@ -95,7 +95,7 @@ private
|
|
|
95
95
|
process_detail_list_alt(dl_tag, detail_values)
|
|
96
96
|
else
|
|
97
97
|
attribute = attribute_tag.text.chomp(':')
|
|
98
|
-
value_tag = detail_cells[index].
|
|
98
|
+
value_tag = detail_cells[index].at_css('td')
|
|
99
99
|
|
|
100
100
|
if value_tag
|
|
101
101
|
text_value = (attribute == 'Owner Video' ? 'Yes' : value_tag.text) # substitute Yes for the video link.
|
|
@@ -107,7 +107,7 @@ private
|
|
|
107
107
|
end
|
|
108
108
|
}
|
|
109
109
|
|
|
110
|
-
detail_values['Phone'.to_sym] = doc.
|
|
110
|
+
detail_values['Phone'.to_sym] = doc.at_css('.phone').text # NOTE: keep phone number last in list for display consistency.
|
|
111
111
|
end
|
|
112
112
|
|
|
113
113
|
# Returns the listing's id. If the listing doesn't have an id, it's not really a listing.
|
|
@@ -148,25 +148,25 @@ private
|
|
|
148
148
|
|
|
149
149
|
# Returns the listing's sale price
|
|
150
150
|
def self.sale_price(doc)
|
|
151
|
-
value = doc.
|
|
152
|
-
value
|
|
151
|
+
value = doc.at_css('.price .data')
|
|
152
|
+
value ? value.text : doc.at_css('.price .txt').children[1].text
|
|
153
153
|
end
|
|
154
154
|
|
|
155
155
|
# Returns the seller's address
|
|
156
156
|
def self.seller_location(doc)
|
|
157
|
-
value = doc.
|
|
158
|
-
value
|
|
157
|
+
value = doc.at_css('.location .data')
|
|
158
|
+
value ? value.text : doc.at_css('.location .txt').children[1].text
|
|
159
159
|
end
|
|
160
160
|
|
|
161
161
|
# Returns the seller's name
|
|
162
162
|
def self.seller_name(doc)
|
|
163
|
-
value = doc.
|
|
164
|
-
value
|
|
163
|
+
value = doc.at_css('.offered-by .data')
|
|
164
|
+
value ? value.text : doc.at_css('.offered-by .txt').children[1].text
|
|
165
165
|
end
|
|
166
166
|
|
|
167
167
|
# Returns the Seller's phone number
|
|
168
168
|
def self.seller_phone(doc)
|
|
169
|
-
phone = doc.
|
|
169
|
+
phone = doc.at_css('.seller-info .phone').text
|
|
170
170
|
phone.reverse! # alt listing phone has css format 'direction: rtl'
|
|
171
171
|
phone[0] = '('; phone[4] = ')'
|
|
172
172
|
phone
|
|
@@ -185,6 +185,6 @@ private
|
|
|
185
185
|
|
|
186
186
|
# Returns the summary listing's title
|
|
187
187
|
def self.title(doc)
|
|
188
|
-
doc.
|
|
188
|
+
doc.at_css('.name').text # '2000 JASON 25 Downeaster' (also Grady White, Grady-White, Sea Ray, ...)
|
|
189
189
|
end
|
|
190
190
|
end
|
data/lib/classifieds/version.rb
CHANGED