leboncoin 0.0.2 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +7 -5
- data/leboncoin.gemspec +2 -2
- data/lib/leboncoin.rb +6 -130
- data/lib/leboncoin/htmlutils.rb +22 -0
- data/lib/leboncoin/items.rb +78 -0
- data/lib/leboncoin/search.rb +125 -0
- metadata +5 -2
data/README.rdoc
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
= Leboncoin
|
2
2
|
|
3
|
-
|
3
|
+
Let's search through leboncoin.fr items!
|
4
4
|
|
5
5
|
== Install
|
6
6
|
|
7
|
-
gem install
|
7
|
+
gem install leboncoin
|
8
8
|
|
9
9
|
== Usage
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
LeBonCoin::Search('dreamcast', 10).each do |item|
|
12
|
+
puts "#{item['title']}"
|
13
|
+
end
|
14
|
+
puts LeBonCoin::Search('dreamcast').to_json
|
15
|
+
puts LeBonCoin::Search('dreamcast').to_rss
|
14
16
|
|
15
17
|
== License
|
16
18
|
|
data/leboncoin.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{leboncoin}
|
5
|
-
s.version = "0.0.2"
|
5
|
+
s.version = "0.0.4"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Karim DRIDI"]
|
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.description = %q{leboncoin toolkit.}
|
11
11
|
s.email = %q{karim.dridi@gmail.com}
|
12
12
|
s.extra_rdoc_files = ["README.rdoc", "lib/leboncoin.rb"]
|
13
|
-
s.files = ["README.rdoc", "lib/leboncoin.rb", "leboncoin.gemspec"]
|
13
|
+
s.files = ["README.rdoc", "lib/leboncoin/htmlutils.rb", "lib/leboncoin/items.rb", "lib/leboncoin/search.rb", "lib/leboncoin.rb", "leboncoin.gemspec"]
|
14
14
|
s.homepage = %q{http://github.com/kdridi/leboncoin}
|
15
15
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Leboncoin", "--main", "README.rdoc"]
|
16
16
|
s.require_paths = ["lib"]
|
data/lib/leboncoin.rb
CHANGED
@@ -1,135 +1,11 @@
|
|
1
|
-
|
2
|
-
VERSION = '0.0.2'
|
1
|
+
require 'leboncoin/search'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
str = HTMLEntities.new.encode(str.force_encoding("ISO-8859-15").encode("UTF-8"), :hexadecimal)
|
7
|
-
.gsub(/é/, "é")
|
8
|
-
.gsub(/™/, "™")
|
9
|
-
.gsub(/®/, "©")
|
3
|
+
module LeBonCoin
|
4
|
+
VERSION = '0.0.4'
|
10
5
|
|
11
|
-
|
12
|
-
|
13
|
-
.
|
14
|
-
.gsub(/\u0095/, "•")
|
15
|
-
.gsub(/\u0099/, "™")
|
16
|
-
.gsub(/\u0080/, "€")
|
17
|
-
|
18
|
-
return str
|
19
|
-
end
|
20
|
-
|
21
|
-
def retrieveData(url)
|
22
|
-
require 'open-uri'
|
23
|
-
return open(url)
|
24
|
-
end
|
25
|
-
|
26
|
-
def createDocument(url)
|
27
|
-
require 'nokogiri'
|
28
|
-
return Nokogiri::HTML(retrieveData(url))
|
29
|
-
end
|
30
|
-
|
31
|
-
def createItem(node)
|
32
|
-
item = Hash.new
|
33
|
-
|
34
|
-
# DATE
|
35
|
-
require 'date'
|
36
|
-
item["dateFR"] = node.xpath('td[1]')[0].inner_html.strip
|
37
|
-
item["dateEN"] = item["dateFR"]
|
38
|
-
.gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
|
39
|
-
.gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
|
40
|
-
.gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
|
41
|
-
|
42
|
-
item["date"] = DateTime.parse(item["dateEN"])
|
43
|
-
|
44
|
-
# IMAGE
|
45
|
-
item["image"] = begin node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip rescue nil end
|
46
|
-
|
47
|
-
# NAME & LINK
|
48
|
-
item["title"] = convert(node.xpath('td[3]/a')[0].content.strip)
|
49
|
-
item["link"] = node.xpath('td[3]/a')[0]["href"].strip
|
50
|
-
|
51
|
-
# PRICE
|
52
|
-
item["price"] = begin node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i rescue nil end
|
53
|
-
item["currency"] = "EUR"
|
54
|
-
|
55
|
-
# DESCRIPTION
|
56
|
-
doc = createDocument(item["link"])
|
57
|
-
item["description"] = convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
|
58
|
-
item["city"] = doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
|
59
|
-
item["postcode"] = item["city"][/[0-9]+/]
|
60
|
-
item["city"] = item["city"].gsub(/[0-9]+ /, "")
|
61
|
-
|
62
|
-
return item
|
63
|
-
end
|
64
|
-
|
65
|
-
def parseItems(items, url, size)
|
66
|
-
doc = createDocument url
|
67
|
-
|
68
|
-
continue = true
|
69
|
-
doc.xpath('//table[@id="hl"]/tr').each do |node|
|
70
|
-
if items.size < size
|
71
|
-
items.push createItem(node)
|
72
|
-
end
|
6
|
+
class << self
|
7
|
+
def Search keywords, size = 10
|
8
|
+
LeBonCoin::Search.parseItems "http://www.Leboncoin.fr/occasions/?f=a&th=1&q=" + keywords, size
|
73
9
|
end
|
74
|
-
|
75
|
-
if items.size < size
|
76
|
-
doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
|
77
|
-
parseItems(items, node['href'], size)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def createItems(url, size)
|
83
|
-
items = Array.new
|
84
|
-
parseItems(items, url, size)
|
85
|
-
|
86
|
-
return items
|
87
|
-
end
|
88
|
-
|
89
|
-
def createJSON(url, size)
|
90
|
-
require 'json'
|
91
|
-
return JSON.pretty_generate(createItems(url, size))
|
92
10
|
end
|
93
|
-
|
94
|
-
def createRSS(url, size)
|
95
|
-
require 'rss/maker'
|
96
|
-
|
97
|
-
content = RSS::Maker.make("2.0") do |m|
|
98
|
-
m.channel.title = "leboncoin.fr"
|
99
|
-
m.channel.link = url
|
100
|
-
m.channel.description = "leboncoin.fr"
|
101
|
-
m.items.do_sort = true
|
102
|
-
createItems(url, size).each do |item|
|
103
|
-
title = ""
|
104
|
-
price = ""
|
105
|
-
if item["price"] != nil
|
106
|
-
price = item["price"].to_s + " " + item["currency"]
|
107
|
-
title = "[" + price + "] "
|
108
|
-
end
|
109
|
-
|
110
|
-
postcode = ""
|
111
|
-
if item["postcode"] != nil
|
112
|
-
postcode = item["postcode"]
|
113
|
-
end
|
114
|
-
|
115
|
-
i = m.items.new_item
|
116
|
-
i.title = title + item["title"]
|
117
|
-
i.link = item["link"]
|
118
|
-
begin
|
119
|
-
i.description = convert("<img src='" + item["image"] + "'/><br/>" \
|
120
|
-
+ "<b>Ville</b> : " + item["city"] + "<br/>" \
|
121
|
-
+ "<b>Code postal</b> : " + postcode + "<br/>" \
|
122
|
-
+ "<p>" + item["description"] + "</p><hr/>" \
|
123
|
-
+ "<b>Prix</b> : " + price + "<hr/>")
|
124
|
-
rescue
|
125
|
-
require 'json'
|
126
|
-
puts ">>>> " + JSON.pretty_generate(item)
|
127
|
-
end
|
128
|
-
i.date = Time.now #item["date"]
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
11
|
end
|
134
|
-
|
135
|
-
# puts Leboncoin.new.createJSON('http://www.leboncoin.fr/occasions/?f=a&th=1&q=dreamcast', 2)
|
data/lib/leboncoin/htmlutils.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
module LeBonCoin
|
2
|
+
module HTMLUtils
|
3
|
+
class << self
|
4
|
+
def convert str
|
5
|
+
require 'htmlentities'
|
6
|
+
str = HTMLEntities.new.encode(str.force_encoding("ISO-8859-15").encode("UTF-8"), :hexadecimal)
|
7
|
+
.gsub(/é/, "é")
|
8
|
+
.gsub(/™/, "™")
|
9
|
+
.gsub(/®/, "©")
|
10
|
+
|
11
|
+
str = HTMLEntities.new.decode(str)
|
12
|
+
.gsub(/\u0092/, "'")
|
13
|
+
.gsub(/\u0096/, "-")
|
14
|
+
.gsub(/\u0095/, "•")
|
15
|
+
.gsub(/\u0099/, "™")
|
16
|
+
.gsub(/\u0080/, "€")
|
17
|
+
|
18
|
+
return str
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/lib/leboncoin/items.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'leboncoin/htmlutils'
|
2
|
+
|
3
|
+
module LeBonCoin
|
4
|
+
module Search
|
5
|
+
module SearchItems
|
6
|
+
class << self
|
7
|
+
###
|
8
|
+
# Default constructor
|
9
|
+
def new link
|
10
|
+
@link = link
|
11
|
+
@items = Array.new
|
12
|
+
return self
|
13
|
+
end
|
14
|
+
|
15
|
+
###
|
16
|
+
# Create a new item
|
17
|
+
def createItem item = Hash.new
|
18
|
+
@items.push item
|
19
|
+
return item
|
20
|
+
end
|
21
|
+
|
22
|
+
def each
|
23
|
+
@items.each do |item|
|
24
|
+
yield item if block_given?
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def size
|
29
|
+
return @items.size
|
30
|
+
end
|
31
|
+
|
32
|
+
def to_json
|
33
|
+
require 'json'
|
34
|
+
return JSON.pretty_generate(@items)
|
35
|
+
end
|
36
|
+
|
37
|
+
def to_rss
|
38
|
+
require 'rss/maker'
|
39
|
+
|
40
|
+
content = RSS::Maker.make("2.0") do |m|
|
41
|
+
m.channel.title = "leboncoin.fr"
|
42
|
+
m.channel.link = @link
|
43
|
+
m.channel.description = "leboncoin.fr"
|
44
|
+
m.items.do_sort = true
|
45
|
+
@items.each do |item|
|
46
|
+
price = ""
|
47
|
+
if item["price"] != nil
|
48
|
+
price = item["price"].to_s + " " + item["currency"]
|
49
|
+
end
|
50
|
+
|
51
|
+
postcode = ""
|
52
|
+
if item["postcode"] != nil
|
53
|
+
postcode = item["postcode"]
|
54
|
+
end
|
55
|
+
|
56
|
+
i = m.items.new_item
|
57
|
+
i.title = item["title"]
|
58
|
+
i.link = item["link"]
|
59
|
+
begin
|
60
|
+
i.description = LeBonCoin::HTMLUtils.convert("<img src='" + item["image"] + "'/><br/>" \
|
61
|
+
+ "<b>Ville</b> : " + item["city"] + "<br/>" \
|
62
|
+
+ "<b>Code postal</b> : " + postcode + "<br/>" \
|
63
|
+
+ "<p>" + item["description"] + "</p><hr/>" \
|
64
|
+
+ "<b>Prix</b> : " + price + "<hr/>")
|
65
|
+
rescue
|
66
|
+
require 'json'
|
67
|
+
puts ">>>> ERROR >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"
|
68
|
+
puts JSON.pretty_generate(item)
|
69
|
+
puts "\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
|
70
|
+
end
|
71
|
+
i.date = Time.now #item["date"]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
data/lib/leboncoin/search.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'leboncoin/items'
|
2
|
+
|
3
|
+
module LeBonCoin
|
4
|
+
module Search
|
5
|
+
class << self
|
6
|
+
###
|
7
|
+
# Load the given URL as a well-formed HTML document
|
8
|
+
def loadHTML url
|
9
|
+
require 'open-uri'
|
10
|
+
require 'nokogiri'
|
11
|
+
|
12
|
+
doc = begin
|
13
|
+
Nokogiri::HTML(open(url))
|
14
|
+
rescue
|
15
|
+
nil
|
16
|
+
end
|
17
|
+
|
18
|
+
return doc
|
19
|
+
end
|
20
|
+
|
21
|
+
###
|
22
|
+
# Parse items from a given URL
|
23
|
+
def parseItems url, size, items = LeBonCoin::Search::SearchItems.new(url)
|
24
|
+
doc = loadHTML url
|
25
|
+
|
26
|
+
doc.xpath('//table[@id="hl"]/tr').each do |node|
|
27
|
+
if items.size < size
|
28
|
+
parseItemNode node, items.createItem
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
if items.size < size
|
33
|
+
doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
|
34
|
+
parse node['href'], size, items
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
return items
|
39
|
+
end
|
40
|
+
|
41
|
+
###
|
42
|
+
# Parse item from a given XML node.
|
43
|
+
def parseItemNode(node, item = Hash.new)
|
44
|
+
# DATE
|
45
|
+
item["date"] = begin
|
46
|
+
require 'date'
|
47
|
+
DateTime.parse(
|
48
|
+
node.xpath('td[1]')[0].inner_html.strip
|
49
|
+
.gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
|
50
|
+
.gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
|
51
|
+
.gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
|
52
|
+
)
|
53
|
+
rescue
|
54
|
+
nil
|
55
|
+
end
|
56
|
+
|
57
|
+
# IMAGE
|
58
|
+
item["image"] = begin
|
59
|
+
node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip
|
60
|
+
rescue
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
|
64
|
+
# TITLE
|
65
|
+
item["title"] = begin
|
66
|
+
LeBonCoin::HTMLUtils.convert(node.xpath('td[3]/a')[0].content.strip)
|
67
|
+
rescue
|
68
|
+
"UNKNOW TITLE"
|
69
|
+
end
|
70
|
+
|
71
|
+
# LINK
|
72
|
+
item["link"] = begin
|
73
|
+
node.xpath('td[3]/a')[0]["href"].strip
|
74
|
+
rescue
|
75
|
+
nil
|
76
|
+
end
|
77
|
+
|
78
|
+
# PRICE
|
79
|
+
item["currency"] = "EUR"
|
80
|
+
item["price"] = begin
|
81
|
+
node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i
|
82
|
+
rescue
|
83
|
+
nil
|
84
|
+
end
|
85
|
+
|
86
|
+
return parseItem item["link"], item
|
87
|
+
end
|
88
|
+
|
89
|
+
###
|
90
|
+
# Parse item from a given HTML link.
|
91
|
+
def parseItem url, item = Hash.new
|
92
|
+
doc = loadHTML url
|
93
|
+
|
94
|
+
# DESCRIPTION
|
95
|
+
item["description"] = begin
|
96
|
+
LeBonCoin::HTMLUtils.convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
|
97
|
+
rescue
|
98
|
+
nil
|
99
|
+
end
|
100
|
+
|
101
|
+
value = begin
|
102
|
+
doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
|
103
|
+
rescue
|
104
|
+
nil
|
105
|
+
end
|
106
|
+
|
107
|
+
# POSTCODE
|
108
|
+
item["postcode"] = begin
|
109
|
+
value[/[0-9]+/]
|
110
|
+
rescue
|
111
|
+
nil
|
112
|
+
end
|
113
|
+
|
114
|
+
# CITY
|
115
|
+
item["city"] = begin
|
116
|
+
value.gsub(/[0-9]+ /, "")
|
117
|
+
rescue
|
118
|
+
nil
|
119
|
+
end
|
120
|
+
|
121
|
+
return item
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 0.0.2
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Karim DRIDI
|
@@ -54,6 +54,9 @@ extra_rdoc_files:
|
|
54
54
|
- lib/leboncoin.rb
|
55
55
|
files:
|
56
56
|
- README.rdoc
|
57
|
+
- lib/leboncoin/htmlutils.rb
|
58
|
+
- lib/leboncoin/items.rb
|
59
|
+
- lib/leboncoin/search.rb
|
57
60
|
- lib/leboncoin.rb
|
58
61
|
- leboncoin.gemspec
|
59
62
|
has_rdoc: true
|