leboncoin 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +7 -5
- data/leboncoin.gemspec +2 -2
- data/lib/leboncoin.rb +6 -130
- data/lib/leboncoin/htmlutils.rb +22 -0
- data/lib/leboncoin/items.rb +78 -0
- data/lib/leboncoin/search.rb +125 -0
- metadata +5 -2
data/README.rdoc
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
= Leboncoin
|
2
2
|
|
3
|
-
|
3
|
+
Let's search through leboncoin.fr items!
|
4
4
|
|
5
5
|
== Install
|
6
6
|
|
7
|
-
gem install
|
7
|
+
gem install leboncoin
|
8
8
|
|
9
9
|
== Usage
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
LeBonCoin::Search('dreamcast', 10).each do |item|
|
12
|
+
puts "#{item['title']}"
|
13
|
+
end
|
14
|
+
puts LeBonCoin::Search('dreamcast').to_json
|
15
|
+
puts LeBonCoin::Search('dreamcast').to_rss
|
14
16
|
|
15
17
|
== License
|
16
18
|
|
data/leboncoin.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{leboncoin}
|
5
|
-
s.version = "0.0.2"
|
5
|
+
s.version = "0.0.4"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Karim DRIDI"]
|
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.description = %q{leboncoin toolkit.}
|
11
11
|
s.email = %q{karim.dridi@gmail.com}
|
12
12
|
s.extra_rdoc_files = ["README.rdoc", "lib/leboncoin.rb"]
|
13
|
-
s.files = ["README.rdoc", "lib/leboncoin.rb", "leboncoin.gemspec"]
|
13
|
+
s.files = ["README.rdoc", "lib/leboncoin/htmlutils.rb", "lib/leboncoin/items.rb", "lib/leboncoin/search.rb", "lib/leboncoin.rb", "leboncoin.gemspec"]
|
14
14
|
s.homepage = %q{http://github.com/kdridi/leboncoin}
|
15
15
|
s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Leboncoin", "--main", "README.rdoc"]
|
16
16
|
s.require_paths = ["lib"]
|
data/lib/leboncoin.rb
CHANGED
@@ -1,135 +1,11 @@
|
|
1
|
-
|
2
|
-
VERSION = '0.0.2'
|
1
|
+
require 'leboncoin/search'
|
3
2
|
|
4
|
-
|
5
|
-
|
6
|
-
str = HTMLEntities.new.encode(str.force_encoding("ISO-8859-15").encode("UTF-8"), :hexadecimal)
|
7
|
-
.gsub(/é/, "é")
|
8
|
-
.gsub(/™/, "™")
|
9
|
-
.gsub(/®/, "©")
|
3
|
+
module LeBonCoin
|
4
|
+
VERSION = '0.0.4'
|
10
5
|
|
11
|
-
|
12
|
-
|
13
|
-
.
|
14
|
-
.gsub(/\u0095/, "•")
|
15
|
-
.gsub(/\u0099/, "™")
|
16
|
-
.gsub(/\u0080/, "€")
|
17
|
-
|
18
|
-
return str
|
19
|
-
end
|
20
|
-
|
21
|
-
def retrieveData(url)
|
22
|
-
require 'open-uri'
|
23
|
-
return open(url)
|
24
|
-
end
|
25
|
-
|
26
|
-
def createDocument(url)
|
27
|
-
require 'nokogiri'
|
28
|
-
return Nokogiri::HTML(retrieveData(url))
|
29
|
-
end
|
30
|
-
|
31
|
-
def createItem(node)
|
32
|
-
item = Hash.new
|
33
|
-
|
34
|
-
# DATE
|
35
|
-
require 'date'
|
36
|
-
item["dateFR"] = node.xpath('td[1]')[0].inner_html.strip
|
37
|
-
item["dateEN"] = item["dateFR"]
|
38
|
-
.gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
|
39
|
-
.gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
|
40
|
-
.gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
|
41
|
-
|
42
|
-
item["date"] = DateTime.parse(item["dateEN"])
|
43
|
-
|
44
|
-
# IMAGE
|
45
|
-
item["image"] = begin node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip rescue nil end
|
46
|
-
|
47
|
-
# NAME & LINK
|
48
|
-
item["title"] = convert(node.xpath('td[3]/a')[0].content.strip)
|
49
|
-
item["link"] = node.xpath('td[3]/a')[0]["href"].strip
|
50
|
-
|
51
|
-
# PRICE
|
52
|
-
item["price"] = begin node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i rescue nil end
|
53
|
-
item["currency"] = "EUR"
|
54
|
-
|
55
|
-
# DESCRIPTION
|
56
|
-
doc = createDocument(item["link"])
|
57
|
-
item["description"] = convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
|
58
|
-
item["city"] = doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
|
59
|
-
item["postcode"] = item["city"][/[0-9]+/]
|
60
|
-
item["city"] = item["city"].gsub(/[0-9]+ /, "")
|
61
|
-
|
62
|
-
return item
|
63
|
-
end
|
64
|
-
|
65
|
-
def parseItems(items, url, size)
|
66
|
-
doc = createDocument url
|
67
|
-
|
68
|
-
continue = true
|
69
|
-
doc.xpath('//table[@id="hl"]/tr').each do |node|
|
70
|
-
if items.size < size
|
71
|
-
items.push createItem(node)
|
72
|
-
end
|
6
|
+
class << self
|
7
|
+
def Search keywords, size = 10
|
8
|
+
LeBonCoin::Search.parseItems "http://www.Leboncoin.fr/occasions/?f=a&th=1&q=" + keywords, size
|
73
9
|
end
|
74
|
-
|
75
|
-
if items.size < size
|
76
|
-
doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
|
77
|
-
parseItems(items, node['href'], size)
|
78
|
-
end
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
def createItems(url, size)
|
83
|
-
items = Array.new
|
84
|
-
parseItems(items, url, size)
|
85
|
-
|
86
|
-
return items
|
87
|
-
end
|
88
|
-
|
89
|
-
def createJSON(url, size)
|
90
|
-
require 'json'
|
91
|
-
return JSON.pretty_generate(createItems(url, size))
|
92
10
|
end
|
93
|
-
|
94
|
-
def createRSS(url, size)
|
95
|
-
require 'rss/maker'
|
96
|
-
|
97
|
-
content = RSS::Maker.make("2.0") do |m|
|
98
|
-
m.channel.title = "leboncoin.fr"
|
99
|
-
m.channel.link = url
|
100
|
-
m.channel.description = "leboncoin.fr"
|
101
|
-
m.items.do_sort = true
|
102
|
-
createItems(url, size).each do |item|
|
103
|
-
title = ""
|
104
|
-
price = ""
|
105
|
-
if item["price"] != nil
|
106
|
-
price = item["price"].to_s + " " + item["currency"]
|
107
|
-
title = "[" + price + "] "
|
108
|
-
end
|
109
|
-
|
110
|
-
postcode = ""
|
111
|
-
if item["postcode"] != nil
|
112
|
-
postcode = item["postcode"]
|
113
|
-
end
|
114
|
-
|
115
|
-
i = m.items.new_item
|
116
|
-
i.title = title + item["title"]
|
117
|
-
i.link = item["link"]
|
118
|
-
begin
|
119
|
-
i.description = convert("<img src='" + item["image"] + "'/><br/>" \
|
120
|
-
+ "<b>Ville</b> : " + item["city"] + "<br/>" \
|
121
|
-
+ "<b>Code postal</b> : " + postcode + "<br/>" \
|
122
|
-
+ "<p>" + item["description"] + "</p><hr/>" \
|
123
|
-
+ "<b>Prix</b> : " + price + "<hr/>")
|
124
|
-
rescue
|
125
|
-
require 'json'
|
126
|
-
puts ">>>> " + JSON.pretty_generate(item)
|
127
|
-
end
|
128
|
-
i.date = Time.now #item["date"]
|
129
|
-
end
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
11
|
end
|
134
|
-
|
135
|
-
# puts Leboncoin.new.createJSON('http://www.leboncoin.fr/occasions/?f=a&th=1&q=dreamcast', 2)
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module LeBonCoin
|
2
|
+
module HTMLUtils
|
3
|
+
class << self
|
4
|
+
def convert str
|
5
|
+
require 'htmlentities'
|
6
|
+
str = HTMLEntities.new.encode(str.force_encoding("ISO-8859-15").encode("UTF-8"), :hexadecimal)
|
7
|
+
.gsub(/é/, "é")
|
8
|
+
.gsub(/™/, "™")
|
9
|
+
.gsub(/®/, "©")
|
10
|
+
|
11
|
+
str = HTMLEntities.new.decode(str)
|
12
|
+
.gsub(/\u0092/, "'")
|
13
|
+
.gsub(/\u0096/, "-")
|
14
|
+
.gsub(/\u0095/, "•")
|
15
|
+
.gsub(/\u0099/, "™")
|
16
|
+
.gsub(/\u0080/, "€")
|
17
|
+
|
18
|
+
return str
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'leboncoin/htmlutils'
|
2
|
+
|
3
|
+
module LeBonCoin
|
4
|
+
module Search
|
5
|
+
module SearchItems
|
6
|
+
class << self
|
7
|
+
###
|
8
|
+
# Default constructor
|
9
|
+
def new link
|
10
|
+
@link = link
|
11
|
+
@items = Array.new
|
12
|
+
return self
|
13
|
+
end
|
14
|
+
|
15
|
+
###
|
16
|
+
# Create a new item
|
17
|
+
def createItem item = Hash.new
|
18
|
+
@items.push item
|
19
|
+
return item
|
20
|
+
end
|
21
|
+
|
22
|
+
def each
|
23
|
+
@items.each do |item|
|
24
|
+
yield item if block_given?
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def size
|
29
|
+
return @items.size
|
30
|
+
end
|
31
|
+
|
32
|
+
def to_json
|
33
|
+
require 'json'
|
34
|
+
return JSON.pretty_generate(@items)
|
35
|
+
end
|
36
|
+
|
37
|
+
def to_rss
|
38
|
+
require 'rss/maker'
|
39
|
+
|
40
|
+
content = RSS::Maker.make("2.0") do |m|
|
41
|
+
m.channel.title = "leboncoin.fr"
|
42
|
+
m.channel.link = @link
|
43
|
+
m.channel.description = "leboncoin.fr"
|
44
|
+
m.items.do_sort = true
|
45
|
+
@items.each do |item|
|
46
|
+
price = ""
|
47
|
+
if item["price"] != nil
|
48
|
+
price = item["price"].to_s + " " + item["currency"]
|
49
|
+
end
|
50
|
+
|
51
|
+
postcode = ""
|
52
|
+
if item["postcode"] != nil
|
53
|
+
postcode = item["postcode"]
|
54
|
+
end
|
55
|
+
|
56
|
+
i = m.items.new_item
|
57
|
+
i.title = item["title"]
|
58
|
+
i.link = item["link"]
|
59
|
+
begin
|
60
|
+
i.description = LeBonCoin::HTMLUtils.convert("<img src='" + item["image"] + "'/><br/>" \
|
61
|
+
+ "<b>Ville</b> : " + item["city"] + "<br/>" \
|
62
|
+
+ "<b>Code postal</b> : " + postcode + "<br/>" \
|
63
|
+
+ "<p>" + item["description"] + "</p><hr/>" \
|
64
|
+
+ "<b>Prix</b> : " + price + "<hr/>")
|
65
|
+
rescue
|
66
|
+
require 'json'
|
67
|
+
puts ">>>> ERROR >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"
|
68
|
+
puts JSON.pretty_generate(item)
|
69
|
+
puts "\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
|
70
|
+
end
|
71
|
+
i.date = Time.now #item["date"]
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
require 'leboncoin/items'
|
2
|
+
|
3
|
+
module LeBonCoin
|
4
|
+
module Search
|
5
|
+
class << self
|
6
|
+
###
|
7
|
+
# Load the given URL as a well-formed HTML document
|
8
|
+
def loadHTML url
|
9
|
+
require 'open-uri'
|
10
|
+
require 'nokogiri'
|
11
|
+
|
12
|
+
doc = begin
|
13
|
+
Nokogiri::HTML(open(url))
|
14
|
+
rescue
|
15
|
+
nil
|
16
|
+
end
|
17
|
+
|
18
|
+
return doc
|
19
|
+
end
|
20
|
+
|
21
|
+
###
|
22
|
+
# Parse items from a given URL
|
23
|
+
def parseItems url, size, items = LeBonCoin::Search::SearchItems.new(url)
|
24
|
+
doc = loadHTML url
|
25
|
+
|
26
|
+
doc.xpath('//table[@id="hl"]/tr').each do |node|
|
27
|
+
if items.size < size
|
28
|
+
parseItemNode node, items.createItem
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
if items.size < size
|
33
|
+
doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
|
34
|
+
parse node['href'], size, items
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
return items
|
39
|
+
end
|
40
|
+
|
41
|
+
###
|
42
|
+
# Parse item from a given XML node.
|
43
|
+
def parseItemNode(node, item = Hash.new)
|
44
|
+
# DATE
|
45
|
+
item["date"] = begin
|
46
|
+
require 'date'
|
47
|
+
DateTime.parse(
|
48
|
+
node.xpath('td[1]')[0].inner_html.strip
|
49
|
+
.gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
|
50
|
+
.gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
|
51
|
+
.gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
|
52
|
+
)
|
53
|
+
rescue
|
54
|
+
nil
|
55
|
+
end
|
56
|
+
|
57
|
+
# IMAGE
|
58
|
+
item["image"] = begin
|
59
|
+
node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip
|
60
|
+
rescue
|
61
|
+
nil
|
62
|
+
end
|
63
|
+
|
64
|
+
# TITLE
|
65
|
+
item["title"] = begin
|
66
|
+
LeBonCoin::HTMLUtils.convert(node.xpath('td[3]/a')[0].content.strip)
|
67
|
+
rescue
|
68
|
+
"UNKNOW TITLE"
|
69
|
+
end
|
70
|
+
|
71
|
+
# LINK
|
72
|
+
item["link"] = begin
|
73
|
+
node.xpath('td[3]/a')[0]["href"].strip
|
74
|
+
rescue
|
75
|
+
nil
|
76
|
+
end
|
77
|
+
|
78
|
+
# PRICE
|
79
|
+
item["currency"] = "EUR"
|
80
|
+
item["price"] = begin
|
81
|
+
node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i
|
82
|
+
rescue
|
83
|
+
nil
|
84
|
+
end
|
85
|
+
|
86
|
+
return parseItem item["link"], item
|
87
|
+
end
|
88
|
+
|
89
|
+
###
|
90
|
+
# Parse item from a given HTML link.
|
91
|
+
def parseItem url, item = Hash.new
|
92
|
+
doc = loadHTML url
|
93
|
+
|
94
|
+
# DESCRIPTION
|
95
|
+
item["description"] = begin
|
96
|
+
LeBonCoin::HTMLUtils.convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
|
97
|
+
rescue
|
98
|
+
nil
|
99
|
+
end
|
100
|
+
|
101
|
+
value = begin
|
102
|
+
doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
|
103
|
+
rescue
|
104
|
+
nil
|
105
|
+
end
|
106
|
+
|
107
|
+
# POSTCODE
|
108
|
+
item["postcode"] = begin
|
109
|
+
value[/[0-9]+/]
|
110
|
+
rescue
|
111
|
+
nil
|
112
|
+
end
|
113
|
+
|
114
|
+
# CITY
|
115
|
+
item["city"] = begin
|
116
|
+
value.gsub(/[0-9]+ /, "")
|
117
|
+
rescue
|
118
|
+
nil
|
119
|
+
end
|
120
|
+
|
121
|
+
return item
|
122
|
+
end
|
123
|
+
end
|
124
|
+
end
|
125
|
+
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 2
|
9
|
-
version: 0.0.2
|
8
|
+
- 4
|
9
|
+
version: 0.0.4
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Karim DRIDI
|
@@ -54,6 +54,9 @@ extra_rdoc_files:
|
|
54
54
|
- lib/leboncoin.rb
|
55
55
|
files:
|
56
56
|
- README.rdoc
|
57
|
+
- lib/leboncoin/htmlutils.rb
|
58
|
+
- lib/leboncoin/items.rb
|
59
|
+
- lib/leboncoin/search.rb
|
57
60
|
- lib/leboncoin.rb
|
58
61
|
- leboncoin.gemspec
|
59
62
|
has_rdoc: true
|