leboncoin 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/leboncoin.rb +135 -0
- metadata +89 -0
data/lib/leboncoin.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
class Leboncoin
|
2
|
+
# Library version. Matches the 0.0.1 declared by the gem specification
# that ships this file (the previous value, '1.0.0', disagreed with it).
VERSION = '0.0.1'
|
3
|
+
|
4
|
+
# Re-interprets +str+ as ISO-8859-15 bytes, transcodes it to UTF-8, runs it
# through an HTML-entity encode/decode round trip and finally replaces a few
# C1 control code points (U+0092, U+0096, U+0095, U+0099, U+0080) with the
# punctuation they stand for (apostrophe, dash, bullet, trade mark, euro).
#
# The argument is no longer modified: +force_encoding+ re-tags its receiver
# in place, which used to change the caller's string and raise on frozen
# input, so the work is now done on a copy.
#
# str - the String to normalise.
#
# Returns a new UTF-8 String.
def convert(str)
  require 'htmlentities'

  coder = HTMLEntities.new
  utf8 = str.dup.force_encoding("ISO-8859-15").encode("UTF-8")

  # NOTE(review): the three substitutions below are reproduced exactly as
  # found; their patterns look like they may originally have been entity
  # or mis-encoded sequences -- confirm against the upstream file.
  encoded = coder.encode(utf8, :hexadecimal)
    .gsub(/é/, "é")
    .gsub(/™/, "™")
    .gsub(/®/, "©")

  coder.decode(encoded)
    .gsub(/\u0092/, "'")
    .gsub(/\u0096/, "-")
    .gsub(/\u0095/, "•")
    .gsub(/\u0099/, "™")
    .gsub(/\u0080/, "€")
end
|
20
|
+
|
21
|
+
# Fetches +url+ through open-uri and returns the opened response handle.
#
# The URL is parsed first and opened through the URI object itself instead
# of Kernel#open. Kernel#open treats a leading "|" as a shell command and a
# bare path as a local file, and since Ruby 3.0 it no longer fetches URLs
# at all; going through the parsed URI avoids all three problems.
#
# url - a String holding an absolute URL that open-uri can fetch.
#
# Returns the IO-like object produced by open-uri.
# Raises URI::InvalidURIError when +url+ is not a valid URI.
# Raises ArgumentError when the URI has no scheme open-uri can open.
def retrieveData(url)
  require 'open-uri'

  uri = URI.parse(url)
  unless uri.respond_to?(:open)
    raise ArgumentError, "not a fetchable URL: #{url.inspect}"
  end

  uri.open
end
|
25
|
+
|
26
|
+
# Downloads +url+ with retrieveData and parses the response as HTML.
#
# The handle returned by retrieveData is closed once Nokogiri has finished
# parsing it; previously it was left open until garbage collection.
#
# url - the String URL of the page to load.
#
# Returns the document object built by Nokogiri::HTML.
def createDocument(url)
  require 'nokogiri'

  io = retrieveData(url)
  begin
    Nokogiri::HTML(io)
  ensure
    io.close if io.respond_to?(:close)
  end
end
|
30
|
+
|
31
|
+
# Builds the Hash describing one ad from a row of the results table.
#
# The row supplies the date, thumbnail, title, link and price; the ad's own
# page (fetched with createDocument from the link) supplies the description
# and the "postcode city" line.
#
# Missing optional cells (thumbnail, price) yield nil through explicit
# checks rather than a blanket +rescue+, so unrelated errors are no longer
# silently swallowed.
#
# node - a row node responding to +xpath+.
#
# Returns a Hash with the String keys "dateFR", "dateEN", "date", "image",
# "title", "link", "price", "currency", "description", "city", "postcode".
# Raises NoMethodError when the row has no td[1] cell or no td[3]/a link.
def createItem(node)
  require 'date'

  item = {}

  # DATE: the cell reads like "<day> <month><br><time>", or uses the words
  # "Aujourd'hui" / "Hier" in place of the day and month.
  today = Date.today
  item["dateFR"] = node.xpath('td[1]')[0].inner_html.strip
  item["dateEN"] = item["dateFR"]
    .gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
    .gsub(/Aujourd'hui/, today.strftime('%d %b').downcase)
    .gsub(/Hier/, (today - 1).strftime('%d %b').downcase)
  item["date"] = DateTime.parse(item["dateEN"])

  # IMAGE: optional, nil when the row has no thumbnail or it has no src.
  img = node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]
  src = img && img["src"]
  item["image"] = src && src.strip

  # NAME & LINK
  anchor = node.xpath('td[3]/a')[0]
  item["title"] = convert(anchor.content.strip)
  item["link"] = anchor["href"].strip

  # PRICE: optional; the last two characters of the text are dropped before
  # the integer conversion (presumably a separator and the currency sign).
  price_text = node.xpath('td[3]/text()[3]')[0]
  item["price"] = price_text && price_text.content.strip.gsub(/..$/, "").to_i
  item["currency"] = "EUR"

  # DESCRIPTION, CITY & POSTCODE come from the ad's own page.
  doc = createDocument(item["link"])
  item["description"] = convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
  location = doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
  item["postcode"] = location[/[0-9]+/]
  item["city"] = location.gsub(/[0-9]+ /, "")

  item
end
|
64
|
+
|
65
|
+
# Appends ads found at +url+ to +items+ until it holds +size+ entries,
# following "Page suivante" (next page) links while more are needed.
#
# Each distinct next-page address is followed at most once per page, and
# the limit is re-checked before every fetch. Previously every matching
# link was followed unconditionally once the first check passed, so a page
# carrying the same link twice was downloaded and parsed twice.
#
# items - the Array that receives the item Hashes (modified in place).
# url   - the String URL of the results page to read.
# size  - the maximum number of entries +items+ may hold.
#
# Returns +items+.
def parseItems(items, url, size)
  doc = createDocument(url)

  doc.xpath('//table[@id="hl"]/tr').each do |node|
    break if items.size >= size
    items.push(createItem(node))
  end

  next_pages = doc.xpath('//a[starts-with(text(), "Page suivante")]').map { |a| a['href'] }.uniq
  next_pages.each do |href|
    break if items.size >= size
    parseItems(items, href, size)
  end

  items
end
|
81
|
+
|
82
|
+
# Collects up to +size+ ads starting from the results page at +url+.
#
# A non-positive +size+ returns an empty Array straight away, without
# fetching the page, since no item could be kept anyway.
#
# url  - the String URL of the first results page.
# size - the maximum number of items to return.
#
# Returns an Array of the item Hashes gathered by parseItems.
def createItems(url, size)
  return [] unless size > 0

  items = []
  parseItems(items, url, size)
  items
end
|
88
|
+
|
89
|
+
# Serialises the ads found from +url+ as pretty-printed JSON.
#
# url  - the String URL of the first results page.
# size - the maximum number of items to include.
#
# Returns a String containing the JSON array produced by
# JSON.pretty_generate from the result of createItems.
def createJSON(url, size)
  require 'json'

  JSON.pretty_generate(createItems(url, size))
end
|
93
|
+
|
94
|
+
# Builds an RSS 2.0 feed from the ads found from +url+.
#
# Each entry's title is the ad title, prefixed with "[<price> <currency>] "
# when a price is known. Its description is an HTML fragment with the
# thumbnail (when there is one), city, postcode, ad text and price, passed
# through convert.
#
# Ads without a thumbnail used to lose their whole description, because
# concatenating a nil image raised inside the +begin+ block; the image tag
# is now simply left out. If the description still cannot be built, the
# item is dumped as JSON to standard error (not standard output, where it
# could end up mixed into a printed feed) and the entry is kept without a
# description.
#
# url  - the String URL of the first results page; also the channel link.
# size - the maximum number of entries.
#
# Returns the feed object produced by RSS::Maker.make.
def createRSS(url, size)
  require 'rss/maker'

  RSS::Maker.make("2.0") do |m|
    m.channel.title = "leboncoin.fr"
    m.channel.link = url
    m.channel.description = "leboncoin.fr"
    m.items.do_sort = true

    createItems(url, size).each do |item|
      price = item["price"].nil? ? "" : item["price"].to_s + " " + item["currency"]
      title_prefix = item["price"].nil? ? "" : "[" + price + "] "
      postcode = item["postcode"].nil? ? "" : item["postcode"]

      i = m.items.new_item
      i.title = title_prefix + item["title"]
      i.link = item["link"]

      begin
        image_html = item["image"].nil? ? "" : "<img src='" + item["image"] + "'/><br/>"
        i.description = convert(
          image_html +
          "<b>Ville</b> : " + item["city"] + "<br/>" +
          "<b>Code postal</b> : " + postcode + "<br/>" +
          "<p>" + item["description"] + "</p><hr/>" +
          "<b>Prix</b> : " + price + "<hr/>"
        )
      rescue StandardError
        # Best effort: keep the entry, report the offending item.
        require 'json'
        warn ">>>> " + JSON.pretty_generate(item)
      end

      # NOTE(review): every entry is stamped with the current time; the
      # parsed item["date"] is not used here -- confirm that is intended.
      i.date = Time.now
    end
  end
end
|
132
|
+
|
133
|
+
end
|
134
|
+
|
135
|
+
# puts Leboncoin.new.createJSON('http://www.leboncoin.fr/occasions/?f=a&th=1&q=dreamcast', 2)
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: leboncoin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors: []
|
12
|
+
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-10-25 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: htmlentities
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
46
|
+
description:
|
47
|
+
email:
|
48
|
+
executables: []
|
49
|
+
|
50
|
+
extensions: []
|
51
|
+
|
52
|
+
extra_rdoc_files: []
|
53
|
+
|
54
|
+
files:
|
55
|
+
- lib/leboncoin.rb
|
56
|
+
has_rdoc: true
|
57
|
+
homepage:
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
requirements: []
|
82
|
+
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 1.3.7
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Lets search through leboncoin.fr items!
|
88
|
+
test_files: []
|
89
|
+
|