leboncoin 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/leboncoin.rb +135 -0
- metadata +89 -0
data/lib/leboncoin.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'date'
require 'json'
require 'open-uri'

# Scrapes search-result listings from leboncoin.fr and exposes them as Ruby
# hashes, as a JSON document or as an RSS 2.0 feed.
#
# The third-party gems (htmlentities, nokogiri) and rss/maker are required
# lazily by the methods that use them, so loading this file only needs the
# standard library.
class Leboncoin
  # NOTE(review): the gem metadata shipped next to this file declares version
  # 0.0.1 -- confirm which of the two numbers is the intended one.
  VERSION = '1.0.0'

  # French month abbreviations (as they appear right before the "<br>" of a
  # listing date) mapped to English abbreviations. Only months whose French
  # spelling is not already understood by DateTime.parse are listed.
  FRENCH_MONTHS = [
    [/ f.v[a-z.]*<br>/,  " feb<br>"],
    [/ avr[a-z.]*<br>/,  " apr<br>"],
    [/ mai<br>/,         " may<br>"],
    [/ juin<br>/,        " jun<br>"],
    [/ juil[a-z.]*<br>/, " jul<br>"],
    [/ ao.t<br>/,        " aug<br>"],
    [/ d.c[a-z.]*<br>/,  " dec<br>"]
  ].freeze

  # Normalises scraped text: re-interprets the bytes as ISO-8859-15, round-trips
  # the result through HTML entities and replaces a handful of C1 control
  # characters with their printable counterparts.
  #
  # @param str [String] scraped text or HTML fragment (left unmodified)
  # @return [String] the cleaned-up UTF-8 string
  def convert(str)
    require 'htmlentities'
    entities = HTMLEntities.new

    # Work on a copy: force_encoding mutates its receiver, which would change
    # the caller's string and raise on a frozen one.
    recoded = str.dup.force_encoding("ISO-8859-15").encode("UTF-8")

    # NOTE(review): as written, the three substitutions below look like no-ops
    # (hexadecimal encoding presumably leaves no literal non-ASCII characters
    # behind); the patterns may have been entity strings originally -- confirm
    # against the upstream source before touching them.
    encoded = entities.encode(recoded, :hexadecimal)
      .gsub(/é/, "é")
      .gsub(/™/, "™")
      .gsub(/®/, "©")

    entities.decode(encoded)
      .gsub(/\u0092/, "'")
      .gsub(/\u0096/, "-")
      .gsub(/\u0095/, "•")
      .gsub(/\u0099/, "™")
      .gsub(/\u0080/, "€")
  end

  # Opens a remote resource.
  #
  # The URL is parsed first and opened through open-uri's URI#open instead of
  # Kernel#open, so strings such as "|command" or local paths are never handed
  # to Kernel#open, and the call keeps working on Ruby versions where open-uri
  # no longer patches Kernel#open.
  #
  # @param url [String] an http, https or ftp URL
  # @return [IO] readable handle (StringIO or Tempfile) on the response body
  # @raise [ArgumentError] if +url+ is not a URL open-uri can fetch
  # @raise [URI::InvalidURIError] if +url+ cannot be parsed
  def retrieveData(url)
    uri = URI.parse(url.to_s)
    unless uri.is_a?(OpenURI::OpenRead)
      raise ArgumentError, "not a fetchable URL: #{url.inspect}"
    end
    uri.open
  end

  # Downloads +url+ and parses it as HTML.
  #
  # @param url [String] page address
  # @return [Nokogiri::HTML::Document] the parsed page
  def createDocument(url)
    require 'nokogiri'
    io = retrieveData(url)
    begin
      Nokogiri::HTML(io)
    ensure
      # The handle is only needed while parsing; release it right away.
      io.close if io.respond_to?(:close) && !io.closed?
    end
  end

  # Builds the item hash for one result row. This also downloads the ad's own
  # page (the row's link) to read the description and the location.
  #
  # @param node [Nokogiri::XML::Node] a <tr> of the results table
  # @return [Hash{String => Object}] keys: dateFR, dateEN, date, image, title,
  #   link, price, currency, description, city, postcode
  # @raise [ArgumentError] if the row's date cannot be parsed
  def createItem(node)
    item = {}

    # DATE
    item["dateFR"] = node.xpath('td[1]')[0].inner_html.strip
    item["dateEN"] = english_date(item["dateFR"])
    item["date"] = DateTime.parse(item["dateEN"])

    # IMAGE (optional: nil when the row has no thumbnail)
    image = node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]
    source = image && image["src"]
    item["image"] = source && source.strip

    # NAME & LINK
    anchor = node.xpath('td[3]/a')[0]
    item["title"] = convert(anchor.content.strip)
    item["link"] = anchor["href"].strip

    # PRICE (optional: nil when the row has no price text)
    item["price"] = parse_price(node.xpath('td[3]/text()[3]')[0])
    item["currency"] = "EUR"

    # DESCRIPTION
    doc = createDocument(item["link"])
    item["description"] = convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
    # "city" is assigned before "postcode" to keep the key order of the hash
    # (and therefore of the JSON output) stable.
    item["city"] = doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
    item["postcode"] = item["city"][/[0-9]+/]
    item["city"] = item["city"].gsub(/[0-9]+ /, "")

    item
  end

  # Appends the items found at +url+ to +items+, following the "Page suivante"
  # link until +size+ items have been collected or there is no next page.
  #
  # @param items [Array<Hash>] accumulator, modified in place
  # @param url [String] address of a results page
  # @param size [Integer] maximum number of items wanted in +items+
  # @return [Array<Hash>] +items+
  def parseItems(items, url, size)
    doc = createDocument(url)

    doc.xpath('//table[@id="hl"]/tr').each do |node|
      break if items.size >= size
      items.push(createItem(node))
    end

    if items.size < size
      # Follow a single link only: following every match would fetch the same
      # next page once per link and append its items each time.
      next_link = doc.xpath('//a[starts-with(text(), "Page suivante")]')[0]
      parseItems(items, next_link['href'], size) if next_link
    end

    items
  end

  # Collects up to +size+ items starting from the results page at +url+.
  #
  # @param url [String] address of the first results page
  # @param size [Integer] maximum number of items
  # @return [Array<Hash>] the scraped items
  def createItems(url, size)
    items = []
    parseItems(items, url, size)
    items
  end

  # Same as #createItems, serialised as pretty-printed JSON.
  #
  # @param url [String] address of the first results page
  # @param size [Integer] maximum number of items
  # @return [String] JSON array of item objects
  def createJSON(url, size)
    JSON.pretty_generate(createItems(url, size))
  end

  # Same as #createItems, rendered as an RSS 2.0 feed.
  #
  # @param url [String] address of the first results page (also the channel link)
  # @param size [Integer] maximum number of items
  # @return [RSS::Rss] the generated feed
  def createRSS(url, size)
    require 'rss/maker'

    RSS::Maker.make("2.0") do |m|
      m.channel.title = "leboncoin.fr"
      m.channel.link = url
      m.channel.description = "leboncoin.fr"
      m.items.do_sort = true

      createItems(url, size).each do |item|
        price = item["price"].nil? ? "" : "#{item["price"]} #{item["currency"]}"
        prefix = price.empty? ? "" : "[#{price}] "

        entry = m.items.new_item
        entry.title = prefix + item["title"]
        entry.link = item["link"]
        begin
          entry.description = convert(rss_description(item, price))
        rescue StandardError => e
          # Best effort: keep the entry without a description, but report the
          # failure on stderr so it cannot end up mixed into feed output.
          warn ">>>> #{e.class}: #{e.message}\n" + JSON.pretty_generate(item)
        end
        # The feed uses its generation time; the scraped item["date"] is
        # deliberately not used here (it was disabled in the original code).
        entry.date = Time.now
      end
    end
  end

  private

  # Rewrites a French listing date ("12 juin<br>14:30", "Hier<br>09:00") into
  # a form DateTime.parse accepts ("12 jun 14:30", "24 oct 09:00").
  def english_date(date_fr)
    today = Date.today
    translated = FRENCH_MONTHS.inject(date_fr) do |text, (pattern, english)|
      text.gsub(pattern, english)
    end
    translated
      .gsub(/<br>/, " ")
      .gsub(/Aujourd'hui/, today.strftime('%d %b').downcase)
      .gsub(/Hier/, (today - 1).strftime('%d %b').downcase)
  end

  # Reads an integer price from a text node such as "1 200 €": drops the last
  # two characters of each line (the currency suffix), removes whitespace
  # used as thousands separator and converts what remains. Returns nil when
  # there is no node.
  def parse_price(text_node)
    text = text_node && text_node.content
    return nil if text.nil?
    text.strip.gsub(/..$/, "").gsub(/[[:space:]]/, "").to_i
  end

  # Builds the HTML body of an RSS entry; the image tag is left out when the
  # item has no image.
  def rss_description(item, price)
    image = item["image"]
    parts = []
    parts << "<img src='#{image}'/><br/>" if image
    parts << "<b>Ville</b> : #{item["city"]}<br/>"
    parts << "<b>Code postal</b> : #{item["postcode"]}<br/>"
    parts << "<p>#{item["description"]}</p><hr/>"
    parts << "<b>Prix</b> : #{price}<hr/>"
    parts.join
  end
end
|
134
|
+
|
135
|
+
# puts Leboncoin.new.createJSON('http://www.leboncoin.fr/occasions/?f=a&th=1&q=dreamcast', 2)
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: leboncoin
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors: []
|
12
|
+
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-10-25 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: htmlentities
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: nokogiri
|
35
|
+
prerelease: false
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
segments:
|
42
|
+
- 0
|
43
|
+
version: "0"
|
44
|
+
type: :runtime
|
45
|
+
version_requirements: *id002
|
46
|
+
description:
|
47
|
+
email:
|
48
|
+
executables: []
|
49
|
+
|
50
|
+
extensions: []
|
51
|
+
|
52
|
+
extra_rdoc_files: []
|
53
|
+
|
54
|
+
files:
|
55
|
+
- lib/leboncoin.rb
|
56
|
+
has_rdoc: true
|
57
|
+
homepage:
|
58
|
+
licenses: []
|
59
|
+
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
segments:
|
71
|
+
- 0
|
72
|
+
version: "0"
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
requirements: []
|
82
|
+
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 1.3.7
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Lets search through leboncoin.fr items!
|
88
|
+
test_files: []
|
89
|
+
|