leboncoin 0.0.2 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,16 +1,18 @@
1
1
  = Leboncoin
2
2
 
3
- Search library for leboncoin.fr
3
+ Let's search through leboncoin.fr items!
4
4
 
5
5
  == Install
6
6
 
7
- gem install kdridi-leboncoin --source http://gems.github.com
7
+ gem install leboncoin
8
8
 
9
9
  == Usage
10
10
 
11
- Blablabla ...
12
-
13
- Leboncoin.seach("keywords")
11
+ LeBonCoin::Search('dreamcast', 10).each do |item|
12
+ puts "#{item['title']}"
13
+ end
14
+ puts LeBonCoin::Search('dreamcast').to_json
15
+ puts LeBonCoin::Search('dreamcast').to_rss
14
16
 
15
17
  == License
16
18
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{leboncoin}
5
- s.version = "0.0.2"
5
+ s.version = "0.0.4"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Karim DRIDI"]
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.description = %q{leboncoin toolkit.}
11
11
  s.email = %q{karim.dridi@gmail.com}
12
12
  s.extra_rdoc_files = ["README.rdoc", "lib/leboncoin.rb"]
13
- s.files = ["README.rdoc", "lib/leboncoin.rb", "leboncoin.gemspec"]
13
+ s.files = ["README.rdoc", "lib/leboncoin/htmlutils.rb", "lib/leboncoin/items.rb", "lib/leboncoin/search.rb", "lib/leboncoin.rb", "leboncoin.gemspec"]
14
14
  s.homepage = %q{http://github.com/kdridi/leboncoin}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Leboncoin", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -1,135 +1,11 @@
1
- class Leboncoin
2
- VERSION = '0.0.2'
1
+ require 'leboncoin/search'
3
2
 
4
- def convert(str)
5
- require 'htmlentities'
6
- str = HTMLEntities.new.encode(str.force_encoding("ISO-8859-15").encode("UTF-8"), :hexadecimal)
7
- .gsub(/é/, "é")
8
- .gsub(/™/, "™")
9
- .gsub(/®/, "©")
3
+ module LeBonCoin
4
+ VERSION = '0.0.4'
10
5
 
11
- str = HTMLEntities.new.decode(str)
12
- .gsub(/\u0092/, "'")
13
- .gsub(/\u0096/, "-")
14
- .gsub(/\u0095/, "•")
15
- .gsub(/\u0099/, "™")
16
- .gsub(/\u0080/, "€")
17
-
18
- return str
19
- end
20
-
21
- def retrieveData(url)
22
- require 'open-uri'
23
- return open(url)
24
- end
25
-
26
- def createDocument(url)
27
- require 'nokogiri'
28
- return Nokogiri::HTML(retrieveData(url))
29
- end
30
-
31
- def createItem(node)
32
- item = Hash.new
33
-
34
- # DATE
35
- require 'date'
36
- item["dateFR"] = node.xpath('td[1]')[0].inner_html.strip
37
- item["dateEN"] = item["dateFR"]
38
- .gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
39
- .gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
40
- .gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
41
-
42
- item["date"] = DateTime.parse(item["dateEN"])
43
-
44
- # IMAGE
45
- item["image"] = begin node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip rescue nil end
46
-
47
- # NAME & LINK
48
- item["title"] = convert(node.xpath('td[3]/a')[0].content.strip)
49
- item["link"] = node.xpath('td[3]/a')[0]["href"].strip
50
-
51
- # PRICE
52
- item["price"] = begin node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i rescue nil end
53
- item["currency"] = "EUR"
54
-
55
- # DESCRIPTION
56
- doc = createDocument(item["link"])
57
- item["description"] = convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
58
- item["city"] = doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
59
- item["postcode"] = item["city"][/[0-9]+/]
60
- item["city"] = item["city"].gsub(/[0-9]+ /, "")
61
-
62
- return item
63
- end
64
-
65
- def parseItems(items, url, size)
66
- doc = createDocument url
67
-
68
- continue = true
69
- doc.xpath('//table[@id="hl"]/tr').each do |node|
70
- if items.size < size
71
- items.push createItem(node)
72
- end
6
+ class << self
7
+ def Search keywords, size = 10
8
+ LeBonCoin::Search.parseItems "http://www.Leboncoin.fr/occasions/?f=a&th=1&q=" + keywords, size
73
9
  end
74
-
75
- if items.size < size
76
- doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
77
- parseItems(items, node['href'], size)
78
- end
79
- end
80
- end
81
-
82
- def createItems(url, size)
83
- items = Array.new
84
- parseItems(items, url, size)
85
-
86
- return items
87
- end
88
-
89
- def createJSON(url, size)
90
- require 'json'
91
- return JSON.pretty_generate(createItems(url, size))
92
10
  end
93
-
94
- def createRSS(url, size)
95
- require 'rss/maker'
96
-
97
- content = RSS::Maker.make("2.0") do |m|
98
- m.channel.title = "leboncoin.fr"
99
- m.channel.link = url
100
- m.channel.description = "leboncoin.fr"
101
- m.items.do_sort = true
102
- createItems(url, size).each do |item|
103
- title = ""
104
- price = ""
105
- if item["price"] != nil
106
- price = item["price"].to_s + " " + item["currency"]
107
- title = "[" + price + "] "
108
- end
109
-
110
- postcode = ""
111
- if item["postcode"] != nil
112
- postcode = item["postcode"]
113
- end
114
-
115
- i = m.items.new_item
116
- i.title = title + item["title"]
117
- i.link = item["link"]
118
- begin
119
- i.description = convert("<img src='" + item["image"] + "'/><br/>" \
120
- + "<b>Ville</b> : " + item["city"] + "<br/>" \
121
- + "<b>Code postal</b> : " + postcode + "<br/>" \
122
- + "<p>" + item["description"] + "</p><hr/>" \
123
- + "<b>Prix</b> : " + price + "<hr/>")
124
- rescue
125
- require 'json'
126
- puts ">>>> " + JSON.pretty_generate(item)
127
- end
128
- i.date = Time.now #item["date"]
129
- end
130
- end
131
- end
132
-
133
11
  end
134
-
135
- # puts Leboncoin.new.createJSON('http://www.leboncoin.fr/occasions/?f=a&th=1&q=dreamcast', 2)
@@ -0,0 +1,22 @@
1
module LeBonCoin
  ##
  # Text clean-up helpers for strings scraped from HTML pages.
  module HTMLUtils
    class << self
      ###
      # Normalise a scraped string.
      #
      # Steps, in order:
      # 1. take a copy of +str+, re-tag its bytes as ISO-8859-15 and
      #    transcode that to UTF-8;
      # 2. encode it to hexadecimal HTML entities and rewrite three specific
      #    entity pairs (the pairs that UTF-8 bytes read as Latin text
      #    produce for "e acute", U+0099 and the registered sign);
      # 3. decode the entities again and replace five C1 control code points
      #    (U+0092, U+0096, U+0095, U+0099, U+0080) with an ASCII character
      #    or a literal entity string.
      #
      # The argument itself is never modified: +force_encoding+ changes its
      # receiver in place, so it is applied to a duplicate. This also makes
      # frozen strings acceptable input.
      #
      # str:: the String to clean up (a nil argument raises NoMethodError).
      #
      # Returns a new String.
      def convert(str)
        # Loaded lazily so the gem is only needed when conversion is used.
        require 'htmlentities'
        coder = HTMLEntities.new

        utf8 = str.dup.force_encoding("ISO-8859-15").encode("UTF-8")

        encoded = coder.encode(utf8, :hexadecimal)
          .gsub(/&#xc3;&#xa9;/, "&eacute;")
          .gsub(/&#xc2;&#x99;/, "&#153;")
          .gsub(/&#xc2;&#xae;/, "&copy;")

        # These replacements run after decoding, so the entity strings
        # inserted here stay in the result as literal text.
        coder.decode(encoded)
          .gsub(/\u0092/, "'")
          .gsub(/\u0096/, "-")
          .gsub(/\u0095/, "&#149;")
          .gsub(/\u0099/, "&#153;")
          .gsub(/\u0080/, "&euro;")
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'leboncoin/htmlutils'
2
+
3
module LeBonCoin
  module Search
    ##
    # Ordered collection of item hashes produced by one search, together
    # with the link the search was started from.
    #
    # Each call to +new+ yields an independent collection, so several
    # searches can be held at the same time without overwriting each other.
    class SearchItems
      include Enumerable

      ###
      # Create an empty collection.
      #
      # link:: value used as the channel link by #to_rss.
      def initialize(link)
        @link = link
        @items = []
      end

      ###
      # Append +item+ (a new empty Hash by default) to the collection and
      # return that same object so the caller can fill it in.
      def createItem(item = {})
        @items.push(item)
        item
      end

      ###
      # Yield every item in insertion order when a block is given.
      # Returns the underlying Array of items.
      def each(&block)
        @items.each(&block) if block
        @items
      end

      ###
      # Number of items collected so far.
      def size
        @items.size
      end

      ###
      # Pretty-printed JSON array of the items.
      #
      # Extra arguments are accepted and ignored so that the object can also
      # be serialised as part of a larger structure, where the JSON generator
      # passes its state to +to_json+.
      def to_json(*_args)
        require 'json'
        JSON.pretty_generate(@items)
      end

      ###
      # Build an RSS 2.0 feed with one entry per item and return the value
      # produced by RSS::Maker.make.
      #
      # An item whose description cannot be assembled (for example because
      # its "image", "city" or "description" entry is nil) is kept without a
      # description, and a dump of the item is written to standard error.
      # Standard output is left untouched so a feed printed with +puts+ is
      # not polluted by diagnostics.
      def to_rss
        require 'rss/maker'

        RSS::Maker.make("2.0") do |m|
          m.channel.title = "leboncoin.fr"
          m.channel.link = @link
          m.channel.description = "leboncoin.fr"
          m.items.do_sort = true

          @items.each do |item|
            price = item["price"].nil? ? "" : item["price"].to_s + " " + item["currency"]
            postcode = item["postcode"].nil? ? "" : item["postcode"]

            i = m.items.new_item
            i.title = item["title"]
            i.link = item["link"]
            begin
              i.description = LeBonCoin::HTMLUtils.convert("<img src='" + item["image"] + "'/><br/>" \
                + "<b>Ville</b> : " + item["city"] + "<br/>" \
                + "<b>Code postal</b> : " + postcode + "<br/>" \
                + "<p>" + item["description"] + "</p><hr/>" \
                + "<b>Prix</b> : " + price + "<hr/>")
            rescue
              require 'json'
              warn ">>>> ERROR >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n"
              warn JSON.pretty_generate(item)
              warn "\n<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
            end
            # The parsed item date is not used; entries are stamped with the
            # time the feed is generated.
            i.date = Time.now #item["date"]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
1
+ require 'leboncoin/items'
2
+
3
module LeBonCoin
  module Search
    class << self
      ###
      # Load the given URL as an HTML document.
      #
      # url:: address to fetch; nil is accepted and yields nil.
      #
      # Returns the parsed Nokogiri document, or nil when the URL is nil or
      # when fetching/parsing raises a StandardError.
      def loadHTML(url)
        return nil if url.nil?

        require 'open-uri'
        require 'nokogiri'

        begin
          # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is used
          # when open-uri provides it. The block form closes the handle.
          if URI.respond_to?(:open)
            URI.open(url) { |io| Nokogiri::HTML(io) }
          else
            open(url) { |io| Nokogiri::HTML(io) }
          end
        rescue
          nil
        end
      end

      ###
      # Parse items from a given URL, following "Page suivante" links until
      # +size+ items have been collected or no further page is available.
      #
      # url::   address of a result page.
      # size::  maximum number of items wanted.
      # items:: collection to fill; it must respond to +size+ and
      #         +createItem+. A new SearchItems is created by default.
      #
      # Returns +items+. A page that cannot be loaded contributes nothing.
      def parseItems(url, size, items = LeBonCoin::Search::SearchItems.new(url))
        doc = loadHTML(url)
        return items if doc.nil?

        doc.xpath('//table[@id="hl"]/tr').each do |node|
          break unless items.size < size
          parseItemNode(node, items.createItem)
        end

        doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
          break unless items.size < size
          parseItems(node['href'], size, items)
        end

        items
      end

      ###
      # Parse item from a given XML node.
      #
      # Fills the "date", "image", "title", "link", "currency" and "price"
      # entries of +item+ from the row, then completes it with the data of
      # the page the row links to (see parseItem). Every field is extracted
      # independently: a field that cannot be read is set to nil (or to a
      # placeholder for the title) without affecting the others.
      #
      # Returns +item+.
      def parseItemNode(node, item = {})
        # DATE
        item["date"] = begin
          require 'date'
          # The French labels are rewritten into something DateTime.parse
          # accepts: "ao.t" becomes "aug", today/yesterday become "dd mon".
          DateTime.parse(
            node.xpath('td[1]')[0].inner_html.strip
              .gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
              .gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
              .gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
          )
        rescue
          nil
        end

        # IMAGE
        item["image"] = begin
          node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip
        rescue
          nil
        end

        # TITLE
        item["title"] = begin
          LeBonCoin::HTMLUtils.convert(node.xpath('td[3]/a')[0].content.strip)
        rescue
          "UNKNOW TITLE"
        end

        # LINK
        item["link"] = begin
          node.xpath('td[3]/a')[0]["href"].strip
        rescue
          nil
        end

        # PRICE
        item["currency"] = "EUR"
        item["price"] = begin
          # The last two characters of the text node are dropped before the
          # integer conversion.
          node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i
        rescue
          nil
        end

        parseItem(item["link"], item)
      end

      ###
      # Parse item from a given HTML link.
      #
      # Fills the "description", "postcode" and "city" entries of +item+
      # from the page at +url+. Each entry is nil when the page cannot be
      # loaded or the expected element is missing.
      #
      # Returns +item+.
      def parseItem(url, item = {})
        doc = loadHTML(url)

        # DESCRIPTION
        item["description"] = begin
          LeBonCoin::HTMLUtils.convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
        rescue
          nil
        end

        # Raw location text from which postcode and city are both derived.
        value = begin
          doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
        rescue
          nil
        end

        # POSTCODE
        item["postcode"] = begin
          value[/[0-9]+/]
        rescue
          nil
        end

        # CITY
        item["city"] = begin
          value.gsub(/[0-9]+ /, "")
        rescue
          nil
        end

        item
      end
    end
  end
end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 2
9
- version: 0.0.2
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Karim DRIDI
@@ -54,6 +54,9 @@ extra_rdoc_files:
54
54
  - lib/leboncoin.rb
55
55
  files:
56
56
  - README.rdoc
57
+ - lib/leboncoin/htmlutils.rb
58
+ - lib/leboncoin/items.rb
59
+ - lib/leboncoin/search.rb
57
60
  - lib/leboncoin.rb
58
61
  - leboncoin.gemspec
59
62
  has_rdoc: true