leboncoin 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,16 +1,18 @@
1
1
  = Leboncoin
2
2
 
3
- Search library for leboncoin.fr
3
+ Let's search through leboncoin.fr items!
4
4
 
5
5
  == Install
6
6
 
7
- gem install kdridi-leboncoin --source http://gems.github.com
7
+ gem install leboncoin
8
8
 
9
9
  == Usage
10
10
 
11
- Blablabla ...
12
-
13
- Leboncoin.seach("keywords")
11
+ LeBonCoin::Search('dreamcast', 10).each do |item|
12
+ puts "#{item['title']}"
13
+ end
14
+ puts LeBonCoin::Search('dreamcast').to_json
15
+ puts LeBonCoin::Search('dreamcast').to_rss
14
16
 
15
17
  == License
16
18
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{leboncoin}
5
- s.version = "0.0.2"
5
+ s.version = "0.0.4"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Karim DRIDI"]
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.description = %q{leboncoin toolkit.}
11
11
  s.email = %q{karim.dridi@gmail.com}
12
12
  s.extra_rdoc_files = ["README.rdoc", "lib/leboncoin.rb"]
13
- s.files = ["README.rdoc", "lib/leboncoin.rb", "leboncoin.gemspec"]
13
+ s.files = ["README.rdoc", "lib/leboncoin/htmlutils.rb", "lib/leboncoin/items.rb", "lib/leboncoin/search.rb", "lib/leboncoin.rb", "leboncoin.gemspec"]
14
14
  s.homepage = %q{http://github.com/kdridi/leboncoin}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Leboncoin", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -1,135 +1,11 @@
1
- class Leboncoin
2
- VERSION = '0.0.2'
1
+ require 'leboncoin/search'
3
2
 
4
- def convert(str)
5
- require 'htmlentities'
6
- str = HTMLEntities.new.encode(str.force_encoding("ISO-8859-15").encode("UTF-8"), :hexadecimal)
7
- .gsub(/é/, "é")
8
- .gsub(/™/, "™")
9
- .gsub(/®/, "©")
3
+ module LeBonCoin
4
+ VERSION = '0.0.4'
10
5
 
11
- str = HTMLEntities.new.decode(str)
12
- .gsub(/\u0092/, "'")
13
- .gsub(/\u0096/, "-")
14
- .gsub(/\u0095/, "•")
15
- .gsub(/\u0099/, "™")
16
- .gsub(/\u0080/, "€")
17
-
18
- return str
19
- end
20
-
21
- def retrieveData(url)
22
- require 'open-uri'
23
- return open(url)
24
- end
25
-
26
- def createDocument(url)
27
- require 'nokogiri'
28
- return Nokogiri::HTML(retrieveData(url))
29
- end
30
-
31
- def createItem(node)
32
- item = Hash.new
33
-
34
- # DATE
35
- require 'date'
36
- item["dateFR"] = node.xpath('td[1]')[0].inner_html.strip
37
- item["dateEN"] = item["dateFR"]
38
- .gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
39
- .gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
40
- .gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
41
-
42
- item["date"] = DateTime.parse(item["dateEN"])
43
-
44
- # IMAGE
45
- item["image"] = begin node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip rescue nil end
46
-
47
- # NAME & LINK
48
- item["title"] = convert(node.xpath('td[3]/a')[0].content.strip)
49
- item["link"] = node.xpath('td[3]/a')[0]["href"].strip
50
-
51
- # PRICE
52
- item["price"] = begin node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i rescue nil end
53
- item["currency"] = "EUR"
54
-
55
- # DESCRIPTION
56
- doc = createDocument(item["link"])
57
- item["description"] = convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
58
- item["city"] = doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip
59
- item["postcode"] = item["city"][/[0-9]+/]
60
- item["city"] = item["city"].gsub(/[0-9]+ /, "")
61
-
62
- return item
63
- end
64
-
65
- def parseItems(items, url, size)
66
- doc = createDocument url
67
-
68
- continue = true
69
- doc.xpath('//table[@id="hl"]/tr').each do |node|
70
- if items.size < size
71
- items.push createItem(node)
72
- end
6
+ class << self
7
+ def Search keywords, size = 10
8
+ LeBonCoin::Search.parseItems "http://www.Leboncoin.fr/occasions/?f=a&th=1&q=" + keywords, size
73
9
  end
74
-
75
- if items.size < size
76
- doc.xpath('//a[starts-with(text(), "Page suivante")]').each do |node|
77
- parseItems(items, node['href'], size)
78
- end
79
- end
80
- end
81
-
82
- def createItems(url, size)
83
- items = Array.new
84
- parseItems(items, url, size)
85
-
86
- return items
87
- end
88
-
89
- def createJSON(url, size)
90
- require 'json'
91
- return JSON.pretty_generate(createItems(url, size))
92
10
  end
93
-
94
- def createRSS(url, size)
95
- require 'rss/maker'
96
-
97
- content = RSS::Maker.make("2.0") do |m|
98
- m.channel.title = "leboncoin.fr"
99
- m.channel.link = url
100
- m.channel.description = "leboncoin.fr"
101
- m.items.do_sort = true
102
- createItems(url, size).each do |item|
103
- title = ""
104
- price = ""
105
- if item["price"] != nil
106
- price = item["price"].to_s + " " + item["currency"]
107
- title = "[" + price + "] "
108
- end
109
-
110
- postcode = ""
111
- if item["postcode"] != nil
112
- postcode = item["postcode"]
113
- end
114
-
115
- i = m.items.new_item
116
- i.title = title + item["title"]
117
- i.link = item["link"]
118
- begin
119
- i.description = convert("<img src='" + item["image"] + "'/><br/>" \
120
- + "<b>Ville</b> : " + item["city"] + "<br/>" \
121
- + "<b>Code postal</b> : " + postcode + "<br/>" \
122
- + "<p>" + item["description"] + "</p><hr/>" \
123
- + "<b>Prix</b> : " + price + "<hr/>")
124
- rescue
125
- require 'json'
126
- puts ">>>> " + JSON.pretty_generate(item)
127
- end
128
- i.date = Time.now #item["date"]
129
- end
130
- end
131
- end
132
-
133
11
  end
134
-
135
- # puts Leboncoin.new.createJSON('http://www.leboncoin.fr/occasions/?f=a&th=1&q=dreamcast', 2)
@@ -0,0 +1,22 @@
1
module LeBonCoin
  # Text clean-up helpers for strings scraped from leboncoin.fr pages.
  module HTMLUtils
    # Hex-entity pairs that show up after UTF-8 bytes have been re-read as
    # ISO-8859-15 and entity-encoded, mapped to the replacement entity.
    # NOTE(review): the last pair turns a registered-sign byte sequence into
    # &copy; -- kept as in the original table, confirm it is intentional.
    MOJIBAKE_ENTITIES = {
      '&#xc3;&#xa9;' => '&eacute;',
      '&#xc2;&#x99;' => '&#153;',
      '&#xc2;&#xae;' => '&copy;'
    }.freeze
    MOJIBAKE_PATTERN = Regexp.union(MOJIBAKE_ENTITIES.keys).freeze

    # C1 control code points left over after decoding, mapped to the text
    # that should be shown instead.
    C1_REPLACEMENTS = {
      "\u0092" => "'",
      "\u0096" => '-',
      "\u0095" => '&#149;',
      "\u0099" => '&#153;',
      "\u0080" => '&euro;'
    }.freeze
    C1_PATTERN = Regexp.union(C1_REPLACEMENTS.keys).freeze

    class << self
      ###
      # Normalise a scraped string.
      #
      # The bytes of +str+ are interpreted as ISO-8859-15, transcoded to
      # UTF-8, entity-encoded (hexadecimal) so the known mojibake entity
      # pairs can be repaired, decoded again, and finally the remaining C1
      # control characters are replaced.
      #
      # The argument itself is never modified: the re-tagging is done on a
      # copy, so frozen strings are accepted and the caller keeps its
      # original encoding.
      #
      # str:: the String to clean up
      #
      # Returns a new UTF-8 String.
      def convert(str)
        # Lazy require: the gem is only needed once a conversion is requested.
        require 'htmlentities'

        coder = HTMLEntities.new
        # force_encoding mutates its receiver, hence the dup.
        utf8 = str.dup.force_encoding('ISO-8859-15').encode('UTF-8')

        repaired = coder.encode(utf8, :hexadecimal)
                        .gsub(MOJIBAKE_PATTERN, MOJIBAKE_ENTITIES)

        coder.decode(repaired).gsub(C1_PATTERN, C1_REPLACEMENTS)
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
+ require 'leboncoin/htmlutils'
2
+
3
module LeBonCoin
  module Search
    ###
    # Ordered collection of the items (plain Hashes with String keys) found
    # by one search, with JSON and RSS 2.0 exporters.
    #
    # Every call to +new+ yields an independent collection, so several
    # search results can be kept side by side.
    class SearchItems
      include Enumerable

      ###
      # link:: URL of the search the items come from; used as the RSS
      #        channel link.
      def initialize(link)
        @link = link
        @items = []
      end

      ###
      # Append +item+ (a new empty Hash by default) to the collection and
      # return that same Hash so the caller can fill it in.
      def createItem(item = {})
        @items.push item
        item
      end

      ###
      # Yield every stored item in insertion order.
      # Returns an Enumerator when no block is given.
      def each
        return to_enum(:each) unless block_given?

        @items.each { |item| yield item }
      end

      ###
      # Number of stored items.
      def size
        @items.size
      end

      ###
      # Pretty-printed JSON array of the stored items.
      # Extra arguments (such as the generator state handed over when the
      # collection is nested inside another JSON document) are accepted and
      # ignored.
      def to_json(*_args)
        require 'json'
        JSON.pretty_generate(@items)
      end

      ###
      # Build an RSS 2.0 feed with one entry per stored item.
      # Returns the object produced by RSS::Maker.make.
      def to_rss
        require 'rss/maker'

        RSS::Maker.make("2.0") do |maker|
          maker.channel.title = "leboncoin.fr"
          maker.channel.link = @link
          maker.channel.description = "leboncoin.fr"
          maker.items.do_sort = true

          @items.each do |item|
            entry = maker.items.new_item
            entry.title = item["title"]
            entry.link = item["link"]
            entry.description = rss_description(item)
            entry.date = Time.now # item["date"]
          end
        end
      end

      private

      ###
      # HTML body of one RSS entry.
      # Fields that could not be scraped (nil) are rendered as empty text,
      # and the <img> tag is left out when there is no image, so a missing
      # field no longer discards the whole description.
      def rss_description(item)
        price = item["price"].nil? ? "" : "#{item['price']} #{item['currency']}"
        image = item["image"].nil? ? "" : "<img src='#{item['image']}'/><br/>"

        html = "#{image}" \
               "<b>Ville</b> : #{item['city']}<br/>" \
               "<b>Code postal</b> : #{item['postcode']}<br/>" \
               "<p>#{item['description']}</p><hr/>" \
               "<b>Prix</b> : #{price}<hr/>"

        begin
          LeBonCoin::HTMLUtils.convert(html)
        rescue StandardError => e
          # Best effort: keep the unconverted markup and report on stderr so
          # the feed printed on stdout stays well-formed.
          warn "leboncoin: could not convert RSS description (#{e.class}: #{e.message})"
          html
        end
      end
    end
  end
end
@@ -0,0 +1,125 @@
1
+ require 'leboncoin/items'
2
+
3
module LeBonCoin
  ###
  # Scraper for leboncoin.fr result pages and item pages.
  module Search
    class << self
      ###
      # Load the given URL as an HTML document.
      #
      # The URL is opened through URI#open (provided by open-uri) rather
      # than Kernel#open: links are read from remote pages, and Kernel#open
      # would run a string starting with "|" as a command on older Rubies
      # and no longer opens URLs at all on Ruby 3.0+.
      #
      # Returns the Nokogiri document, or nil when the URL is invalid or the
      # page cannot be fetched or parsed.
      def loadHTML(url)
        require 'open-uri'
        require 'nokogiri'

        begin
          # Block form closes the downloaded stream once it has been parsed.
          URI.parse(url).open { |io| Nokogiri::HTML(io) }
        rescue StandardError
          nil
        end
      end

      ###
      # Parse items from a given URL, following the "Page suivante" link
      # until +size+ items have been collected, no further page exists, a
      # page cannot be loaded, or a page has already been visited.
      #
      # url::   first result page
      # size::  maximum number of items to collect
      # items:: collection receiving the items (must answer +size+ and
      #         +createItem+)
      #
      # Returns +items+.
      def parseItems(url, size, items = LeBonCoin::Search::SearchItems.new(url))
        visited = {}

        until url.nil? || visited[url]
          visited[url] = true

          doc = loadHTML url
          break if doc.nil?

          doc.xpath('//table[@id="hl"]/tr').each do |node|
            break if items.size >= size
            parseItemNode node, items.createItem
          end
          break if items.size >= size

          # Only the first matching anchor is followed, so a link that is
          # repeated on the page does not make the next page load twice.
          following = doc.xpath('//a[starts-with(text(), "Page suivante")]').first
          url = following.nil? ? nil : following['href']
        end

        items
      end

      ###
      # Parse item from a given XML node (one row of the result table).
      # Each field is extracted independently; a field that cannot be read
      # is stored as nil (or "UNKNOW TITLE" for the title). The item page is
      # then parsed through +parseItem+.
      #
      # Returns +item+.
      def parseItemNode(node, item = Hash.new)
        require 'date'

        # DATE
        item["date"] = attempt do
          DateTime.parse(
            node.xpath('td[1]')[0].inner_html.strip
              .gsub(/ ao.t<br>/, " aug<br>").gsub(/<br>/, " ")
              .gsub(/Aujourd'hui/, (Date.today - 0).strftime('%d %b').downcase)
              .gsub(/Hier/, (Date.today - 1).strftime('%d %b').downcase)
          )
        end

        # IMAGE
        item["image"] = attempt { node.xpath('td[2]/table/tbody/tr[2]/td[2]/a/img')[0]["src"].strip }

        # TITLE
        item["title"] = attempt("UNKNOW TITLE") do
          LeBonCoin::HTMLUtils.convert(node.xpath('td[3]/a')[0].content.strip)
        end

        # LINK
        item["link"] = attempt { node.xpath('td[3]/a')[0]["href"].strip }

        # PRICE (the last two characters of the text are dropped before the
        # integer conversion)
        item["currency"] = "EUR"
        item["price"] = attempt { node.xpath('td[3]/text()[3]')[0].content.strip.gsub(/..$/, "").to_i }

        parseItem item["link"], item
      end

      ###
      # Parse item from a given HTML link (the item's own page).
      # Fills "description", "postcode" and "city"; each is nil when the
      # page or the corresponding element is unavailable.
      #
      # Returns +item+.
      def parseItem(url, item = Hash.new)
        # No link, no page: skip the fetch and leave the fields nil.
        doc = url.nil? ? nil : loadHTML(url)

        # DESCRIPTION
        item["description"] = attempt do
          LeBonCoin::HTMLUtils.convert(doc.xpath('//span[@class="lbcAd_text"]').inner_html)
        end

        place = attempt { doc.xpath('//span[@class="ad_details_400"]/strong').inner_html.strip }

        # POSTCODE
        item["postcode"] = attempt { place[/[0-9]+/] }

        # CITY
        item["city"] = attempt { place.gsub(/[0-9]+ /, "") }

        item
      end

      private

      ###
      # Run the block and return its value, or +fallback+ when it raises a
      # StandardError (typically because an expected node is missing).
      def attempt(fallback = nil)
        yield
      rescue StandardError
        fallback
      end
    end
  end
end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 2
9
- version: 0.0.2
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Karim DRIDI
@@ -54,6 +54,9 @@ extra_rdoc_files:
54
54
  - lib/leboncoin.rb
55
55
  files:
56
56
  - README.rdoc
57
+ - lib/leboncoin/htmlutils.rb
58
+ - lib/leboncoin/items.rb
59
+ - lib/leboncoin/search.rb
57
60
  - lib/leboncoin.rb
58
61
  - leboncoin.gemspec
59
62
  has_rdoc: true