someter 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest +9 -0
- data/README +22 -0
- data/Rakefile +12 -0
- data/lib/aggregate.rb +33 -0
- data/lib/dimension.rb +49 -0
- data/lib/review.rb +3 -0
- data/lib/someter.rb +75 -0
- data/lib/test.rb +119 -0
- data/someter.gemspec +33 -0
- data/someter.rb +4 -0
- metadata +100 -0
data/Manifest
ADDED
data/README
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
A gem that parses HTML pages for someter data and transforms it to JSON
|
2
|
+
|
3
|
+
* install :
|
4
|
+
|
5
|
+
gem install someter
|
6
|
+
|
7
|
+
Note : you need to install nokogiri (gem install nokogiri)
|
8
|
+
|
9
|
+
* use :
|
10
|
+
|
11
|
+
sm = SoMeter.new(:string =>string_to_parse)
|
12
|
+
|
13
|
+
or :
|
14
|
+
|
15
|
+
sm = SoMeter.new(:url => valid_url)
|
16
|
+
|
17
|
+
|
18
|
+
* print data in json format :
|
19
|
+
|
20
|
+
puts sm.to_json
|
21
|
+
|
22
|
+
* for a more complete example, see : lib/test.rb
|
data/Rakefile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'echoe'
|
4
|
+
|
5
|
+
# Packaging definition for the someter gem, configured through Echoe.
Echoe.new('someter', '0.0.2') do |p|
  p.description = "A gem that parses html pages looking for someter data and transforming it to json"
  p.url = "http://github.com/addame/socialmeter"
  p.author = "Mehdi Adda"
  p.email = "mehdi.adda @nospam@ gmail.com"
  p.ignore_pattern = ["tmp/*", "script/*"]
  # lib/someter.rb requires nokogiri at load time, so it has to be a runtime
  # dependency: as a development dependency it was not installed together
  # with the gem and `require 'someter'` failed on a fresh install.
  p.runtime_dependencies = ['nokogiri']
  # Kept explicit (and empty) so no development dependency is added beyond
  # what was declared before.
  p.development_dependencies = []
end
|
data/lib/aggregate.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
# Aggregated rating of one item along one dimension, as read from a
# "someter aggregates" element.
#
# dimension - text of the element with class "dimension"
# item      - text of the element with class "item"
# aggs      - Array of {:function => name, :value => text} hashes, one per
#             ".aggs > .func" element
class Aggregate
  attr_accessor :dimension, :item, :aggs

  # Serializes the aggregate to a JSON string.
  #
  # The json library calls to_json(state) on objects nested inside an Array or
  # a Hash, so the generator arguments must be accepted and forwarded; a
  # zero-argument to_json raises ArgumentError as soon as an Aggregate is
  # serialized as part of a collection.
  def to_json(*args)
    to_h.to_json(*args)
  end

  # Returns the aggregate as a Hash with the keys :dimension, :item and :aggs.
  def to_h
    { :dimension => @dimension, :item => @item, :aggs => @aggs }
  end

  # Builds an Aggregate from a parsed node.
  #
  # agg_doc - node answering css(selector); the matched elements must answer
  #           inner_text and [] (attribute lookup), as Nokogiri nodes do.
  #
  # Returns the populated Aggregate.
  def self.parse(agg_doc)
    aggregate = new

    # item and dimension names; when several elements match, the last one wins
    [:item, :dimension].each do |attribute|
      agg_doc.css(".#{attribute}").each do |elem|
        aggregate.send("#{attribute}=", elem.inner_text)
      end
    end

    # one entry per aggregation function: the function name is whatever
    # remains of the class attribute once the "func" marker class is removed,
    # regardless of the order in which the classes are written
    aggregate.aggs = agg_doc.css('.aggs > .func').map do |func|
      function_name = func[:class].to_s.split.reject { |css_class| css_class == 'func' }.join(' ')
      { :function => function_name, :value => func.inner_text }
    end

    aggregate
  end
end
|
data/lib/dimension.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
# Description of one rating dimension, as read from a "someter dimension"
# element: its name (the element id), its type, the list of allowed values
# and the optional best/worst/min/max/step markers.
class Dimension
  attr_accessor :name, :type, :values, :best, :worst, :min, :max, :step_to_best,
                :step_to_worst

  # Serializes the dimension to a JSON string.
  #
  # The json library calls to_json(state) on objects nested inside an Array or
  # a Hash, so the generator arguments must be accepted and forwarded; a
  # zero-argument to_json raises ArgumentError as soon as a Dimension is
  # serialized as part of a collection.
  def to_json(*args)
    to_h.to_json(*args)
  end

  # Returns { name => properties }. Properties that are nil are left out, and
  # so is :values when it is nil or empty.
  def to_h
    properties = {}
    properties[:type] = @type unless @type.nil?
    properties[:values] = @values unless @values.nil? || @values.empty?
    [:best, :worst, :min, :max, :step_to_best, :step_to_worst].each do |property|
      value = send(property)
      properties[property] = value unless value.nil?
    end
    { @name => properties }
  end

  # Builds a Dimension from a parsed node.
  #
  # dim_doc - node answering [] (attribute lookup) and css(selector); the
  #           matched elements must answer inner_text, as Nokogiri nodes do.
  #
  # Returns the populated Dimension.
  def self.parse(dim_doc)
    dimension = new

    # the dimension name is the id attribute of the element itself
    dimension.name = dim_doc[:id]

    # allowed values, in document order
    dimension.values = dim_doc.css('.values > .value').map { |value| value.inner_text }

    # single-valued properties (type included); when several elements match,
    # the last one wins
    [:type, :best, :worst, :min, :max, :step_to_best, :step_to_worst].each do |property|
      dim_doc.css(".#{property}").each do |elem|
        dimension.send("#{property}=", elem.inner_text)
      end
    end

    dimension
  end
end
|
data/lib/review.rb
ADDED
data/lib/someter.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
# crawl a webpage and extract review data and transforms it to json
|
2
|
+
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'dimension'
|
5
|
+
require 'review'
|
6
|
+
require 'aggregate'
|
7
|
+
require 'json'
|
8
|
+
|
9
|
+
# Extracts "someter" review markup from an HTML document, given either as a
# string or as a location to read, and exposes it as JSON.
class SoMeter

  attr_accessor :reviews, :url, :string, :doc

  # options - Hash with at least one of:
  #           :url    - http(s)/ftp URL, or local file path, of the page
  #           :string - HTML markup to parse
  #           When both are given, :url is used.
  #
  # Raises ArgumentError when neither option is given.
  def initialize(options)
    @url = options[:url]
    @string = options[:string]
    if !@url.nil?
      @doc = nokogiri_doc
    elsif !@string.nil?
      # force_encoding changes its receiver: work on a copy so the caller's
      # string keeps its encoding and frozen strings are accepted
      @doc = Nokogiri::HTML(@string.dup.force_encoding('utf-8'))
    else
      raise ArgumentError, 'SoMeter.new needs a :url or a :string option'
    end
    so_meter_it
  end

  # Not implemented.
  def print_reviews
  end

  # Not implemented.
  def print_review(index)

  end

  # Serializes the collected reviews to a JSON string.
  #
  # Every collected object is converted with to_h first, so serialization
  # does not depend on how those objects implement to_json. Extra arguments
  # are forwarded to Hash#to_json, which lets a SoMeter be nested inside
  # another structure being serialized.
  def to_json(*args)
    plain = {}
    @reviews.each do |kind, entries|
      plain[kind] = entries.map { |entry| entry.to_h }
    end
    plain.to_json(*args)
  end

  # Writes the collected dimensions, then the collected aggregates, to
  # standard output (an empty line for a kind that was not collected).
  def print
    puts @reviews[:dimensions].to_s
    puts @reviews[:aggregates].to_s
  end


  private

  # Fills @reviews from the parsed document. Only aggregates are collected;
  # the dimensions lookup below is disabled.
  def so_meter_it
    @reviews = {}
    #@reviews[:dimensions] = dimensions
    @reviews[:aggregates] = aggregates
  end

  # Returns one Dimension per ".someter.dimension" element of the document.
  def dimensions
    @doc.css('.someter.dimension').map { |dim| Dimension.parse(dim) }
  end

  # Returns one Aggregate per ".someter.aggregates" element of the document.
  def aggregates
    @doc.css('.someter.aggregates').map { |agg| Aggregate.parse(agg) }
  end

  # Reads the page designated by @url and parses it.
  #
  # Remote locations go through open-uri, local paths through File.read:
  # Kernel#open is avoided because it runs a command when the value starts
  # with "|". The block form closes the connection, and the content is read
  # as-is (joining readlines with "\n" doubled every line break).
  def nokogiri_doc
    location = @url.to_s
    html =
      if location =~ %r{\A(?:https?|ftp)://}i
        require 'open-uri' # loaded lazily: only needed for remote pages
        URI.parse(location).open { |io| io.read }
      else
        File.read(location)
      end
    Nokogiri::HTML(html.force_encoding('utf-8'))
  end

end
|
data/lib/test.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'someter'
|
2
|
+
|
3
|
+
|
4
|
+
string_to_parse = <<END_STR
|
5
|
+
<div>
|
6
|
+
<!--taste rating dimension-->
|
7
|
+
<span class="someter dimension" id="taste">
|
8
|
+
<span class="type">integer</span><!--optional if
|
9
|
+
step_to_best, min and max are specified-->
|
10
|
+
<span class="min">0</span>
|
11
|
+
<span class="max">10</span>
|
12
|
+
<span class="best">10</span><!--optional-->
|
13
|
+
<span class="worst">0</span><!--optional-->
|
14
|
+
<span class="step_to_best">+1</span><!--optional-->
|
15
|
+
<span class="step_to_worst">-1</span><!--optional-->
|
16
|
+
</span>
|
17
|
+
|
18
|
+
<!--dressing rating dimension-->
|
19
|
+
<span class="someter dimension" id="dressing">
|
20
|
+
<span class="type">integer</span><!--optional if
|
21
|
+
step_to_best, min and max are specified-->
|
22
|
+
<span class="min">0</span>
|
23
|
+
<span class="max">10</span>
|
24
|
+
<span class="best">10</span><!--optional-->
|
25
|
+
<span class="worst">0</span><!--optional-->
|
26
|
+
<span class="step_to_best">+1</span><!--optional-->
|
27
|
+
<span class="step_to_worst">-1</span><!--optional-->
|
28
|
+
</span>
|
29
|
+
|
30
|
+
<!--dough rating dimension-->
|
31
|
+
<span class="someter dimension" id="dough">
|
32
|
+
<span class="type">integer</span><!--optional if
|
33
|
+
step_to_best, min and max are specified-->
|
34
|
+
<span class="min">0</span>
|
35
|
+
<span class="max">3</span>
|
36
|
+
<span class="best">0</span><!--optional-->
|
37
|
+
<span class="worst">3</span><!--optional-->
|
38
|
+
<span class="step_to_best">+1</span><!--optional-->
|
39
|
+
<span class="step_to_worst">-1</span><!--optional-->
|
40
|
+
</span>
|
41
|
+
|
42
|
+
<!--looking rating dimension-->
|
43
|
+
<span class="someter dimension" id="looking">
|
44
|
+
<span class="type">string</span><!--optional if
|
45
|
+
step_to_best, min and max are specified-->
|
46
|
+
<span class="values">
|
47
|
+
<span class="value">ugly</span>
|
48
|
+
<span class="value">not to bad</span>
|
49
|
+
<span class="value">good</span>
|
50
|
+
<span class="value">very good</span>
|
51
|
+
</span>
|
52
|
+
<span class="best">very good</span><!--optional-->
|
53
|
+
<span class="worst">ugly</span><!--optional-->
|
54
|
+
</span>
|
55
|
+
|
56
|
+
<div class="someter aggregates">
|
57
|
+
<span class="item">L'Amourita Pizza</span>
|
58
|
+
<span class="dimension">taste</span>
|
59
|
+
<span class="aggs">
|
60
|
+
Taste: average
|
61
|
+
<span class="func average">6</span>
|
62
|
+
based on
|
63
|
+
<span class="func ratings">24</span>,
|
64
|
+
maximum
|
65
|
+
<span class="func max">8</span>
|
66
|
+
</span>
|
67
|
+
</div>
|
68
|
+
<div class="someter aggregates">
|
69
|
+
<span class="item">L'Amourita Pizza</span>
|
70
|
+
<span class="dimension">dressing</span>
|
71
|
+
<span class="aggs">
|
72
|
+
Dressing: average
|
73
|
+
<span class="func average">7</span>
|
74
|
+
based on
|
75
|
+
<span class="func ratings">24</span>,
|
76
|
+
maximum
|
77
|
+
<span class="func max">10</span>
|
78
|
+
</span>
|
79
|
+
</div>
|
80
|
+
<div class="someter aggregates">
|
81
|
+
<span class="item">L'Amourita Pizza</span>
|
82
|
+
<span class="dimension">dough</span>
|
83
|
+
<span class="aggs">
|
84
|
+
Dough: average
|
85
|
+
<span class="func average">1</span>
|
86
|
+
based on
|
87
|
+
<span class="func ratings">24</span>,
|
88
|
+
maximum
|
89
|
+
<span class="func max">3</span>
|
90
|
+
</span>
|
91
|
+
</div>
|
92
|
+
<div class="someter aggregates">
|
93
|
+
<span class="item">L'Amourita Pizza</span>
|
94
|
+
<span class="dimension">looking</span>
|
95
|
+
<span class="aggs">
|
96
|
+
Looking: average
|
97
|
+
<span class="func average">good</span>
|
98
|
+
based on
|
99
|
+
<span class="func ratings">24</span>,
|
100
|
+
maximum
|
101
|
+
<span class="func max">very good</span>
|
102
|
+
</span>
|
103
|
+
</div>
|
104
|
+
</div>
|
105
|
+
END_STR
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
# Appends message, followed by a line break, to test.txt in the current
# working directory (the file is created when missing).
def write_to_file(message)
  File.open('test.txt', 'a') { |out| out.puts message }
end
|
114
|
+
|
115
|
+
|
116
|
+
sm = SoMeter.new(:string =>string_to_parse)
|
117
|
+
|
118
|
+
#puts sm.to_json
|
119
|
+
write_to_file sm.to_json
|
data/someter.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for someter 0.0.2.
Gem::Specification.new do |s|
  s.name = %q{someter}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Mehdi Adda"]
  s.date = %q{2010-07-15}
  s.description = %q{A gem that parses html pages looking for someter data and transforming it to json}
  s.email = %q{mehdi.adda @nospam@ gmail.com}
  s.extra_rdoc_files = ["README", "lib/aggregate.rb", "lib/dimension.rb", "lib/review.rb", "lib/someter.rb", "lib/test.rb"]
  s.files = ["Manifest", "README", "Rakefile", "lib/aggregate.rb", "lib/dimension.rb", "lib/review.rb", "lib/someter.rb", "lib/test.rb", "someter.rb", "someter.gemspec"]
  s.homepage = %q{http://github.com/addame/socialmeter}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Someter", "--main", "README"]
  s.require_paths = ["lib"]
  # guarded so the file still loads on a RubyGems that no longer has this setter
  s.rubyforge_project = %q{someter} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{A gem that parses html pages looking for someter data and transforming it to json}

  s.specification_version = 3 if s.respond_to? :specification_version=

  # lib/someter.rb requires nokogiri at load time, so it is a runtime
  # dependency: declared as a development dependency it was not installed
  # together with the gem. add_runtime_dependency is used when available,
  # add_dependency otherwise.
  if s.respond_to? :add_runtime_dependency
    s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
  else
    s.add_dependency(%q<nokogiri>, [">= 0"])
  end
end
|
data/someter.rb
ADDED
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: someter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Mehdi Adda
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-07-15 00:00:00 -04:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: nokogiri
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description: A gem that parses html pages looking for someter data and transforming it to json
|
36
|
+
email: mehdi.adda @nospam@ gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- README
|
43
|
+
- lib/aggregate.rb
|
44
|
+
- lib/dimension.rb
|
45
|
+
- lib/review.rb
|
46
|
+
- lib/someter.rb
|
47
|
+
- lib/test.rb
|
48
|
+
files:
|
49
|
+
- Manifest
|
50
|
+
- README
|
51
|
+
- Rakefile
|
52
|
+
- lib/aggregate.rb
|
53
|
+
- lib/dimension.rb
|
54
|
+
- lib/review.rb
|
55
|
+
- lib/someter.rb
|
56
|
+
- lib/test.rb
|
57
|
+
- someter.rb
|
58
|
+
- someter.gemspec
|
59
|
+
has_rdoc: true
|
60
|
+
homepage: http://github.com/addame/socialmeter
|
61
|
+
licenses: []
|
62
|
+
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options:
|
65
|
+
- --line-numbers
|
66
|
+
- --inline-source
|
67
|
+
- --title
|
68
|
+
- Someter
|
69
|
+
- --main
|
70
|
+
- README
|
71
|
+
require_paths:
|
72
|
+
- lib
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
hash: 3
|
79
|
+
segments:
|
80
|
+
- 0
|
81
|
+
version: "0"
|
82
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 11
|
88
|
+
segments:
|
89
|
+
- 1
|
90
|
+
- 2
|
91
|
+
version: "1.2"
|
92
|
+
requirements: []
|
93
|
+
|
94
|
+
rubyforge_project: someter
|
95
|
+
rubygems_version: 1.3.7
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: A gem that parses html pages looking for someter data and transforming it to json
|
99
|
+
test_files: []
|
100
|
+
|