hentry_consumer 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -6
- data/README.md +5 -3
- data/hentry_consumer.gemspec +8 -4
- data/lib/hentry_consumer/element.rb +37 -17
- data/lib/hentry_consumer/format_rules.rb +4 -3
- data/lib/hentry_consumer/h_card.rb +1 -2
- data/lib/hentry_consumer/h_entry.rb +42 -41
- data/lib/hentry_consumer/h_feed.rb +9 -8
- data/lib/hentry_consumer/version.rb +1 -1
- data/lib/hentry_consumer.rb +10 -5
- data/spec/lib/hentry_consumer/h_card_spec.rb +20 -7
- data/spec/lib/hentry_consumer/h_entry_spec.rb +128 -52
- data/spec/lib/hentry_consumer/h_feed_spec.rb +4 -8
- data/spec/support/nested_example.html +620 -0
- metadata +70 -2
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -22,7 +22,7 @@ serialized h-entry objects. The returned Object structure looks something like t
|
|
22
22
|
|
23
23
|
## Current Version
|
24
24
|
|
25
|
-
0.
|
25
|
+
0.5.0
|
26
26
|
|
27
27
|
|
28
28
|
## Requirements
|
@@ -57,10 +57,12 @@ require "hentry_consumer"
|
|
57
57
|
HentryConsumer.parse(File|URL)
|
58
58
|
```
|
59
59
|
[Example Gist of HTML with h-entry posts](https://raw.github.com/gist/3835447/7128a66a3ac7e971a82daac5fa2076d17b88e435/gistfile1.html)
|
60
|
+
[Another Example Gist of HTML with h-entry posts](https://gist.github.com/88d6d476483e9528fb3a)
|
60
61
|
|
61
62
|
## Authors
|
62
63
|
|
63
64
|
* Bookis Smuin / [@bookis](https://github.com/bookis)
|
65
|
+
* Jessica Lynn Suttles / [@jlsuttles](https://github.com/jlsuttles)
|
64
66
|
|
65
67
|
## Contributions
|
66
68
|
|
@@ -78,13 +80,13 @@ If you find bugs, have feature requests or questions, please
|
|
78
80
|
### Specs
|
79
81
|
|
80
82
|
```bash
|
81
|
-
|
83
|
+
guard
|
82
84
|
```
|
83
85
|
|
84
86
|
### Releases
|
85
87
|
|
86
88
|
```bash
|
87
|
-
|
89
|
+
rake release
|
88
90
|
```
|
89
91
|
|
90
92
|
|
data/hentry_consumer.gemspec
CHANGED
@@ -6,8 +6,8 @@ require 'hentry_consumer/version'
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
7
|
gem.name = "hentry_consumer"
|
8
8
|
gem.version = HentryConsumer::VERSION
|
9
|
-
gem.authors = ["Bookis Smuin"]
|
10
|
-
gem.email = ["vegan.bookis@gmail.com"]
|
9
|
+
gem.authors = ["Bookis Smuin", "Jessica Lynn Suttles"]
|
10
|
+
gem.email = ["vegan.bookis@gmail.com", "jlsuttles@gmail.com"]
|
11
11
|
gem.description = %q{A hATOM feed parser}
|
12
12
|
gem.summary = %q{Takes in HTML containing an h-feed classed element and returns serialized data based on the Microformat 2 hEntry specs}
|
13
13
|
gem.homepage = "https://github.com/G5/hentry_consumer"
|
@@ -17,7 +17,11 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
|
-
gem.
|
21
|
-
gem.
|
20
|
+
gem.add_runtime_dependency 'nokogiri'
|
21
|
+
gem.add_runtime_dependency 'json'
|
22
22
|
|
23
|
+
gem.add_development_dependency "rspec", "~> 2.11.0"
|
24
|
+
gem.add_development_dependency "guard-rspec", "~> 2.1.0"
|
25
|
+
gem.add_development_dependency "rb-fsevent", "~> 0.9.2"
|
26
|
+
gem.add_development_dependency "debugger", "~> 1.2.1"
|
23
27
|
end
|
@@ -2,29 +2,40 @@ module HentryConsumer
|
|
2
2
|
class Element
|
3
3
|
attr_accessor :element
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@element =
|
7
|
-
parse_elements
|
5
|
+
def initialize(element)
|
6
|
+
@element = element
|
7
|
+
parse_elements(@element)
|
8
8
|
end
|
9
|
-
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
9
|
+
|
10
|
+
def parse_elements(elements)
|
11
|
+
if elements.is_a?(Nokogiri::XML::NodeSet)
|
12
|
+
elements.each do |element|
|
13
|
+
parse_elements(element)
|
14
|
+
end
|
15
|
+
else
|
16
|
+
parse_element(elements)
|
17
|
+
end
|
16
18
|
end
|
17
19
|
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
def parse_element(element)
|
21
|
+
classes = element["class"]
|
22
|
+
# element may be a microformat element
|
23
|
+
if classes =~ /(p|n|e|i|u|dt)-/
|
24
|
+
classes.split.each do |c|
|
25
|
+
parse_microformat(element, c)
|
22
26
|
end
|
27
|
+
# element may contain a microformat element
|
28
|
+
else
|
29
|
+
parse_elements(element.children)
|
23
30
|
end
|
24
31
|
end
|
25
32
|
|
26
|
-
def
|
27
|
-
|
33
|
+
def parse_microformat(element, c)
|
34
|
+
assign_value(symbolize_class(c), element.text)
|
35
|
+
end
|
36
|
+
|
37
|
+
def symbolize_class(c)
|
38
|
+
c.to_s.downcase.gsub(/\w{1,2}-/, "").to_sym
|
28
39
|
end
|
29
40
|
|
30
41
|
def [](key)
|
@@ -37,6 +48,7 @@ module HentryConsumer
|
|
37
48
|
|
38
49
|
def assign_value(symbolized_class, value)
|
39
50
|
return unless self.respond_to?(symbolized_class)
|
51
|
+
value = value.gsub('\n', " ").strip if value.is_a?(String)
|
40
52
|
if FormatRules.can_have_many?(symbolized_class)
|
41
53
|
self[symbolized_class] ||= []
|
42
54
|
self[symbolized_class] << value
|
@@ -44,5 +56,13 @@ module HentryConsumer
|
|
44
56
|
self[symbolized_class] = value
|
45
57
|
end
|
46
58
|
end
|
59
|
+
|
60
|
+
def to_html
|
61
|
+
@element.to_html
|
62
|
+
end
|
63
|
+
|
64
|
+
def to_xml
|
65
|
+
@element.to_xml
|
66
|
+
end
|
47
67
|
end
|
48
|
-
end
|
68
|
+
end
|
@@ -1,7 +1,8 @@
|
|
1
1
|
module HentryConsumer
|
2
2
|
class FormatRules
|
3
|
-
REQUIRED
|
4
|
-
UNIQUE
|
3
|
+
REQUIRED = []
|
4
|
+
UNIQUE = [:uid, :bookmark]
|
5
|
+
|
5
6
|
class << self
|
6
7
|
def required?(format)
|
7
8
|
REQUIRED.include? format
|
@@ -15,4 +16,4 @@ module HentryConsumer
|
|
15
16
|
end
|
16
17
|
end
|
17
18
|
end
|
18
|
-
end
|
19
|
+
end
|
@@ -1,59 +1,60 @@
|
|
1
1
|
module HentryConsumer
|
2
2
|
class HEntry < Element
|
3
|
-
|
4
3
|
attr_accessor :name, :categories, :author, :content, :bookmark, :published_at, :summary
|
5
4
|
alias_method :authors, :author
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
# overrides Element#parse_microformat
|
7
|
+
def parse_microformat(element, c)
|
8
|
+
case c
|
9
|
+
when "p-author" then parse_author(element)
|
10
|
+
when "p-category" then parse_category(element)
|
11
|
+
when "e-content" then parse_content(element)
|
12
|
+
when "dt-published" then parse_published(element)
|
13
|
+
when "u-uid" then parse_uid(element)
|
14
|
+
else parse_general(element, c)
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
18
|
+
def parse_author(element)
|
19
|
+
assign_value :author, HCard.new(element.children)
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse_category(element)
|
23
|
+
self.categories ||= {}
|
24
|
+
self.categories[element.text.gsub("\n", " ").strip] = element["href"]
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_content(element)
|
28
|
+
assign_value :content, element.inner_html
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse_published(element)
|
32
|
+
assign_value :published_at, element["datetime"]
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse_uid(element)
|
36
|
+
assign_value :bookmark, element["href"]
|
37
|
+
end
|
38
|
+
|
39
|
+
def parse_general(element, c)
|
40
|
+
assign_value symbolize_class(c), element.text
|
41
|
+
end
|
42
|
+
|
17
43
|
def to_json(*a)
|
18
44
|
{:items =>
|
19
45
|
[{
|
20
46
|
:type => ["h-entry"],
|
21
47
|
:properties => {
|
22
|
-
:name
|
23
|
-
:categories
|
24
|
-
:author
|
25
|
-
:content
|
26
|
-
:bookmark
|
27
|
-
:published_at
|
28
|
-
:summary
|
48
|
+
:name => self.name,
|
49
|
+
:categories => self.categories,
|
50
|
+
:author => self.author,
|
51
|
+
:content => self.content,
|
52
|
+
:bookmark => self.bookmark,
|
53
|
+
:published_at => self.published_at,
|
54
|
+
:summary => self.summary
|
29
55
|
}
|
30
56
|
}]
|
31
57
|
}.to_json(a)
|
32
58
|
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def parse_element(microformat, klass)
|
37
|
-
key, value = case klass
|
38
|
-
when 'p-author'
|
39
|
-
[symbolize_class(klass), HCard.new(microformat)]
|
40
|
-
when 'p-category'
|
41
|
-
self.categories ||= {}
|
42
|
-
self.categories[microformat.text.gsub('\n', " ").strip] = microformat["href"]
|
43
|
-
when 'e-content'
|
44
|
-
[:content, parse_content(microformat)]
|
45
|
-
when'dt-published'
|
46
|
-
[:published_at, microformat["datetime"]]
|
47
|
-
when "u-uid"
|
48
|
-
[:bookmark, microformat['href']]
|
49
|
-
else
|
50
|
-
[symbolize_class(klass), microformat.text.gsub('\n', " ").strip]
|
51
|
-
end
|
52
|
-
assign_value(key, value)
|
53
|
-
end
|
54
|
-
|
55
|
-
def parse_content(microformat)
|
56
|
-
microformat.inner_html
|
57
|
-
end
|
58
59
|
end
|
59
|
-
end
|
60
|
+
end
|
@@ -1,21 +1,22 @@
|
|
1
1
|
module HentryConsumer
|
2
2
|
class HFeed
|
3
|
-
attr_accessor :entries
|
3
|
+
attr_accessor :html, :entries
|
4
|
+
|
4
5
|
def initialize(html)
|
6
|
+
@html = Nokogiri::HTML(open(html).read)
|
5
7
|
@entries = []
|
6
|
-
parse_html
|
8
|
+
parse_html
|
7
9
|
end
|
8
10
|
|
9
|
-
def parse_html
|
10
|
-
|
11
|
-
|
12
|
-
entry = HEntry.new(mf_entry)
|
11
|
+
def parse_html
|
12
|
+
self.html.css(".h-entry").each do |hentry|
|
13
|
+
entry = HEntry.new(hentry.children)
|
13
14
|
self.entries << entry
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
17
18
|
def to_html
|
18
|
-
self.
|
19
|
+
self.html.css(".h-entry").collect(&:to_html).join
|
19
20
|
end
|
20
21
|
alias_method :to_s, :to_html
|
21
22
|
|
@@ -29,4 +30,4 @@ module HentryConsumer
|
|
29
30
|
end
|
30
31
|
|
31
32
|
end
|
32
|
-
end
|
33
|
+
end
|
data/lib/hentry_consumer.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
|
+
# I don't think you're supposed to require gems here,
|
2
|
+
# they should go in the Gemspec as dependencies
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'json'
|
4
|
-
require 'hentry_consumer/element'
|
5
6
|
Gem.find_files("hentry_consumer/**/*.rb").each { |path| require path }
|
6
7
|
|
7
8
|
module HentryConsumer
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
module ClassMethods
|
10
|
+
def parse(html)
|
11
|
+
HFeed.new(html)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
extend ClassMethods
|
15
|
+
def self.included(other)
|
16
|
+
other.extend(ClassMethods)
|
11
17
|
end
|
12
|
-
|
13
18
|
end
|
@@ -1,17 +1,30 @@
|
|
1
1
|
require 'hentry_consumer'
|
2
2
|
|
3
|
-
describe HentryConsumer::
|
3
|
+
describe HentryConsumer::HCard do
|
4
4
|
before do
|
5
5
|
stub_const("HentryConsumer::FormatRules::REQUIRED", [:url, :email])
|
6
6
|
stub_const("HentryConsumer::FormatRules::UNIQUE", [:uid, :bookmark])
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
describe "example.html" do
|
10
|
+
let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
|
11
|
+
let(:entry) { result.entries.first }
|
12
|
+
subject { entry.authors.first }
|
12
13
|
|
13
|
-
|
14
|
+
its(:name) { should eq ["Jessica Suttles"]}
|
14
15
|
|
15
|
-
|
16
|
-
|
16
|
+
its(:emails) { should have(1).things }
|
17
|
+
its(:urls) { should have(2).things }
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "nested_example.html" do
|
21
|
+
let(:result) { HentryConsumer.parse(File.open("spec/support/nested_example.html")) }
|
22
|
+
let(:entry) { result.entries.first }
|
23
|
+
subject { entry.authors.first }
|
24
|
+
|
25
|
+
its(:name) { should eq ["Jessica"]}
|
26
|
+
|
27
|
+
its(:emails) { should have(1).things }
|
28
|
+
its(:urls) { should have(1).things }
|
29
|
+
end
|
17
30
|
end
|
@@ -1,75 +1,151 @@
|
|
1
1
|
require 'hentry_consumer'
|
2
2
|
|
3
3
|
describe HentryConsumer::HEntry do
|
4
|
-
|
5
|
-
|
4
|
+
describe "example.html" do
|
5
|
+
let(:result) { HentryConsumer.parse("spec/support/example.html") }
|
6
|
+
let(:entry) { result.entries.first }
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
it "has a name" do
|
12
|
-
entry.name.should eq ["Senior Cat Living"]
|
13
|
-
end
|
8
|
+
it "should have an array of entries" do
|
9
|
+
entry.should be_an_instance_of HentryConsumer::HEntry
|
10
|
+
end
|
14
11
|
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
it "has a name" do
|
13
|
+
entry.name.should eq ["Senior Cat Living"]
|
14
|
+
end
|
18
15
|
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
it "has a summary" do
|
17
|
+
entry.summary.should eq ["Signed up with 3 locations"]
|
18
|
+
end
|
22
19
|
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
it "has a time" do
|
21
|
+
entry.published_at.should eq ["2012-08-26 20:09-0700"]
|
22
|
+
end
|
26
23
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
it "has an author as an hcard" do
|
31
|
-
entry.authors.first.should be_an_instance_of HentryConsumer::HCard
|
32
|
-
end
|
24
|
+
it "has a bookmark" do
|
25
|
+
entry.bookmark.should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700"
|
26
|
+
end
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
entry.categories.should be_an_instance_of Hash
|
28
|
+
it "should have 2 authors" do
|
29
|
+
entry.authors.should have(2).things HentryConsumer::HCard
|
37
30
|
end
|
38
31
|
|
39
|
-
it "has
|
40
|
-
entry.
|
32
|
+
it "has an author as an hcard" do
|
33
|
+
entry.authors.first.should be_an_instance_of HentryConsumer::HCard
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "categories" do
|
37
|
+
it "has an array of categories" do
|
38
|
+
entry.categories.should be_an_instance_of Hash
|
39
|
+
end
|
40
|
+
|
41
|
+
it "has a key of the content" do
|
42
|
+
entry.categories["New Customer"].should eq "http://g5.com/tag/new-customer"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "content" do
|
47
|
+
let(:content) { entry.content }
|
48
|
+
|
49
|
+
it "should be a blob of html" do
|
50
|
+
content.first.should match /Locations/
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should be a blob of html" do
|
54
|
+
content.first.should match /\<dt\>/
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should be a blob of html" do
|
58
|
+
content.first.should_not match /time/
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
describe "json" do
|
64
|
+
let(:json) { JSON.parse(entry.to_json) }
|
65
|
+
|
66
|
+
it { json["items"].should be_an_instance_of Array }
|
67
|
+
it { json["items"].first["type"].should include 'h-entry'}
|
68
|
+
it { json["items"].first["properties"]["name"].should eq ['Senior Cat Living']}
|
69
|
+
it { json["items"].first["properties"]["content"].first.should match /Locations/ }
|
70
|
+
it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
|
71
|
+
it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
|
72
|
+
it { json["items"].first["properties"]["bookmark"].should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700" }
|
73
|
+
it { json["items"].first["properties"]["published_at"].should eq ["2012-08-26 20:09-0700"] }
|
74
|
+
it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
|
41
75
|
end
|
42
76
|
end
|
43
77
|
|
44
|
-
describe "
|
45
|
-
let(:
|
78
|
+
describe "nested_example.html" do
|
79
|
+
let(:result) { HentryConsumer.parse("spec/support/nested_example.html") }
|
80
|
+
let(:entry) { result.entries.first }
|
46
81
|
|
47
|
-
it "should
|
48
|
-
|
82
|
+
it "should have an array of entries" do
|
83
|
+
entry.should be_an_instance_of HentryConsumer::HEntry
|
49
84
|
end
|
50
85
|
|
51
|
-
it "
|
52
|
-
|
86
|
+
it "has a name" do
|
87
|
+
entry.name.should eq ["Wabi Sabi Town"]
|
53
88
|
end
|
54
89
|
|
55
|
-
it "
|
56
|
-
|
90
|
+
it "has a summary" do
|
91
|
+
entry.summary.should eq ["Signed up with 2 locations"]
|
57
92
|
end
|
58
93
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
it
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
it
|
72
|
-
|
73
|
-
|
94
|
+
it "has a time" do
|
95
|
+
entry.published_at.should eq ["2012-10-10T19:11:17Z"]
|
96
|
+
end
|
97
|
+
|
98
|
+
it "has a bookmark" do
|
99
|
+
entry.bookmark.should eq "http://localhost:3000/customers/3"
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should have 1 author" do
|
103
|
+
entry.authors.should have(1).things HentryConsumer::HCard
|
104
|
+
end
|
105
|
+
|
106
|
+
it "has an author as an hcard" do
|
107
|
+
entry.authors.first.should be_an_instance_of HentryConsumer::HCard
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "categories" do
|
111
|
+
it "has an array of categories" do
|
112
|
+
entry.categories.should be_an_instance_of Hash
|
113
|
+
end
|
74
114
|
|
115
|
+
it "has a key of the content" do
|
116
|
+
entry.categories["Some Category"].should eq "#"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "content" do
|
121
|
+
let(:content) { entry.content }
|
122
|
+
|
123
|
+
it "should be a blob of html" do
|
124
|
+
content.first.should match /Locations/
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should be a blob of html" do
|
128
|
+
content.first.should match /\<dt\>/
|
129
|
+
end
|
130
|
+
|
131
|
+
it "should be a blob of html" do
|
132
|
+
content.first.should_not match /time/
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
137
|
+
describe "json" do
|
138
|
+
let(:json) { JSON.parse(entry.to_json) }
|
139
|
+
|
140
|
+
it { json["items"].should be_an_instance_of Array }
|
141
|
+
it { json["items"].first["type"].should include 'h-entry'}
|
142
|
+
it { json["items"].first["properties"]["name"].should eq ['Wabi Sabi Town']}
|
143
|
+
it { json["items"].first["properties"]["content"].first.should match /Locations/ }
|
144
|
+
it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
|
145
|
+
it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
|
146
|
+
it { json["items"].first["properties"]["bookmark"].should eq "http://localhost:3000/customers/3" }
|
147
|
+
it { json["items"].first["properties"]["published_at"].should eq ["2012-10-10T19:11:17Z"] }
|
148
|
+
it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
|
149
|
+
end
|
150
|
+
end
|
75
151
|
end
|