hentry_consumer 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -6
- data/README.md +5 -3
- data/hentry_consumer.gemspec +8 -4
- data/lib/hentry_consumer/element.rb +37 -17
- data/lib/hentry_consumer/format_rules.rb +4 -3
- data/lib/hentry_consumer/h_card.rb +1 -2
- data/lib/hentry_consumer/h_entry.rb +42 -41
- data/lib/hentry_consumer/h_feed.rb +9 -8
- data/lib/hentry_consumer/version.rb +1 -1
- data/lib/hentry_consumer.rb +10 -5
- data/spec/lib/hentry_consumer/h_card_spec.rb +20 -7
- data/spec/lib/hentry_consumer/h_entry_spec.rb +128 -52
- data/spec/lib/hentry_consumer/h_feed_spec.rb +4 -8
- data/spec/support/nested_example.html +620 -0
- metadata +70 -2
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -22,7 +22,7 @@ serialized h-entry objects. The returned Object structure looks something like t
|
|
22
22
|
|
23
23
|
## Current Version
|
24
24
|
|
25
|
-
0.
|
25
|
+
0.5.0
|
26
26
|
|
27
27
|
|
28
28
|
## Requirements
|
@@ -57,10 +57,12 @@ require "hentry_consumer"
|
|
57
57
|
HentryConsumer.parse(File|URL)
|
58
58
|
```
|
59
59
|
[Example Gist of HTML with h-entry posts](https://raw.github.com/gist/3835447/7128a66a3ac7e971a82daac5fa2076d17b88e435/gistfile1.html)
|
60
|
+
[Another Example Gist of HTML with h-entry posts](https://gist.github.com/88d6d476483e9528fb3a)
|
60
61
|
|
61
62
|
## Authors
|
62
63
|
|
63
64
|
* Bookis Smuin / [@bookis](https://github.com/bookis)
|
65
|
+
* Jessica Lynn Suttles / [@jlsuttles](https://github.com/jlsuttles)
|
64
66
|
|
65
67
|
## Contributions
|
66
68
|
|
@@ -78,13 +80,13 @@ If you find bugs, have feature requests or questions, please
|
|
78
80
|
### Specs
|
79
81
|
|
80
82
|
```bash
|
81
|
-
|
83
|
+
guard
|
82
84
|
```
|
83
85
|
|
84
86
|
### Releases
|
85
87
|
|
86
88
|
```bash
|
87
|
-
|
89
|
+
rake release
|
88
90
|
```
|
89
91
|
|
90
92
|
|
data/hentry_consumer.gemspec
CHANGED
@@ -6,8 +6,8 @@ require 'hentry_consumer/version'
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
7
|
gem.name = "hentry_consumer"
|
8
8
|
gem.version = HentryConsumer::VERSION
|
9
|
-
gem.authors = ["Bookis Smuin"]
|
10
|
-
gem.email = ["vegan.bookis@gmail.com"]
|
9
|
+
gem.authors = ["Bookis Smuin", "Jessica Lynn Suttles"]
|
10
|
+
gem.email = ["vegan.bookis@gmail.com", "jlsuttles@gmail.com"]
|
11
11
|
gem.description = %q{A hATOM feed parser}
|
12
12
|
gem.summary = %q{Takes in HTML containing an h-feed classed element and returns serialized data based on the Microformat 2 hEntry specs}
|
13
13
|
gem.homepage = "https://github.com/G5/hentry_consumer"
|
@@ -17,7 +17,11 @@ Gem::Specification.new do |gem|
|
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
|
-
gem.
|
21
|
-
gem.
|
20
|
+
gem.add_runtime_dependency 'nokogiri'
|
21
|
+
gem.add_runtime_dependency 'json'
|
22
22
|
|
23
|
+
gem.add_development_dependency "rspec", "~> 2.11.0"
|
24
|
+
gem.add_development_dependency "guard-rspec", "~> 2.1.0"
|
25
|
+
gem.add_development_dependency "rb-fsevent", "~> 0.9.2"
|
26
|
+
gem.add_development_dependency "debugger", "~> 1.2.1"
|
23
27
|
end
|
@@ -2,29 +2,40 @@ module HentryConsumer
|
|
2
2
|
class Element
|
3
3
|
attr_accessor :element
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@element =
|
7
|
-
parse_elements
|
5
|
+
def initialize(element)
|
6
|
+
@element = element
|
7
|
+
parse_elements(@element)
|
8
8
|
end
|
9
|
-
|
10
|
-
def
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
9
|
+
|
10
|
+
def parse_elements(elements)
|
11
|
+
if elements.is_a?(Nokogiri::XML::NodeSet)
|
12
|
+
elements.each do |element|
|
13
|
+
parse_elements(element)
|
14
|
+
end
|
15
|
+
else
|
16
|
+
parse_element(elements)
|
17
|
+
end
|
16
18
|
end
|
17
19
|
|
18
|
-
def
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
def parse_element(element)
|
21
|
+
classes = element["class"]
|
22
|
+
# element may be a microformat element
|
23
|
+
if classes =~ /(p|n|e|i|u|dt)-/
|
24
|
+
classes.split.each do |c|
|
25
|
+
parse_microformat(element, c)
|
22
26
|
end
|
27
|
+
# element may contain a microformat element
|
28
|
+
else
|
29
|
+
parse_elements(element.children)
|
23
30
|
end
|
24
31
|
end
|
25
32
|
|
26
|
-
def
|
27
|
-
|
33
|
+
def parse_microformat(element, c)
|
34
|
+
assign_value(symbolize_class(c), element.text)
|
35
|
+
end
|
36
|
+
|
37
|
+
def symbolize_class(c)
|
38
|
+
c.to_s.downcase.gsub(/\w{1,2}-/, "").to_sym
|
28
39
|
end
|
29
40
|
|
30
41
|
def [](key)
|
@@ -37,6 +48,7 @@ module HentryConsumer
|
|
37
48
|
|
38
49
|
def assign_value(symbolized_class, value)
|
39
50
|
return unless self.respond_to?(symbolized_class)
|
51
|
+
value = value.gsub('\n', " ").strip if value.is_a?(String)
|
40
52
|
if FormatRules.can_have_many?(symbolized_class)
|
41
53
|
self[symbolized_class] ||= []
|
42
54
|
self[symbolized_class] << value
|
@@ -44,5 +56,13 @@ module HentryConsumer
|
|
44
56
|
self[symbolized_class] = value
|
45
57
|
end
|
46
58
|
end
|
59
|
+
|
60
|
+
def to_html
|
61
|
+
@element.to_html
|
62
|
+
end
|
63
|
+
|
64
|
+
def to_xml
|
65
|
+
@element.to_xml
|
66
|
+
end
|
47
67
|
end
|
48
|
-
end
|
68
|
+
end
|
@@ -1,7 +1,8 @@
|
|
1
1
|
module HentryConsumer
|
2
2
|
class FormatRules
|
3
|
-
REQUIRED
|
4
|
-
UNIQUE
|
3
|
+
REQUIRED = []
|
4
|
+
UNIQUE = [:uid, :bookmark]
|
5
|
+
|
5
6
|
class << self
|
6
7
|
def required?(format)
|
7
8
|
REQUIRED.include? format
|
@@ -15,4 +16,4 @@ module HentryConsumer
|
|
15
16
|
end
|
16
17
|
end
|
17
18
|
end
|
18
|
-
end
|
19
|
+
end
|
@@ -1,59 +1,60 @@
|
|
1
1
|
module HentryConsumer
|
2
2
|
class HEntry < Element
|
3
|
-
|
4
3
|
attr_accessor :name, :categories, :author, :content, :bookmark, :published_at, :summary
|
5
4
|
alias_method :authors, :author
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
6
|
+
# overrides Element#parse_microformat
|
7
|
+
def parse_microformat(element, c)
|
8
|
+
case c
|
9
|
+
when "p-author" then parse_author(element)
|
10
|
+
when "p-category" then parse_category(element)
|
11
|
+
when "e-content" then parse_content(element)
|
12
|
+
when "dt-published" then parse_published(element)
|
13
|
+
when "u-uid" then parse_uid(element)
|
14
|
+
else parse_general(element, c)
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
18
|
+
def parse_author(element)
|
19
|
+
assign_value :author, HCard.new(element.children)
|
20
|
+
end
|
21
|
+
|
22
|
+
def parse_category(element)
|
23
|
+
self.categories ||= {}
|
24
|
+
self.categories[element.text.gsub("\n", " ").strip] = element["href"]
|
25
|
+
end
|
26
|
+
|
27
|
+
def parse_content(element)
|
28
|
+
assign_value :content, element.inner_html
|
29
|
+
end
|
30
|
+
|
31
|
+
def parse_published(element)
|
32
|
+
assign_value :published_at, element["datetime"]
|
33
|
+
end
|
34
|
+
|
35
|
+
def parse_uid(element)
|
36
|
+
assign_value :bookmark, element["href"]
|
37
|
+
end
|
38
|
+
|
39
|
+
def parse_general(element, c)
|
40
|
+
assign_value symbolize_class(c), element.text
|
41
|
+
end
|
42
|
+
|
17
43
|
def to_json(*a)
|
18
44
|
{:items =>
|
19
45
|
[{
|
20
46
|
:type => ["h-entry"],
|
21
47
|
:properties => {
|
22
|
-
:name
|
23
|
-
:categories
|
24
|
-
:author
|
25
|
-
:content
|
26
|
-
:bookmark
|
27
|
-
:published_at
|
28
|
-
:summary
|
48
|
+
:name => self.name,
|
49
|
+
:categories => self.categories,
|
50
|
+
:author => self.author,
|
51
|
+
:content => self.content,
|
52
|
+
:bookmark => self.bookmark,
|
53
|
+
:published_at => self.published_at,
|
54
|
+
:summary => self.summary
|
29
55
|
}
|
30
56
|
}]
|
31
57
|
}.to_json(a)
|
32
58
|
end
|
33
|
-
|
34
|
-
private
|
35
|
-
|
36
|
-
def parse_element(microformat, klass)
|
37
|
-
key, value = case klass
|
38
|
-
when 'p-author'
|
39
|
-
[symbolize_class(klass), HCard.new(microformat)]
|
40
|
-
when 'p-category'
|
41
|
-
self.categories ||= {}
|
42
|
-
self.categories[microformat.text.gsub('\n', " ").strip] = microformat["href"]
|
43
|
-
when 'e-content'
|
44
|
-
[:content, parse_content(microformat)]
|
45
|
-
when'dt-published'
|
46
|
-
[:published_at, microformat["datetime"]]
|
47
|
-
when "u-uid"
|
48
|
-
[:bookmark, microformat['href']]
|
49
|
-
else
|
50
|
-
[symbolize_class(klass), microformat.text.gsub('\n', " ").strip]
|
51
|
-
end
|
52
|
-
assign_value(key, value)
|
53
|
-
end
|
54
|
-
|
55
|
-
def parse_content(microformat)
|
56
|
-
microformat.inner_html
|
57
|
-
end
|
58
59
|
end
|
59
|
-
end
|
60
|
+
end
|
@@ -1,21 +1,22 @@
|
|
1
1
|
module HentryConsumer
|
2
2
|
class HFeed
|
3
|
-
attr_accessor :entries
|
3
|
+
attr_accessor :html, :entries
|
4
|
+
|
4
5
|
def initialize(html)
|
6
|
+
@html = Nokogiri::HTML(open(html).read)
|
5
7
|
@entries = []
|
6
|
-
parse_html
|
8
|
+
parse_html
|
7
9
|
end
|
8
10
|
|
9
|
-
def parse_html
|
10
|
-
|
11
|
-
|
12
|
-
entry = HEntry.new(mf_entry)
|
11
|
+
def parse_html
|
12
|
+
self.html.css(".h-entry").each do |hentry|
|
13
|
+
entry = HEntry.new(hentry.children)
|
13
14
|
self.entries << entry
|
14
15
|
end
|
15
16
|
end
|
16
17
|
|
17
18
|
def to_html
|
18
|
-
self.
|
19
|
+
self.html.css(".h-entry").collect(&:to_html).join
|
19
20
|
end
|
20
21
|
alias_method :to_s, :to_html
|
21
22
|
|
@@ -29,4 +30,4 @@ module HentryConsumer
|
|
29
30
|
end
|
30
31
|
|
31
32
|
end
|
32
|
-
end
|
33
|
+
end
|
data/lib/hentry_consumer.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
|
+
# I don't think you're supposed to require gems here,
|
2
|
+
# they should go in the Gemspec as dependencies
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'open-uri'
|
3
5
|
require 'json'
|
4
|
-
require 'hentry_consumer/element'
|
5
6
|
Gem.find_files("hentry_consumer/**/*.rb").each { |path| require path }
|
6
7
|
|
7
8
|
module HentryConsumer
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
module ClassMethods
|
10
|
+
def parse(html)
|
11
|
+
HFeed.new(html)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
extend ClassMethods
|
15
|
+
def self.included(other)
|
16
|
+
other.extend(ClassMethods)
|
11
17
|
end
|
12
|
-
|
13
18
|
end
|
@@ -1,17 +1,30 @@
|
|
1
1
|
require 'hentry_consumer'
|
2
2
|
|
3
|
-
describe HentryConsumer::
|
3
|
+
describe HentryConsumer::HCard do
|
4
4
|
before do
|
5
5
|
stub_const("HentryConsumer::FormatRules::REQUIRED", [:url, :email])
|
6
6
|
stub_const("HentryConsumer::FormatRules::UNIQUE", [:uid, :bookmark])
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
describe "example.html" do
|
10
|
+
let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
|
11
|
+
let(:entry) { result.entries.first }
|
12
|
+
subject { entry.authors.first }
|
12
13
|
|
13
|
-
|
14
|
+
its(:name) { should eq ["Jessica Suttles"]}
|
14
15
|
|
15
|
-
|
16
|
-
|
16
|
+
its(:emails) { should have(1).things }
|
17
|
+
its(:urls) { should have(2).things }
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "nested_example.html" do
|
21
|
+
let(:result) { HentryConsumer.parse(File.open("spec/support/nested_example.html")) }
|
22
|
+
let(:entry) { result.entries.first }
|
23
|
+
subject { entry.authors.first }
|
24
|
+
|
25
|
+
its(:name) { should eq ["Jessica"]}
|
26
|
+
|
27
|
+
its(:emails) { should have(1).things }
|
28
|
+
its(:urls) { should have(1).things }
|
29
|
+
end
|
17
30
|
end
|
@@ -1,75 +1,151 @@
|
|
1
1
|
require 'hentry_consumer'
|
2
2
|
|
3
3
|
describe HentryConsumer::HEntry do
|
4
|
-
|
5
|
-
|
4
|
+
describe "example.html" do
|
5
|
+
let(:result) { HentryConsumer.parse("spec/support/example.html") }
|
6
|
+
let(:entry) { result.entries.first }
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
it "has a name" do
|
12
|
-
entry.name.should eq ["Senior Cat Living"]
|
13
|
-
end
|
8
|
+
it "should have an array of entries" do
|
9
|
+
entry.should be_an_instance_of HentryConsumer::HEntry
|
10
|
+
end
|
14
11
|
|
15
|
-
|
16
|
-
|
17
|
-
|
12
|
+
it "has a name" do
|
13
|
+
entry.name.should eq ["Senior Cat Living"]
|
14
|
+
end
|
18
15
|
|
19
|
-
|
20
|
-
|
21
|
-
|
16
|
+
it "has a summary" do
|
17
|
+
entry.summary.should eq ["Signed up with 3 locations"]
|
18
|
+
end
|
22
19
|
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
it "has a time" do
|
21
|
+
entry.published_at.should eq ["2012-08-26 20:09-0700"]
|
22
|
+
end
|
26
23
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
it "has an author as an hcard" do
|
31
|
-
entry.authors.first.should be_an_instance_of HentryConsumer::HCard
|
32
|
-
end
|
24
|
+
it "has a bookmark" do
|
25
|
+
entry.bookmark.should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700"
|
26
|
+
end
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
entry.categories.should be_an_instance_of Hash
|
28
|
+
it "should have 2 authors" do
|
29
|
+
entry.authors.should have(2).things HentryConsumer::HCard
|
37
30
|
end
|
38
31
|
|
39
|
-
it "has
|
40
|
-
entry.
|
32
|
+
it "has an author as an hcard" do
|
33
|
+
entry.authors.first.should be_an_instance_of HentryConsumer::HCard
|
34
|
+
end
|
35
|
+
|
36
|
+
describe "categories" do
|
37
|
+
it "has an array of categories" do
|
38
|
+
entry.categories.should be_an_instance_of Hash
|
39
|
+
end
|
40
|
+
|
41
|
+
it "has a key of the content" do
|
42
|
+
entry.categories["New Customer"].should eq "http://g5.com/tag/new-customer"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "content" do
|
47
|
+
let(:content) { entry.content }
|
48
|
+
|
49
|
+
it "should be a blob of html" do
|
50
|
+
content.first.should match /Locations/
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should be a blob of html" do
|
54
|
+
content.first.should match /\<dt\>/
|
55
|
+
end
|
56
|
+
|
57
|
+
it "should be a blob of html" do
|
58
|
+
content.first.should_not match /time/
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
describe "json" do
|
64
|
+
let(:json) { JSON.parse(entry.to_json) }
|
65
|
+
|
66
|
+
it { json["items"].should be_an_instance_of Array }
|
67
|
+
it { json["items"].first["type"].should include 'h-entry'}
|
68
|
+
it { json["items"].first["properties"]["name"].should eq ['Senior Cat Living']}
|
69
|
+
it { json["items"].first["properties"]["content"].first.should match /Locations/ }
|
70
|
+
it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
|
71
|
+
it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
|
72
|
+
it { json["items"].first["properties"]["bookmark"].should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700" }
|
73
|
+
it { json["items"].first["properties"]["published_at"].should eq ["2012-08-26 20:09-0700"] }
|
74
|
+
it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
|
41
75
|
end
|
42
76
|
end
|
43
77
|
|
44
|
-
describe "
|
45
|
-
let(:
|
78
|
+
describe "nested_example.html" do
|
79
|
+
let(:result) { HentryConsumer.parse("spec/support/nested_example.html") }
|
80
|
+
let(:entry) { result.entries.first }
|
46
81
|
|
47
|
-
it "should
|
48
|
-
|
82
|
+
it "should have an array of entries" do
|
83
|
+
entry.should be_an_instance_of HentryConsumer::HEntry
|
49
84
|
end
|
50
85
|
|
51
|
-
it "
|
52
|
-
|
86
|
+
it "has a name" do
|
87
|
+
entry.name.should eq ["Wabi Sabi Town"]
|
53
88
|
end
|
54
89
|
|
55
|
-
it "
|
56
|
-
|
90
|
+
it "has a summary" do
|
91
|
+
entry.summary.should eq ["Signed up with 2 locations"]
|
57
92
|
end
|
58
93
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
it
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
it
|
72
|
-
|
73
|
-
|
94
|
+
it "has a time" do
|
95
|
+
entry.published_at.should eq ["2012-10-10T19:11:17Z"]
|
96
|
+
end
|
97
|
+
|
98
|
+
it "has a bookmark" do
|
99
|
+
entry.bookmark.should eq "http://localhost:3000/customers/3"
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should have 1 author" do
|
103
|
+
entry.authors.should have(1).things HentryConsumer::HCard
|
104
|
+
end
|
105
|
+
|
106
|
+
it "has an author as an hcard" do
|
107
|
+
entry.authors.first.should be_an_instance_of HentryConsumer::HCard
|
108
|
+
end
|
109
|
+
|
110
|
+
describe "categories" do
|
111
|
+
it "has an array of categories" do
|
112
|
+
entry.categories.should be_an_instance_of Hash
|
113
|
+
end
|
74
114
|
|
115
|
+
it "has a key of the content" do
|
116
|
+
entry.categories["Some Category"].should eq "#"
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "content" do
|
121
|
+
let(:content) { entry.content }
|
122
|
+
|
123
|
+
it "should be a blob of html" do
|
124
|
+
content.first.should match /Locations/
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should be a blob of html" do
|
128
|
+
content.first.should match /\<dt\>/
|
129
|
+
end
|
130
|
+
|
131
|
+
it "should be a blob of html" do
|
132
|
+
content.first.should_not match /time/
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
137
|
+
describe "json" do
|
138
|
+
let(:json) { JSON.parse(entry.to_json) }
|
139
|
+
|
140
|
+
it { json["items"].should be_an_instance_of Array }
|
141
|
+
it { json["items"].first["type"].should include 'h-entry'}
|
142
|
+
it { json["items"].first["properties"]["name"].should eq ['Wabi Sabi Town']}
|
143
|
+
it { json["items"].first["properties"]["content"].first.should match /Locations/ }
|
144
|
+
it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
|
145
|
+
it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
|
146
|
+
it { json["items"].first["properties"]["bookmark"].should eq "http://localhost:3000/customers/3" }
|
147
|
+
it { json["items"].first["properties"]["published_at"].should eq ["2012-10-10T19:11:17Z"] }
|
148
|
+
it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
|
149
|
+
end
|
150
|
+
end
|
75
151
|
end
|