hentry_consumer 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,9 +1,4 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- group :test, :development do
4
- gem "rspec"
5
- gem 'guard-rspec'
6
- gem 'rb-fsevent', '~> 0.9.1'
7
- end
8
-
3
+ # Specify your gem's dependencies in hentry_consumer.gemspec
9
4
  gemspec
data/README.md CHANGED
@@ -22,7 +22,7 @@ serialized h-entry objects. The returned Object structure looks something like t
22
22
 
23
23
  ## Current Version
24
24
 
25
- 0.4.0
25
+ 0.5.0
26
26
 
27
27
 
28
28
  ## Requirements
@@ -57,10 +57,12 @@ require "hentry_consumer"
57
57
  HentryConsumer.parse(File|URL)
58
58
  ```
59
59
  [Example Gist of HTML with h-entry posts](https://raw.github.com/gist/3835447/7128a66a3ac7e971a82daac5fa2076d17b88e435/gistfile1.html)
60
+ [Another Example Gist of HTML with h-entry posts](https://gist.github.com/88d6d476483e9528fb3a)
60
61
 
61
62
  ## Authors
62
63
 
63
64
  * Bookis Smuin / [@bookis](https://github.com/bookis)
65
+ * Jessica Lynn Suttles / [@jlsuttles](https://github.com/jlsuttles)
64
66
 
65
67
  ## Contributions
66
68
 
@@ -78,13 +80,13 @@ If you find bugs, have feature requests or questions, please
78
80
  ### Specs
79
81
 
80
82
  ```bash
81
- rake spec
83
+ guard
82
84
  ```
83
85
 
84
86
  ### Releases
85
87
 
86
88
  ```bash
87
- TODO how do you do a release?
89
+ rake release
88
90
  ```
89
91
 
90
92
 
@@ -6,8 +6,8 @@ require 'hentry_consumer/version'
6
6
  Gem::Specification.new do |gem|
7
7
  gem.name = "hentry_consumer"
8
8
  gem.version = HentryConsumer::VERSION
9
- gem.authors = ["Bookis Smuin"]
10
- gem.email = ["vegan.bookis@gmail.com"]
9
+ gem.authors = ["Bookis Smuin", "Jessica Lynn Suttles"]
10
+ gem.email = ["vegan.bookis@gmail.com", "jlsuttles@gmail.com"]
11
11
  gem.description = %q{A hATOM feed parser}
12
12
  gem.summary = %q{Takes in HTML containing an h-feed classed element and returns serialized data based on the Microformat 2 hEntry specs}
13
13
  gem.homepage = "https://github.com/G5/hentry_consumer"
@@ -17,7 +17,11 @@ Gem::Specification.new do |gem|
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
 
20
- gem.add_dependency 'nokogiri'
21
- gem.add_dependency 'json'
20
+ gem.add_runtime_dependency 'nokogiri'
21
+ gem.add_runtime_dependency 'json'
22
22
 
23
+ gem.add_development_dependency "rspec", "~> 2.11.0"
24
+ gem.add_development_dependency "guard-rspec", "~> 2.1.0"
25
+ gem.add_development_dependency "rb-fsevent", "~> 0.9.2"
26
+ gem.add_development_dependency "debugger", "~> 1.2.1"
23
27
  end
@@ -2,29 +2,40 @@ module HentryConsumer
2
2
  class Element
3
3
  attr_accessor :element
4
4
 
5
- def initialize(microformat)
6
- @element = microformat
7
- parse_elements
5
+ def initialize(element)
6
+ @element = element
7
+ parse_elements(@element)
8
8
  end
9
-
10
- def to_html
11
- @element.to_html
12
- end
13
-
14
- def to_xml
15
- @element.to_xml
9
+
10
+ def parse_elements(elements)
11
+ if elements.is_a?(Nokogiri::XML::NodeSet)
12
+ elements.each do |element|
13
+ parse_elements(element)
14
+ end
15
+ else
16
+ parse_element(elements)
17
+ end
16
18
  end
17
19
 
18
- def parse_elements
19
- FormatClass.each do |letter|
20
- @element.css(">*[class*=#{letter}-]").each do |a|
21
- assign_value(symbolize_class(a["class"]), a.text.gsub('\n', " ").strip)
20
+ def parse_element(element)
21
+ classes = element["class"]
22
+ # element may be a microformat element
23
+ if classes =~ /(p|n|e|i|u|dt)-/
24
+ classes.split.each do |c|
25
+ parse_microformat(element, c)
22
26
  end
27
+ # element may contain a microformat element
28
+ else
29
+ parse_elements(element.children)
23
30
  end
24
31
  end
25
32
 
26
- def symbolize_class(klass)
27
- klass.to_s.downcase.split.first.gsub(/\w{1,2}-/, "").to_sym
33
+ def parse_microformat(element, c)
34
+ assign_value(symbolize_class(c), element.text)
35
+ end
36
+
37
+ def symbolize_class(c)
38
+ c.to_s.downcase.gsub(/\w{1,2}-/, "").to_sym
28
39
  end
29
40
 
30
41
  def [](key)
@@ -37,6 +48,7 @@ module HentryConsumer
37
48
 
38
49
  def assign_value(symbolized_class, value)
39
50
  return unless self.respond_to?(symbolized_class)
51
+ value = value.gsub('\n', " ").strip if value.is_a?(String)
40
52
  if FormatRules.can_have_many?(symbolized_class)
41
53
  self[symbolized_class] ||= []
42
54
  self[symbolized_class] << value
@@ -44,5 +56,13 @@ module HentryConsumer
44
56
  self[symbolized_class] = value
45
57
  end
46
58
  end
59
+
60
+ def to_html
61
+ @element.to_html
62
+ end
63
+
64
+ def to_xml
65
+ @element.to_xml
66
+ end
47
67
  end
48
- end
68
+ end
@@ -1,7 +1,8 @@
1
1
  module HentryConsumer
2
2
  class FormatRules
3
- REQUIRED = []
4
- UNIQUE = [:uid, :bookmark]
3
+ REQUIRED = []
4
+ UNIQUE = [:uid, :bookmark]
5
+
5
6
  class << self
6
7
  def required?(format)
7
8
  REQUIRED.include? format
@@ -15,4 +16,4 @@ module HentryConsumer
15
16
  end
16
17
  end
17
18
  end
18
- end
19
+ end
@@ -16,6 +16,5 @@ module HentryConsumer
16
16
  }]
17
17
  }.to_json(a)
18
18
  end
19
-
20
19
  end
21
- end
20
+ end
@@ -1,59 +1,60 @@
1
1
  module HentryConsumer
2
2
  class HEntry < Element
3
-
4
3
  attr_accessor :name, :categories, :author, :content, :bookmark, :published_at, :summary
5
4
  alias_method :authors, :author
6
5
 
7
- def parse_elements
8
- FormatClass.each do |letter|
9
- @element.css(">*[class*=#{letter}-]").each do |attrs|
10
- attrs["class"].split.each do |klass|
11
- parse_element(attrs, klass)
12
- end
13
- end
6
+ # overrides Element#parse_microformat
7
+ def parse_microformat(element, c)
8
+ case c
9
+ when "p-author" then parse_author(element)
10
+ when "p-category" then parse_category(element)
11
+ when "e-content" then parse_content(element)
12
+ when "dt-published" then parse_published(element)
13
+ when "u-uid" then parse_uid(element)
14
+ else parse_general(element, c)
14
15
  end
15
16
  end
16
17
 
18
+ def parse_author(element)
19
+ assign_value :author, HCard.new(element.children)
20
+ end
21
+
22
+ def parse_category(element)
23
+ self.categories ||= {}
24
+ self.categories[element.text.gsub("\n", " ").strip] = element["href"]
25
+ end
26
+
27
+ def parse_content(element)
28
+ assign_value :content, element.inner_html
29
+ end
30
+
31
+ def parse_published(element)
32
+ assign_value :published_at, element["datetime"]
33
+ end
34
+
35
+ def parse_uid(element)
36
+ assign_value :bookmark, element["href"]
37
+ end
38
+
39
+ def parse_general(element, c)
40
+ assign_value symbolize_class(c), element.text
41
+ end
42
+
17
43
  def to_json(*a)
18
44
  {:items =>
19
45
  [{
20
46
  :type => ["h-entry"],
21
47
  :properties => {
22
- :name => self.name,
23
- :categories => self.categories,
24
- :author => self.author,
25
- :content => self.content,
26
- :bookmark => self.bookmark,
27
- :published_at => self.published_at,
28
- :summary => self.summary
48
+ :name => self.name,
49
+ :categories => self.categories,
50
+ :author => self.author,
51
+ :content => self.content,
52
+ :bookmark => self.bookmark,
53
+ :published_at => self.published_at,
54
+ :summary => self.summary
29
55
  }
30
56
  }]
31
57
  }.to_json(a)
32
58
  end
33
-
34
- private
35
-
36
- def parse_element(microformat, klass)
37
- key, value = case klass
38
- when 'p-author'
39
- [symbolize_class(klass), HCard.new(microformat)]
40
- when 'p-category'
41
- self.categories ||= {}
42
- self.categories[microformat.text.gsub('\n', " ").strip] = microformat["href"]
43
- when 'e-content'
44
- [:content, parse_content(microformat)]
45
- when'dt-published'
46
- [:published_at, microformat["datetime"]]
47
- when "u-uid"
48
- [:bookmark, microformat['href']]
49
- else
50
- [symbolize_class(klass), microformat.text.gsub('\n', " ").strip]
51
- end
52
- assign_value(key, value)
53
- end
54
-
55
- def parse_content(microformat)
56
- microformat.inner_html
57
- end
58
59
  end
59
- end
60
+ end
@@ -1,21 +1,22 @@
1
1
  module HentryConsumer
2
2
  class HFeed
3
- attr_accessor :entries
3
+ attr_accessor :html, :entries
4
+
4
5
  def initialize(html)
6
+ @html = Nokogiri::HTML(open(html).read)
5
7
  @entries = []
6
- parse_html(html)
8
+ parse_html
7
9
  end
8
10
 
9
- def parse_html(html)
10
- doc = Nokogiri::HTML(open(html).read)
11
- doc.css(".h-entry").each do |mf_entry|
12
- entry = HEntry.new(mf_entry)
11
+ def parse_html
12
+ self.html.css(".h-entry").each do |hentry|
13
+ entry = HEntry.new(hentry.children)
13
14
  self.entries << entry
14
15
  end
15
16
  end
16
17
 
17
18
  def to_html
18
- self.entries.collect(&:to_html).join
19
+ self.html.css(".h-entry").collect(&:to_html).join
19
20
  end
20
21
  alias_method :to_s, :to_html
21
22
 
@@ -29,4 +30,4 @@ module HentryConsumer
29
30
  end
30
31
 
31
32
  end
32
- end
33
+ end
@@ -1,3 +1,3 @@
1
1
  module HentryConsumer
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -1,13 +1,18 @@
1
+ # I don't think you're supposed to require gems here,
2
+ # they should go in the Gemspec as dependencies
1
3
  require 'nokogiri'
2
4
  require 'open-uri'
3
5
  require 'json'
4
- require 'hentry_consumer/element'
5
6
  Gem.find_files("hentry_consumer/**/*.rb").each { |path| require path }
6
7
 
7
8
  module HentryConsumer
8
- FormatClass = %w(p n e i u dt)
9
- def self.parse(html)
10
- HFeed.new(html)
9
+ module ClassMethods
10
+ def parse(html)
11
+ HFeed.new(html)
12
+ end
13
+ end
14
+ extend ClassMethods
15
+ def self.included(other)
16
+ other.extend(ClassMethods)
11
17
  end
12
-
13
18
  end
@@ -1,17 +1,30 @@
1
1
  require 'hentry_consumer'
2
2
 
3
- describe HentryConsumer::HEntry do
3
+ describe HentryConsumer::HCard do
4
4
  before do
5
5
  stub_const("HentryConsumer::FormatRules::REQUIRED", [:url, :email])
6
6
  stub_const("HentryConsumer::FormatRules::UNIQUE", [:uid, :bookmark])
7
7
  end
8
8
 
9
- let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
10
- let(:entry) { result.entries.first }
11
- subject { entry.authors.first }
9
+ describe "example.html" do
10
+ let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
11
+ let(:entry) { result.entries.first }
12
+ subject { entry.authors.first }
12
13
 
13
- its(:name) { should eq ["Jessica Suttles"]}
14
+ its(:name) { should eq ["Jessica Suttles"]}
14
15
 
15
- its(:emails) { should have(1).things }
16
- its(:urls) { should have(2).things }
16
+ its(:emails) { should have(1).things }
17
+ its(:urls) { should have(2).things }
18
+ end
19
+
20
+ describe "nested_example.html" do
21
+ let(:result) { HentryConsumer.parse(File.open("spec/support/nested_example.html")) }
22
+ let(:entry) { result.entries.first }
23
+ subject { entry.authors.first }
24
+
25
+ its(:name) { should eq ["Jessica"]}
26
+
27
+ its(:emails) { should have(1).things }
28
+ its(:urls) { should have(1).things }
29
+ end
17
30
  end
@@ -1,75 +1,151 @@
1
1
  require 'hentry_consumer'
2
2
 
3
3
  describe HentryConsumer::HEntry do
4
- let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
5
- let(:entry) { result.entries.first }
4
+ describe "example.html" do
5
+ let(:result) { HentryConsumer.parse("spec/support/example.html") }
6
+ let(:entry) { result.entries.first }
6
7
 
7
- it "should have an array of entries" do
8
- entry.should be_an_instance_of HentryConsumer::HEntry
9
- end
10
-
11
- it "has a name" do
12
- entry.name.should eq ["Senior Cat Living"]
13
- end
8
+ it "should have an array of entries" do
9
+ entry.should be_an_instance_of HentryConsumer::HEntry
10
+ end
14
11
 
15
- it "has a summary" do
16
- entry.summary.should eq ["Signed up with 3 locations"]
17
- end
12
+ it "has a name" do
13
+ entry.name.should eq ["Senior Cat Living"]
14
+ end
18
15
 
19
- it "has a time" do
20
- entry.published_at.should eq ["2012-08-26 20:09-0700"]
21
- end
16
+ it "has a summary" do
17
+ entry.summary.should eq ["Signed up with 3 locations"]
18
+ end
22
19
 
23
- it "has a bookmark" do
24
- entry.bookmark.should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700"
25
- end
20
+ it "has a time" do
21
+ entry.published_at.should eq ["2012-08-26 20:09-0700"]
22
+ end
26
23
 
27
- it "should have 2 authors" do
28
- entry.authors.should have(2).things HentryConsumer::HCard
29
- end
30
- it "has an author as an hcard" do
31
- entry.authors.first.should be_an_instance_of HentryConsumer::HCard
32
- end
24
+ it "has a bookmark" do
25
+ entry.bookmark.should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700"
26
+ end
33
27
 
34
- describe "categories" do
35
- it "has an array of categories" do
36
- entry.categories.should be_an_instance_of Hash
28
+ it "should have 2 authors" do
29
+ entry.authors.should have(2).things HentryConsumer::HCard
37
30
  end
38
31
 
39
- it "has a key of the content" do
40
- entry.categories["New Customer"].should eq "http://g5.com/tag/new-customer"
32
+ it "has an author as an hcard" do
33
+ entry.authors.first.should be_an_instance_of HentryConsumer::HCard
34
+ end
35
+
36
+ describe "categories" do
37
+ it "has an array of categories" do
38
+ entry.categories.should be_an_instance_of Hash
39
+ end
40
+
41
+ it "has a key of the content" do
42
+ entry.categories["New Customer"].should eq "http://g5.com/tag/new-customer"
43
+ end
44
+ end
45
+
46
+ describe "content" do
47
+ let(:content) { entry.content }
48
+
49
+ it "should be a blob of html" do
50
+ content.first.should match /Locations/
51
+ end
52
+
53
+ it "should be a blob of html" do
54
+ content.first.should match /\<dt\>/
55
+ end
56
+
57
+ it "should be a blob of html" do
58
+ content.first.should_not match /time/
59
+ end
60
+
61
+ end
62
+
63
+ describe "json" do
64
+ let(:json) { JSON.parse(entry.to_json) }
65
+
66
+ it { json["items"].should be_an_instance_of Array }
67
+ it { json["items"].first["type"].should include 'h-entry'}
68
+ it { json["items"].first["properties"]["name"].should eq ['Senior Cat Living']}
69
+ it { json["items"].first["properties"]["content"].first.should match /Locations/ }
70
+ it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
71
+ it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
72
+ it { json["items"].first["properties"]["bookmark"].should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700" }
73
+ it { json["items"].first["properties"]["published_at"].should eq ["2012-08-26 20:09-0700"] }
74
+ it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
41
75
  end
42
76
  end
43
77
 
44
- describe "content" do
45
- let(:content) { entry.content }
78
+ describe "nested_example.html" do
79
+ let(:result) { HentryConsumer.parse("spec/support/nested_example.html") }
80
+ let(:entry) { result.entries.first }
46
81
 
47
- it "should be a blob of html" do
48
- content.first.should match /Locations/
82
+ it "should have an array of entries" do
83
+ entry.should be_an_instance_of HentryConsumer::HEntry
49
84
  end
50
85
 
51
- it "should be a blob of html" do
52
- content.first.should match /\<dt\>/
86
+ it "has a name" do
87
+ entry.name.should eq ["Wabi Sabi Town"]
53
88
  end
54
89
 
55
- it "should be a blob of html" do
56
- content.first.should_not match /time/
90
+ it "has a summary" do
91
+ entry.summary.should eq ["Signed up with 2 locations"]
57
92
  end
58
93
 
59
- end
60
-
61
- describe "json" do
62
- let(:json) { JSON.parse(entry.to_json) }
63
-
64
- it { json["items"].should be_an_instance_of Array }
65
- it { json["items"].first["type"].should include 'h-entry'}
66
- it { json["items"].first["properties"]["name"].should eq ['Senior Cat Living']}
67
- it { json["items"].first["properties"]["content"].first.should match /Locations/ }
68
- it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
69
- it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
70
- it { json["items"].first["properties"]["bookmark"].should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700" }
71
- it { json["items"].first["properties"]["published_at"].should eq ["2012-08-26 20:09-0700"] }
72
- it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
73
- end
94
+ it "has a time" do
95
+ entry.published_at.should eq ["2012-10-10T19:11:17Z"]
96
+ end
97
+
98
+ it "has a bookmark" do
99
+ entry.bookmark.should eq "http://localhost:3000/customers/3"
100
+ end
101
+
102
+ it "should have 1 author" do
103
+ entry.authors.should have(1).things HentryConsumer::HCard
104
+ end
105
+
106
+ it "has an author as an hcard" do
107
+ entry.authors.first.should be_an_instance_of HentryConsumer::HCard
108
+ end
109
+
110
+ describe "categories" do
111
+ it "has an array of categories" do
112
+ entry.categories.should be_an_instance_of Hash
113
+ end
74
114
 
115
+ it "has a key of the content" do
116
+ entry.categories["Some Category"].should eq "#"
117
+ end
118
+ end
119
+
120
+ describe "content" do
121
+ let(:content) { entry.content }
122
+
123
+ it "should be a blob of html" do
124
+ content.first.should match /Locations/
125
+ end
126
+
127
+ it "should be a blob of html" do
128
+ content.first.should match /\<dt\>/
129
+ end
130
+
131
+ it "should be a blob of html" do
132
+ content.first.should_not match /time/
133
+ end
134
+
135
+ end
136
+
137
+ describe "json" do
138
+ let(:json) { JSON.parse(entry.to_json) }
139
+
140
+ it { json["items"].should be_an_instance_of Array }
141
+ it { json["items"].first["type"].should include 'h-entry'}
142
+ it { json["items"].first["properties"]["name"].should eq ['Wabi Sabi Town']}
143
+ it { json["items"].first["properties"]["content"].first.should match /Locations/ }
144
+ it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
145
+ it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
146
+ it { json["items"].first["properties"]["bookmark"].should eq "http://localhost:3000/customers/3" }
147
+ it { json["items"].first["properties"]["published_at"].should eq ["2012-10-10T19:11:17Z"] }
148
+ it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
149
+ end
150
+ end
75
151
  end