hentry_consumer 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,9 +1,4 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
- group :test, :development do
4
- gem "rspec"
5
- gem 'guard-rspec'
6
- gem 'rb-fsevent', '~> 0.9.1'
7
- end
8
-
3
+ # Specify your gem's dependencies in hentry_consumer.gemspec
9
4
  gemspec
data/README.md CHANGED
@@ -22,7 +22,7 @@ serialized h-entry objects. The returned Object structure looks something like t
22
22
 
23
23
  ## Current Version
24
24
 
25
- 0.4.0
25
+ 0.5.0
26
26
 
27
27
 
28
28
  ## Requirements
@@ -57,10 +57,12 @@ require "hentry_consumer"
57
57
  HentryConsumer.parse(File|URL)
58
58
  ```
59
59
  [Example Gist of HTML with h-entry posts](https://raw.github.com/gist/3835447/7128a66a3ac7e971a82daac5fa2076d17b88e435/gistfile1.html)
60
+ [Another Example Gist of HTML with h-entry posts](https://gist.github.com/88d6d476483e9528fb3a)
60
61
 
61
62
  ## Authors
62
63
 
63
64
  * Bookis Smuin / [@bookis](https://github.com/bookis)
65
+ * Jessica Lynn Suttles / [@jlsuttles](https://github.com/jlsuttles)
64
66
 
65
67
  ## Contributions
66
68
 
@@ -78,13 +80,13 @@ If you find bugs, have feature requests or questions, please
78
80
  ### Specs
79
81
 
80
82
  ```bash
81
- rake spec
83
+ guard
82
84
  ```
83
85
 
84
86
  ### Releases
85
87
 
86
88
  ```bash
87
- TODO how do you do a release?
89
+ rake release
88
90
  ```
89
91
 
90
92
 
@@ -6,8 +6,8 @@ require 'hentry_consumer/version'
6
6
  Gem::Specification.new do |gem|
7
7
  gem.name = "hentry_consumer"
8
8
  gem.version = HentryConsumer::VERSION
9
- gem.authors = ["Bookis Smuin"]
10
- gem.email = ["vegan.bookis@gmail.com"]
9
+ gem.authors = ["Bookis Smuin", "Jessica Lynn Suttles"]
10
+ gem.email = ["vegan.bookis@gmail.com", "jlsuttles@gmail.com"]
11
11
  gem.description = %q{A hATOM feed parser}
12
12
  gem.summary = %q{Takes in HTML containing an h-feed classed element and returns serialized data based on the Microformat 2 hEntry specs}
13
13
  gem.homepage = "https://github.com/G5/hentry_consumer"
@@ -17,7 +17,11 @@ Gem::Specification.new do |gem|
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
 
20
- gem.add_dependency 'nokogiri'
21
- gem.add_dependency 'json'
20
+ gem.add_runtime_dependency 'nokogiri'
21
+ gem.add_runtime_dependency 'json'
22
22
 
23
+ gem.add_development_dependency "rspec", "~> 2.11.0"
24
+ gem.add_development_dependency "guard-rspec", "~> 2.1.0"
25
+ gem.add_development_dependency "rb-fsevent", "~> 0.9.2"
26
+ gem.add_development_dependency "debugger", "~> 1.2.1"
23
27
  end
@@ -2,29 +2,40 @@ module HentryConsumer
2
2
  class Element
3
3
  attr_accessor :element
4
4
 
5
- def initialize(microformat)
6
- @element = microformat
7
- parse_elements
5
+ def initialize(element)
6
+ @element = element
7
+ parse_elements(@element)
8
8
  end
9
-
10
- def to_html
11
- @element.to_html
12
- end
13
-
14
- def to_xml
15
- @element.to_xml
9
+
10
+ def parse_elements(elements)
11
+ if elements.is_a?(Nokogiri::XML::NodeSet)
12
+ elements.each do |element|
13
+ parse_elements(element)
14
+ end
15
+ else
16
+ parse_element(elements)
17
+ end
16
18
  end
17
19
 
18
- def parse_elements
19
- FormatClass.each do |letter|
20
- @element.css(">*[class*=#{letter}-]").each do |a|
21
- assign_value(symbolize_class(a["class"]), a.text.gsub('\n', " ").strip)
20
+ def parse_element(element)
21
+ classes = element["class"]
22
+ # element may be a microformat element
23
+ if classes =~ /(p|n|e|i|u|dt)-/
24
+ classes.split.each do |c|
25
+ parse_microformat(element, c)
22
26
  end
27
+ # element may contain a microformat element
28
+ else
29
+ parse_elements(element.children)
23
30
  end
24
31
  end
25
32
 
26
- def symbolize_class(klass)
27
- klass.to_s.downcase.split.first.gsub(/\w{1,2}-/, "").to_sym
33
+ def parse_microformat(element, c)
34
+ assign_value(symbolize_class(c), element.text)
35
+ end
36
+
37
+ def symbolize_class(c)
38
+ c.to_s.downcase.gsub(/\w{1,2}-/, "").to_sym
28
39
  end
29
40
 
30
41
  def [](key)
@@ -37,6 +48,7 @@ module HentryConsumer
37
48
 
38
49
  def assign_value(symbolized_class, value)
39
50
  return unless self.respond_to?(symbolized_class)
51
+ value = value.gsub('\n', " ").strip if value.is_a?(String)
40
52
  if FormatRules.can_have_many?(symbolized_class)
41
53
  self[symbolized_class] ||= []
42
54
  self[symbolized_class] << value
@@ -44,5 +56,13 @@ module HentryConsumer
44
56
  self[symbolized_class] = value
45
57
  end
46
58
  end
59
+
60
+ def to_html
61
+ @element.to_html
62
+ end
63
+
64
+ def to_xml
65
+ @element.to_xml
66
+ end
47
67
  end
48
- end
68
+ end
@@ -1,7 +1,8 @@
1
1
  module HentryConsumer
2
2
  class FormatRules
3
- REQUIRED = []
4
- UNIQUE = [:uid, :bookmark]
3
+ REQUIRED = []
4
+ UNIQUE = [:uid, :bookmark]
5
+
5
6
  class << self
6
7
  def required?(format)
7
8
  REQUIRED.include? format
@@ -15,4 +16,4 @@ module HentryConsumer
15
16
  end
16
17
  end
17
18
  end
18
- end
19
+ end
@@ -16,6 +16,5 @@ module HentryConsumer
16
16
  }]
17
17
  }.to_json(a)
18
18
  end
19
-
20
19
  end
21
- end
20
+ end
@@ -1,59 +1,60 @@
1
1
  module HentryConsumer
2
2
  class HEntry < Element
3
-
4
3
  attr_accessor :name, :categories, :author, :content, :bookmark, :published_at, :summary
5
4
  alias_method :authors, :author
6
5
 
7
- def parse_elements
8
- FormatClass.each do |letter|
9
- @element.css(">*[class*=#{letter}-]").each do |attrs|
10
- attrs["class"].split.each do |klass|
11
- parse_element(attrs, klass)
12
- end
13
- end
6
+ # overrides Element#parse_microformat
7
+ def parse_microformat(element, c)
8
+ case c
9
+ when "p-author" then parse_author(element)
10
+ when "p-category" then parse_category(element)
11
+ when "e-content" then parse_content(element)
12
+ when "dt-published" then parse_published(element)
13
+ when "u-uid" then parse_uid(element)
14
+ else parse_general(element, c)
14
15
  end
15
16
  end
16
17
 
18
+ def parse_author(element)
19
+ assign_value :author, HCard.new(element.children)
20
+ end
21
+
22
+ def parse_category(element)
23
+ self.categories ||= {}
24
+ self.categories[element.text.gsub("\n", " ").strip] = element["href"]
25
+ end
26
+
27
+ def parse_content(element)
28
+ assign_value :content, element.inner_html
29
+ end
30
+
31
+ def parse_published(element)
32
+ assign_value :published_at, element["datetime"]
33
+ end
34
+
35
+ def parse_uid(element)
36
+ assign_value :bookmark, element["href"]
37
+ end
38
+
39
+ def parse_general(element, c)
40
+ assign_value symbolize_class(c), element.text
41
+ end
42
+
17
43
  def to_json(*a)
18
44
  {:items =>
19
45
  [{
20
46
  :type => ["h-entry"],
21
47
  :properties => {
22
- :name => self.name,
23
- :categories => self.categories,
24
- :author => self.author,
25
- :content => self.content,
26
- :bookmark => self.bookmark,
27
- :published_at => self.published_at,
28
- :summary => self.summary
48
+ :name => self.name,
49
+ :categories => self.categories,
50
+ :author => self.author,
51
+ :content => self.content,
52
+ :bookmark => self.bookmark,
53
+ :published_at => self.published_at,
54
+ :summary => self.summary
29
55
  }
30
56
  }]
31
57
  }.to_json(a)
32
58
  end
33
-
34
- private
35
-
36
- def parse_element(microformat, klass)
37
- key, value = case klass
38
- when 'p-author'
39
- [symbolize_class(klass), HCard.new(microformat)]
40
- when 'p-category'
41
- self.categories ||= {}
42
- self.categories[microformat.text.gsub('\n', " ").strip] = microformat["href"]
43
- when 'e-content'
44
- [:content, parse_content(microformat)]
45
- when'dt-published'
46
- [:published_at, microformat["datetime"]]
47
- when "u-uid"
48
- [:bookmark, microformat['href']]
49
- else
50
- [symbolize_class(klass), microformat.text.gsub('\n', " ").strip]
51
- end
52
- assign_value(key, value)
53
- end
54
-
55
- def parse_content(microformat)
56
- microformat.inner_html
57
- end
58
59
  end
59
- end
60
+ end
@@ -1,21 +1,22 @@
1
1
  module HentryConsumer
2
2
  class HFeed
3
- attr_accessor :entries
3
+ attr_accessor :html, :entries
4
+
4
5
  def initialize(html)
6
+ @html = Nokogiri::HTML(open(html).read)
5
7
  @entries = []
6
- parse_html(html)
8
+ parse_html
7
9
  end
8
10
 
9
- def parse_html(html)
10
- doc = Nokogiri::HTML(open(html).read)
11
- doc.css(".h-entry").each do |mf_entry|
12
- entry = HEntry.new(mf_entry)
11
+ def parse_html
12
+ self.html.css(".h-entry").each do |hentry|
13
+ entry = HEntry.new(hentry.children)
13
14
  self.entries << entry
14
15
  end
15
16
  end
16
17
 
17
18
  def to_html
18
- self.entries.collect(&:to_html).join
19
+ self.html.css(".h-entry").collect(&:to_html).join
19
20
  end
20
21
  alias_method :to_s, :to_html
21
22
 
@@ -29,4 +30,4 @@ module HentryConsumer
29
30
  end
30
31
 
31
32
  end
32
- end
33
+ end
@@ -1,3 +1,3 @@
1
1
  module HentryConsumer
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -1,13 +1,18 @@
1
+ # I don't think you're supposed to require gems here;
2
+ # they should go in the gemspec as dependencies.
1
3
  require 'nokogiri'
2
4
  require 'open-uri'
3
5
  require 'json'
4
- require 'hentry_consumer/element'
5
6
  Gem.find_files("hentry_consumer/**/*.rb").each { |path| require path }
6
7
 
7
8
  module HentryConsumer
8
- FormatClass = %w(p n e i u dt)
9
- def self.parse(html)
10
- HFeed.new(html)
9
+ module ClassMethods
10
+ def parse(html)
11
+ HFeed.new(html)
12
+ end
13
+ end
14
+ extend ClassMethods
15
+ def self.included(other)
16
+ other.extend(ClassMethods)
11
17
  end
12
-
13
18
  end
@@ -1,17 +1,30 @@
1
1
  require 'hentry_consumer'
2
2
 
3
- describe HentryConsumer::HEntry do
3
+ describe HentryConsumer::HCard do
4
4
  before do
5
5
  stub_const("HentryConsumer::FormatRules::REQUIRED", [:url, :email])
6
6
  stub_const("HentryConsumer::FormatRules::UNIQUE", [:uid, :bookmark])
7
7
  end
8
8
 
9
- let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
10
- let(:entry) { result.entries.first }
11
- subject { entry.authors.first }
9
+ describe "example.html" do
10
+ let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
11
+ let(:entry) { result.entries.first }
12
+ subject { entry.authors.first }
12
13
 
13
- its(:name) { should eq ["Jessica Suttles"]}
14
+ its(:name) { should eq ["Jessica Suttles"]}
14
15
 
15
- its(:emails) { should have(1).things }
16
- its(:urls) { should have(2).things }
16
+ its(:emails) { should have(1).things }
17
+ its(:urls) { should have(2).things }
18
+ end
19
+
20
+ describe "nested_example.html" do
21
+ let(:result) { HentryConsumer.parse(File.open("spec/support/nested_example.html")) }
22
+ let(:entry) { result.entries.first }
23
+ subject { entry.authors.first }
24
+
25
+ its(:name) { should eq ["Jessica"]}
26
+
27
+ its(:emails) { should have(1).things }
28
+ its(:urls) { should have(1).things }
29
+ end
17
30
  end
@@ -1,75 +1,151 @@
1
1
  require 'hentry_consumer'
2
2
 
3
3
  describe HentryConsumer::HEntry do
4
- let(:result) { HentryConsumer.parse(File.open("spec/support/example.html")) }
5
- let(:entry) { result.entries.first }
4
+ describe "example.html" do
5
+ let(:result) { HentryConsumer.parse("spec/support/example.html") }
6
+ let(:entry) { result.entries.first }
6
7
 
7
- it "should have an array of entries" do
8
- entry.should be_an_instance_of HentryConsumer::HEntry
9
- end
10
-
11
- it "has a name" do
12
- entry.name.should eq ["Senior Cat Living"]
13
- end
8
+ it "should have an array of entries" do
9
+ entry.should be_an_instance_of HentryConsumer::HEntry
10
+ end
14
11
 
15
- it "has a summary" do
16
- entry.summary.should eq ["Signed up with 3 locations"]
17
- end
12
+ it "has a name" do
13
+ entry.name.should eq ["Senior Cat Living"]
14
+ end
18
15
 
19
- it "has a time" do
20
- entry.published_at.should eq ["2012-08-26 20:09-0700"]
21
- end
16
+ it "has a summary" do
17
+ entry.summary.should eq ["Signed up with 3 locations"]
18
+ end
22
19
 
23
- it "has a bookmark" do
24
- entry.bookmark.should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700"
25
- end
20
+ it "has a time" do
21
+ entry.published_at.should eq ["2012-08-26 20:09-0700"]
22
+ end
26
23
 
27
- it "should have 2 authors" do
28
- entry.authors.should have(2).things HentryConsumer::HCard
29
- end
30
- it "has an author as an hcard" do
31
- entry.authors.first.should be_an_instance_of HentryConsumer::HCard
32
- end
24
+ it "has a bookmark" do
25
+ entry.bookmark.should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700"
26
+ end
33
27
 
34
- describe "categories" do
35
- it "has an array of categories" do
36
- entry.categories.should be_an_instance_of Hash
28
+ it "should have 2 authors" do
29
+ entry.authors.should have(2).things HentryConsumer::HCard
37
30
  end
38
31
 
39
- it "has a key of the content" do
40
- entry.categories["New Customer"].should eq "http://g5.com/tag/new-customer"
32
+ it "has an author as an hcard" do
33
+ entry.authors.first.should be_an_instance_of HentryConsumer::HCard
34
+ end
35
+
36
+ describe "categories" do
37
+ it "has an array of categories" do
38
+ entry.categories.should be_an_instance_of Hash
39
+ end
40
+
41
+ it "has a key of the content" do
42
+ entry.categories["New Customer"].should eq "http://g5.com/tag/new-customer"
43
+ end
44
+ end
45
+
46
+ describe "content" do
47
+ let(:content) { entry.content }
48
+
49
+ it "should be a blob of html" do
50
+ content.first.should match /Locations/
51
+ end
52
+
53
+ it "should be a blob of html" do
54
+ content.first.should match /\<dt\>/
55
+ end
56
+
57
+ it "should be a blob of html" do
58
+ content.first.should_not match /time/
59
+ end
60
+
61
+ end
62
+
63
+ describe "json" do
64
+ let(:json) { JSON.parse(entry.to_json) }
65
+
66
+ it { json["items"].should be_an_instance_of Array }
67
+ it { json["items"].first["type"].should include 'h-entry'}
68
+ it { json["items"].first["properties"]["name"].should eq ['Senior Cat Living']}
69
+ it { json["items"].first["properties"]["content"].first.should match /Locations/ }
70
+ it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
71
+ it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
72
+ it { json["items"].first["properties"]["bookmark"].should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700" }
73
+ it { json["items"].first["properties"]["published_at"].should eq ["2012-08-26 20:09-0700"] }
74
+ it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
41
75
  end
42
76
  end
43
77
 
44
- describe "content" do
45
- let(:content) { entry.content }
78
+ describe "nested_example.html" do
79
+ let(:result) { HentryConsumer.parse("spec/support/nested_example.html") }
80
+ let(:entry) { result.entries.first }
46
81
 
47
- it "should be a blob of html" do
48
- content.first.should match /Locations/
82
+ it "should have an array of entries" do
83
+ entry.should be_an_instance_of HentryConsumer::HEntry
49
84
  end
50
85
 
51
- it "should be a blob of html" do
52
- content.first.should match /\<dt\>/
86
+ it "has a name" do
87
+ entry.name.should eq ["Wabi Sabi Town"]
53
88
  end
54
89
 
55
- it "should be a blob of html" do
56
- content.first.should_not match /time/
90
+ it "has a summary" do
91
+ entry.summary.should eq ["Signed up with 2 locations"]
57
92
  end
58
93
 
59
- end
60
-
61
- describe "json" do
62
- let(:json) { JSON.parse(entry.to_json) }
63
-
64
- it { json["items"].should be_an_instance_of Array }
65
- it { json["items"].first["type"].should include 'h-entry'}
66
- it { json["items"].first["properties"]["name"].should eq ['Senior Cat Living']}
67
- it { json["items"].first["properties"]["content"].first.should match /Locations/ }
68
- it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
69
- it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
70
- it { json["items"].first["properties"]["bookmark"].should eq "http://g5.com/feed/entries/2012-08-26-20-09-0700" }
71
- it { json["items"].first["properties"]["published_at"].should eq ["2012-08-26 20:09-0700"] }
72
- it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
73
- end
94
+ it "has a time" do
95
+ entry.published_at.should eq ["2012-10-10T19:11:17Z"]
96
+ end
97
+
98
+ it "has a bookmark" do
99
+ entry.bookmark.should eq "http://localhost:3000/customers/3"
100
+ end
101
+
102
+ it "should have 1 author" do
103
+ entry.authors.should have(1).things HentryConsumer::HCard
104
+ end
105
+
106
+ it "has an author as an hcard" do
107
+ entry.authors.first.should be_an_instance_of HentryConsumer::HCard
108
+ end
109
+
110
+ describe "categories" do
111
+ it "has an array of categories" do
112
+ entry.categories.should be_an_instance_of Hash
113
+ end
74
114
 
115
+ it "has a key of the content" do
116
+ entry.categories["Some Category"].should eq "#"
117
+ end
118
+ end
119
+
120
+ describe "content" do
121
+ let(:content) { entry.content }
122
+
123
+ it "should be a blob of html" do
124
+ content.first.should match /Locations/
125
+ end
126
+
127
+ it "should be a blob of html" do
128
+ content.first.should match /\<dt\>/
129
+ end
130
+
131
+ it "should be a blob of html" do
132
+ content.first.should_not match /time/
133
+ end
134
+
135
+ end
136
+
137
+ describe "json" do
138
+ let(:json) { JSON.parse(entry.to_json) }
139
+
140
+ it { json["items"].should be_an_instance_of Array }
141
+ it { json["items"].first["type"].should include 'h-entry'}
142
+ it { json["items"].first["properties"]["name"].should eq ['Wabi Sabi Town']}
143
+ it { json["items"].first["properties"]["content"].first.should match /Locations/ }
144
+ it { json["items"].first["properties"]["author"].should be_an_instance_of Array }
145
+ it { json["items"].first["properties"]["author"].first["items"].first["type"].should include "h-card" }
146
+ it { json["items"].first["properties"]["bookmark"].should eq "http://localhost:3000/customers/3" }
147
+ it { json["items"].first["properties"]["published_at"].should eq ["2012-10-10T19:11:17Z"] }
148
+ it { json["items"].first["properties"]["summary"].should be_an_instance_of Array }
149
+ end
150
+ end
75
151
  end