doc_wrapper 0.9.2 → 0.9.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- doc_wrapper (0.9.1)
4
+ doc_wrapper (0.9.2)
5
5
  activesupport (>= 3.0.0)
6
6
 
7
7
  GEM
@@ -0,0 +1,63 @@
1
+ DocWrapper is a simple DSL for creating wrappers around DOM objects.
2
+
3
+ Usage
4
+
5
+ % gem install doc_wrapper
6
+
7
+ Example Usages
8
+
9
+ DocWrapper allows you to easily create a declarative wrapper to access data from HTML Document Object Model (DOM) or XML DOM documents and optionally transform that data.
10
+
11
+ DocWrapper will work with any underlying "document" that has a search method, such as a DOM generated by Nokogiri or Hpricot. This allows DocWrapper to use any selector your DOM library supports. Using Nokogiri, you can use either XPath or CSS selectors for very flexible property definition.
12
+
13
+ DocWrapper works by declaring properties with a name, type, and the search path to find the raw data in the DOM.
14
+
15
+ Basic Example
16
+
17
+ require 'nokogiri'
18
+ require 'doc_wrapper'
19
+
20
+ html = %{
21
+ <html>
22
+ <body>
23
+ <p class="first_name">Mark</p>
24
+ <p class="last_name">Menard</p>
25
+ </body>
26
+ </html>
27
+ }
28
+
29
+ class PersonWrapper
30
+ include DocWrapper::Base
31
+ include DocWrapper::Properties
32
+
33
+ property :first_name, :string, '//p[@class="first_name"]'
34
+ property :last_name, :string, '//p[@class="last_name"]'
35
+ end
36
+
37
+ person_wrapper = PersonWrapper.new(Nokogiri::HTML(html))
38
+ person_wrapper.first_name # => 'Mark'
39
+ person_wrapper.last_name # => 'Menard'
40
+
41
+ Supported Property Types
42
+
43
+ Currently DocWrapper supports :string, :date, :time, :boolean, and :raw. Additionally, DocWrapper supports embedded wrappers using has_one and has_many functionality very similar to ActiveRecord's. See the specs for example usages.
44
+
45
+ Access to Node Attributes
46
+
47
+ String, Date, Time and Boolean properties can reference an attribute on a node by passing the :use_attribute option.
48
+
49
+ Given the following XML document:
50
+
51
+ <?xml version="1.0" encoding="UTF-8"?>
52
+ <feed>
53
+ <link type="text/html" href="http://search.twitter.com/search?q=yahoo.com" rel="alternate"/>
54
+ </feed>
55
+
56
+ You can access the link href with the following property definition.
57
+
58
+ class FeedWrapper
59
+ include DocWrapper::Base
60
+ include DocWrapper::Properties
61
+
62
+ property :link, :string, '//feed/link', :use_attribute => :href
63
+ end
@@ -13,6 +13,8 @@ require 'doc_wrapper/base'
13
13
  require 'doc_wrapper/multi_property_definition'
14
14
  require 'doc_wrapper/base_property_definition'
15
15
  require 'doc_wrapper/raw_property_definition'
16
+ require 'doc_wrapper/has_many_property_definition'
17
+ require 'doc_wrapper/has_one_property_definition'
16
18
  require 'doc_wrapper/inner_html_property_definition'
17
19
  require 'doc_wrapper/string_property_definition'
18
20
  require 'doc_wrapper/date_property_definition'
@@ -9,29 +9,26 @@ module DocWrapper
9
9
  end
10
10
 
11
11
  def multi_property (property_name, selectors, options = {}, &block)
12
- raise "Multi properties require a block" if block.nil?
12
+ raise "Multi-properties require a block" if block.nil?
13
13
  add_property_definition(property_name, MultiPropertyDefinition.new(property_name, selectors, initialize_options(options), block))
14
14
  end
15
15
 
16
16
  def has_many (property_name, selector, klass, options = {})
17
- options = initialize_options(options)
17
+ wrapper = HasManyPropertyDefinition.new(property_name, selector, klass, initialize_options(options))
18
18
  define_method property_name do
19
- get_has_many( property_name, selector, klass, options)
19
+ wrapper.property(documents)
20
20
  end
21
21
  end
22
22
 
23
23
  def has_one (property_name, selector, klass, options = {})
24
- options = initialize_options(options)
24
+ wrapper = HasOnePropertyDefinition.new(property_name, selector, klass, initialize_options(options))
25
25
  define_method(property_name) do
26
- get_has_one(property_name, selector, klass, options)
26
+ wrapper.property(documents)
27
27
  end
28
28
  end
29
29
 
30
30
  def namespaces (namespaces)
31
31
  @namespaces = namespaces
32
- # define_method(:namespaces) do
33
- # namespaces
34
- # end
35
32
  end
36
33
 
37
34
  ##################
@@ -40,27 +37,26 @@ module DocWrapper
40
37
 
41
38
  def add_property_definition (property_name, wrapper)
42
39
  add_property_name(property_name)
43
- add_property_wrapper(property_name, wrapper)
44
- add_property_accessor(property_name)
40
+ add_property_accessor(property_name, wrapper)
45
41
  end
46
42
 
47
43
  # Add a property name to the singleton property_names attribute.
48
44
  def add_property_name (property_name)
49
- # Add the property name to the property_names collection.
50
45
  self.property_names << property_name
51
46
  end
52
47
 
53
- # Add a property wrapper to the property_definitions Hash.
54
- def add_property_wrapper (property_name, wrapper)
55
- self.property_definitions[property_name] = wrapper
48
+ def add_property_accessor (property_name, wrapper)
49
+ define_method(property_name) do
50
+ wrapper.property(documents)
51
+ end
56
52
  end
57
53
 
58
- def add_property_accessor (property_name)
59
- class_eval <<-END
60
- def #{property_name.to_s}
61
- property_definitions[:#{property_name.to_s}].property(documents)
62
- end
63
- END
54
+ def build_property_definition (property_name, type, selector, options, block)
55
+ DocWrapper.const_get("#{camelize(type.to_s)}PropertyDefinition").new(property_name, type, selector, initialize_options(options), block)
56
+ end
57
+
58
+ def camelize (string)
59
+ string.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
64
60
  end
65
61
 
66
62
  # Set default options that all properties need.
@@ -72,15 +68,6 @@ module DocWrapper
72
68
  options[:namespaces] = @namespaces if @namespaces
73
69
  options
74
70
  end
75
-
76
-
77
- def build_property_definition (property_name, type, selector, options, block)
78
- DocWrapper.const_get("#{camelize(type.to_s)}PropertyDefinition").new(property_name, type, selector, initialize_options(options), block)
79
- end
80
-
81
- def camelize (string)
82
- string.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
83
- end
84
71
 
85
72
  end
86
73
  end
@@ -1,12 +1,9 @@
1
+ require 'date'
2
+
1
3
  module DocWrapper
2
4
  class DatePropertyDefinition < InnerHtmlPropertyDefinition
3
5
  def transform (result)
4
- if block
5
- result = block.call(result)
6
- else
7
- result = result.blank? ? nil : (options[:parser] ? options[:parser].call(result) : Date.parse(result))
8
- end
9
- result
6
+ block ? block.call(result) : (result.blank? ? nil : (options[:parser] ? options[:parser].call(result) : Date.parse(result)))
10
7
  end
11
8
  end
12
9
  end
@@ -0,0 +1,33 @@
1
+ class HasManyPropertyDefinition
2
+
3
+ attr_accessor :property_name, :selector, :klass, :options, :nodes
4
+
5
+ def initialize (property_name, selector, klass, options)
6
+ @property_name = property_name
7
+ @selector = selector
8
+ @klass = klass
9
+ @options = options
10
+ end
11
+
12
+ def property (documents)
13
+ self.nodes = documents.collect do |doc|
14
+ if options[:namespaces]
15
+ result = doc.search(selector, options[:namespaces])
16
+ else
17
+ result = doc.search(selector)
18
+ end
19
+ result.blank? ? nil : result
20
+ end.flatten.compact
21
+
22
+ nodes[start_row..end_row].collect { |node| klass.new(node) }
23
+ end
24
+
25
+ def start_row
26
+ options[:start_row] ? options[:start_row] : 0
27
+ end
28
+
29
+ def end_row
30
+ options[:end_row] ? options[:end_row] : nodes.size - 1
31
+ end
32
+
33
+ end
@@ -0,0 +1,17 @@
1
+ class HasOnePropertyDefinition
2
+
3
+ attr_accessor :property_name, :selector, :klass, :options
4
+
5
+ def initialize (property_name, selector, klass, options)
6
+ @property_name = property_name
7
+ @selector = selector
8
+ @klass = klass
9
+ @options = options
10
+ end
11
+
12
+ def property (documents)
13
+ nodes = documents.collect { |doc| result = doc.search(selector) ; result.blank? ? nil : result }.flatten.compact
14
+ klass.new(nodes)
15
+ end
16
+
17
+ end
@@ -1,13 +1,25 @@
1
1
  module DocWrapper
2
2
  class InnerHtmlPropertyDefinition < BasePropertyDefinition
3
3
  def property (documents)
4
+ transform(raw_property(documents))
5
+ end
6
+
7
+ def raw_property (documents)
8
+ if options[:use_attribute]
9
+ get_nodes(documents).first[options[:use_attribute]]
10
+ else
11
+ get_nodes(documents).inner_html.strip
12
+ end
13
+ end
14
+
15
+ def get_nodes (documents)
4
16
  if options[:namespaces]
5
- transform(documents[@options[:document] - 1].xpath(@selector, options[:namespaces]).inner_html.strip)
17
+ documents[@options[:document] - 1].xpath(@selector, options[:namespaces])
6
18
  else
7
19
  begin
8
- transform(documents[@options[:document] - 1].search(@selector).inner_html.strip)
20
+ documents[@options[:document] - 1].search(@selector)
9
21
  rescue Nokogiri::CSS::SyntaxError
10
- transform(documents[@options[:document] - 1].xpath(@selector).inner_html.strip)
22
+ documents[@options[:document] - 1].xpath(@selector)
11
23
  end
12
24
  end
13
25
  end
@@ -15,34 +15,10 @@ module DocWrapper
15
15
  def properties
16
16
  result = Hash.new
17
17
  property_names.each do |property|
18
- begin
19
- result[property] = send(property)
20
- rescue StandardError => e
21
- raise e
22
- end
18
+ result[property] = send(property)
23
19
  end
24
20
  result
25
21
  end
26
22
 
27
- def get_has_many (property_name, selector, klass, options)
28
- nodes = @documents.collect do |doc|
29
- if options[:namespaces]
30
- result = doc.search(selector, options[:namespaces])
31
- else
32
- result = doc.search(selector)
33
- end
34
- result.blank? ? nil : result
35
- end.flatten.compact
36
-
37
-
38
- start_row = options[:start_row] ? options[:start_row] : 0
39
- end_row = options[:end_row] ? options[:end_row] : nodes.size - 1
40
- nodes[start_row..end_row].collect { |node| klass.new(node) }
41
- end
42
-
43
- def get_has_one (property_name, selector, klass, options)
44
- nodes = @documents.collect { |doc| result = doc.search(selector) ; result.blank? ? nil : result }.flatten.compact
45
- klass.new(nodes)
46
- end
47
23
  end
48
24
  end
@@ -1,3 +1,5 @@
1
+ require 'time'
2
+
1
3
  module DocWrapper
2
4
  class TimePropertyDefinition < InnerHtmlPropertyDefinition
3
5
  def transform (result)
@@ -1,3 +1,3 @@
1
1
  module DocWrapper
2
- VERSION = "0.9.2"
2
+ VERSION = "0.9.4"
3
3
  end
@@ -14,13 +14,9 @@ class AtomDocWrapper
14
14
 
15
15
  property :twitter_id, :string, './atom:id'
16
16
  property :published, :string, './atom:published'
17
- property :link, :raw, './atom:link[@type="text/html"]' do |node_list|
18
- node_list.first[:href]
19
- end
17
+ property :link, :string, './atom:link[@type="text/html"]', :use_attribute => :href
20
18
  property :updated, :string, './atom:updated'
21
- property :author_avatar_link, :raw, './atom:link[@type="image/png"]' do |node_list|
22
- node_list.first[:href]
23
- end
19
+ property :author_avatar_link, :string, './atom:link[@type="image/png"]', :use_attribute => :href
24
20
  property :author, :string, './atom:author/atom:name'
25
21
  property :author_twitter_url, :string, './atom:author/atom:uri'
26
22
  property :content_text, :string, './atom:title'
@@ -161,8 +161,8 @@ class TestDocWrapper
161
161
  multi_property :arrayed_xpath, ["/html/body/table[2]/tr[2]/td"] do |elements|
162
162
  elements.join(" ")
163
163
  end
164
- property :raw_property, :raw, "/html/body/p[8]" do |ns|
165
- ns[0].attribute("class").inner_html
164
+ property :raw_property, :raw, "/html/body/p[8]" do |node_list|
165
+ node_list[0].attribute("class").inner_html
166
166
  end
167
167
 
168
168
  has_one :person, "/html/body/div", PersonWrapper
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_wrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.2
4
+ hash: 51
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 9
9
+ - 4
10
+ version: 0.9.4
5
11
  platform: ruby
6
12
  authors:
7
13
  - Mark Menard
@@ -9,59 +15,85 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2011-03-14 00:00:00 -04:00
18
+ date: 2011-06-26 00:00:00 -04:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: activesupport
17
- type: :runtime
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 3
32
+ - 0
33
+ - 0
23
34
  version: 3.0.0
24
- version:
35
+ type: :runtime
36
+ version_requirements: *id001
25
37
  - !ruby/object:Gem::Dependency
26
38
  name: bundler
27
- type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
30
42
  requirements:
31
43
  - - ">="
32
44
  - !ruby/object:Gem::Version
45
+ hash: 23
46
+ segments:
47
+ - 1
48
+ - 0
49
+ - 0
33
50
  version: 1.0.0
34
- version:
51
+ type: :development
52
+ version_requirements: *id002
35
53
  - !ruby/object:Gem::Dependency
36
54
  name: nokogiri
37
- type: :development
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
57
+ none: false
40
58
  requirements:
41
59
  - - ">="
42
60
  - !ruby/object:Gem::Version
61
+ hash: 3
62
+ segments:
63
+ - 0
43
64
  version: "0"
44
- version:
65
+ type: :development
66
+ version_requirements: *id003
45
67
  - !ruby/object:Gem::Dependency
46
68
  name: rspec
47
- type: :development
48
- version_requirement:
49
- version_requirements: !ruby/object:Gem::Requirement
69
+ prerelease: false
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
50
72
  requirements:
51
73
  - - ">="
52
74
  - !ruby/object:Gem::Version
75
+ hash: 15
76
+ segments:
77
+ - 2
78
+ - 0
79
+ - 0
53
80
  version: 2.0.0
54
- version:
81
+ type: :development
82
+ version_requirements: *id004
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: ZenTest
57
- type: :development
58
- version_requirement:
59
- version_requirements: !ruby/object:Gem::Requirement
85
+ prerelease: false
86
+ requirement: &id005 !ruby/object:Gem::Requirement
87
+ none: false
60
88
  requirements:
61
89
  - - ">="
62
90
  - !ruby/object:Gem::Version
91
+ hash: 3
92
+ segments:
93
+ - 0
63
94
  version: "0"
64
- version:
95
+ type: :development
96
+ version_requirements: *id005
65
97
  description: Using the DocWrapper DSL you can easily define classes that wrap HTML DOM Documents allowing extraction of properties using either XPath or CSS selectors.
66
98
  email:
67
99
  - mark@enablelabs.com
@@ -75,6 +107,7 @@ files:
75
107
  - .gitignore
76
108
  - Gemfile
77
109
  - Gemfile.lock
110
+ - README.txt
78
111
  - Rakefile
79
112
  - doc_wrapper.gemspec
80
113
  - lib/doc_wrapper.rb
@@ -83,6 +116,8 @@ files:
83
116
  - lib/doc_wrapper/base_property_definition.rb
84
117
  - lib/doc_wrapper/boolean_property_definition.rb
85
118
  - lib/doc_wrapper/date_property_definition.rb
119
+ - lib/doc_wrapper/has_many_property_definition.rb
120
+ - lib/doc_wrapper/has_one_property_definition.rb
86
121
  - lib/doc_wrapper/inner_html_property_definition.rb
87
122
  - lib/doc_wrapper/multi_property_definition.rb
88
123
  - lib/doc_wrapper/properties.rb
@@ -107,21 +142,29 @@ rdoc_options: []
107
142
  require_paths:
108
143
  - lib
109
144
  required_ruby_version: !ruby/object:Gem::Requirement
145
+ none: false
110
146
  requirements:
111
147
  - - ">="
112
148
  - !ruby/object:Gem::Version
149
+ hash: 3
150
+ segments:
151
+ - 0
113
152
  version: "0"
114
- version:
115
153
  required_rubygems_version: !ruby/object:Gem::Requirement
154
+ none: false
116
155
  requirements:
117
156
  - - ">="
118
157
  - !ruby/object:Gem::Version
158
+ hash: 23
159
+ segments:
160
+ - 1
161
+ - 3
162
+ - 6
119
163
  version: 1.3.6
120
- version:
121
164
  requirements: []
122
165
 
123
166
  rubyforge_project: doc_wrapper
124
- rubygems_version: 1.3.5
167
+ rubygems_version: 1.6.2
125
168
  signing_key:
126
169
  specification_version: 3
127
170
  summary: Declarative DSL for defining classes to wrap HTML DOM Documents