doc_wrapper 0.0.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock ADDED
@@ -0,0 +1,32 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ doc_wrapper (0.0.1)
5
+ activesupport (>= 3.0.0)
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ ZenTest (4.4.2)
11
+ activesupport (3.0.0)
12
+ diff-lcs (1.1.2)
13
+ nokogiri (1.4.4)
14
+ rspec (2.5.0)
15
+ rspec-core (~> 2.5.0)
16
+ rspec-expectations (~> 2.5.0)
17
+ rspec-mocks (~> 2.5.0)
18
+ rspec-core (2.5.1)
19
+ rspec-expectations (2.5.0)
20
+ diff-lcs (~> 1.1.2)
21
+ rspec-mocks (2.5.0)
22
+
23
+ PLATFORMS
24
+ ruby
25
+
26
+ DEPENDENCIES
27
+ ZenTest
28
+ activesupport (>= 3.0.0)
29
+ bundler (>= 1.0.0)
30
+ doc_wrapper!
31
+ nokogiri
32
+ rspec (>= 2.0.0)
@@ -0,0 +1,7 @@
1
module DocWrapper
  # Mixin that supplies the constructor shared by document wrappers.
  module Base
    # Stores every document handed to the constructor in @documents.
    #
    # Documents may be given one by one or as (nested) arrays; whatever is
    # passed is flattened into a single list.
    def initialize(*documents)
      @documents = documents.flatten
    end
  end
end
@@ -0,0 +1,71 @@
1
module DocWrapper
  # Class-level DSL for declaring wrapper properties.
  #
  # The methods here expect the receiving class to respond to
  # +property_names+ (an Array) and +property_definitions+ (a Hash), and
  # instances to respond to +property_definitions+ and +documents+.
  module ClassMethods
    # Property types accepted by #property. Each one is resolved to a
    # DocWrapper::<Type>PropertyDefinition class by #build_property_definition.
    PROPERTY_TYPES = [:string, :date, :time, :boolean, :raw].freeze

    # Add a property name to the singleton property_names attribute.
    def add_property_name(property_name)
      property_names << property_name
    end

    # Add a property wrapper to the property_definitions Hash, keyed by
    # property_name.
    def add_property_wrapper(property_name, wrapper)
      property_definitions[property_name] = wrapper
    end

    # Define an instance method named after the property. The method looks the
    # definition up under the same key it was registered with and evaluates it
    # against the instance's documents.
    #
    # define_method is used rather than evaluating a source string so that the
    # lookup key always matches the key used by #add_property_wrapper.
    def add_property_accessor(property_name)
      define_method(property_name) do
        property_definitions[property_name].property(documents)
      end
    end

    # Set default options that all properties need.
    #
    # Returns a new Hash in which :document defaults to 1, i.e. the first
    # entry of documents, unless the caller supplied its own :document.
    def initialize_options(options)
      { :document => 1 }.merge(options)
    end

    # Register a property: record its name, store its definition and create
    # the instance accessor.
    def add_property_definition(property_name, wrapper)
      add_property_name(property_name)
      add_property_wrapper(property_name, wrapper)
      add_property_accessor(property_name)
    end

    # Create a typed property definition for a document wrapper.
    #
    # property_name - name of the property (normally a Symbol).
    # type          - one of PROPERTY_TYPES.
    # selector      - selector handed to the property definition.
    # options       - Hash of options; :document defaults to 1.
    # block         - optional block stored on the definition.
    #
    # Raises RuntimeError when type is not one of PROPERTY_TYPES.
    def property(property_name, type, selector, options = {}, &block)
      raise "Unhandled property type: #{type.to_s}" unless PROPERTY_TYPES.include?(type)
      # build_property_definition applies the default options itself.
      add_property_definition(property_name, build_property_definition(property_name, type, selector, options, block))
    end

    # Instantiate DocWrapper::<Type>PropertyDefinition for the given type,
    # with default options applied.
    def build_property_definition(property_name, type, selector, options, block)
      definition_class = DocWrapper.const_get("#{camelize(type.to_s)}PropertyDefinition")
      definition_class.new(property_name, type, selector, initialize_options(options), block)
    end

    # Convert an underscored (and optionally slash-separated) string into a
    # CamelCase constant name, e.g. "inner_html" => "InnerHtml".
    def camelize(string)
      string.gsub(/\/(.?)/) { "::#{$1.upcase}" }.gsub(/(?:^|_)(.)/) { $1.upcase }
    end

    # Declare a property computed from several selectors. The block is
    # mandatory and is stored on the MultiPropertyDefinition.
    #
    # Raises RuntimeError when no block is given.
    def multi_property(property_name, selectors, options = {}, &block)
      raise "Multi properties require a block" if block.nil?
      add_property_definition(property_name, MultiPropertyDefinition.new(property_name, selectors, initialize_options(options), block))
    end

    # Define an instance method that delegates to get_has_many with the
    # selector, wrapper class and defaulted options captured here.
    def has_many(property_name, selector, klass, options = {})
      options = initialize_options(options)
      define_method(property_name) do
        get_has_many(property_name, selector, klass, options)
      end
    end

    # Define an instance method that delegates to get_has_one with the
    # selector, wrapper class and defaulted options captured here.
    def has_one(property_name, selector, klass, options = {})
      options = initialize_options(options)
      define_method(property_name) do
        get_has_one(property_name, selector, klass, options)
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module DocWrapper
  # Holds the declaration of a single property (name, type, selector, options
  # and optional block) and knows how to evaluate it against a document list.
  class BasePropertyDefinition
    attr_accessor :property_name, :type, :selector, :options, :block

    def initialize(property_name, type, selector, options, block)
      @property_name, @type, @selector = property_name, type, selector
      @options, @block = options, block
    end

    # Evaluate the property against documents.
    #
    # options[:document] is a 1-based position into documents. The selector is
    # first run through #search; if that raises Nokogiri::CSS::SyntaxError the
    # same selector is retried through #xpath. The outcome is passed to
    # #transform.
    def property(documents)
      position = @options[:document] - 1
      transform(documents[position].search(@selector))
    rescue Nokogiri::CSS::SyntaxError
      position = @options[:document] - 1
      transform(documents[position].xpath(@selector))
    end

    # Hook for subclasses; the base implementation returns the search result
    # untouched.
    def transform(result)
      result
    end
  end
end
@@ -0,0 +1,13 @@
1
module DocWrapper
  # Property definition for :boolean properties. The conversion itself is
  # supplied by the caller, either as a block or as options[:parser].
  class BooleanPropertyDefinition < InnerHtmlPropertyDefinition
    # Convert the extracted text.
    #
    # * A block, when present, receives the text as-is and its return value is
    #   the property value.
    # * Otherwise blank text yields nil and non-blank text is passed to
    #   options[:parser].
    #
    # Raises RuntimeError when the removed :boolean_test option is supplied,
    # or when non-blank text must be converted but neither a block nor a
    # :parser was given (previously this surfaced as a NoMethodError on nil).
    def transform(result)
      raise "BooleanPropertyDefinition: boolean_test is deprecated please use options[:parser] or a block." if options[:boolean_test]
      return block.call(result) if block
      return nil if result.blank?

      parser = options[:parser]
      raise "BooleanPropertyDefinition: options[:parser] or a block is required." unless parser
      parser.call(result)
    end
  end
end
@@ -0,0 +1,12 @@
1
module DocWrapper
  # Property definition for :date properties.
  class DatePropertyDefinition < InnerHtmlPropertyDefinition
    # Convert the extracted text into a date.
    #
    # A block, when present, takes precedence and receives the text as-is.
    # Without a block, blank text yields nil; otherwise options[:parser] is
    # used when given and Date.parse when not.
    def transform(result)
      return block.call(result) if block
      return nil if result.blank?

      if options[:parser]
        options[:parser].call(result)
      else
        Date.parse(result)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module DocWrapper
  # Base class for properties whose raw value is the stripped inner HTML of
  # the nodes matched by the selector.
  class InnerHtmlPropertyDefinition < BasePropertyDefinition
    # Look the selector up in the document at the 1-based position
    # options[:document], take the stripped inner HTML of the match and pass
    # it to #transform. If #search raises Nokogiri::CSS::SyntaxError the
    # selector is retried through #xpath.
    def property(documents)
      matched = documents[@options[:document] - 1].search(@selector)
      transform(matched.inner_html.strip)
    rescue Nokogiri::CSS::SyntaxError
      matched = documents[@options[:document] - 1].xpath(@selector)
      transform(matched.inner_html.strip)
    end
  end
end
@@ -0,0 +1,32 @@
1
module DocWrapper
  # Property definition that gathers text from several selectors and hands
  # the collected strings to a block.
  class MultiPropertyDefinition
    attr_accessor :property_name, :selectors, :options, :block

    def initialize(property_name, selectors, options, block)
      @property_name = property_name
      @selectors = selectors
      @options = options
      @block = block
    end

    # Run every selector against the document at the 1-based position
    # options[:document], collect the stripped inner HTML of each match in
    # selector order, and pass the list to #transform.
    def property(documents)
      results = []
      selectors.each do |selector|
        nodes = documents[@options[:document] - 1].search(selector)
        # A collection contributes one entry per node; anything that cannot
        # be iterated is treated as a single node.
        if nodes.respond_to?(:each)
          nodes.each { |node| results << node.inner_html.strip }
        else
          results << nodes.inner_html.strip
        end
      end
      transform(results)
    end

    # Returns nil when nothing matched, otherwise the value the block returns
    # for the collected strings.
    def transform(results)
      return nil if results.empty?
      block.call(results)
    end
  end
end
@@ -0,0 +1,39 @@
1
module DocWrapper
  # Instance-side support for wrappers: exposes the wrapped documents, a
  # Hash view of all declared properties, and the lookups behind has_many /
  # has_one.
  module Properties

    attr_reader :documents

    # Extend the including class with the accessor generator and the property
    # DSL, and give it empty property_names / property_definitions registries.
    def self.included(base)
      base.extend(SattrAccessor)
      base.extend(ClassMethods)
      base.sattr_accessor :property_names
      base.sattr_accessor :property_definitions
      base.property_names = []
      base.property_definitions = {}
    end

    # Returns a Hash mapping every declared property name to its current
    # value. Errors raised while evaluating a property propagate unchanged.
    def properties
      result = {}
      property_names.each { |name| result[name] = send(name) }
      result
    end

    # Wrap each node matching selector (across all documents) in klass.
    #
    # options[:start_row] and options[:end_row] bound the slice of matched
    # nodes (inclusive, 0-based); they default to the first and last node.
    # Returns an empty Array when start_row lies beyond the matched nodes.
    def get_has_many(property_name, selector, klass, options)
      nodes = matching_nodes(selector)
      start_row = options[:start_row] || 0
      end_row = options[:end_row] || nodes.size - 1
      # Array#[] with a range returns nil when the start is past the end.
      (nodes[start_row..end_row] || []).collect { |node| klass.new(node) }
    end

    # Wrap all nodes matching selector (across all documents) in a single
    # klass instance.
    def get_has_one(property_name, selector, klass, options)
      klass.new(matching_nodes(selector))
    end

    private

    # Search every document for selector and return the matches as one flat
    # Array, skipping documents whose result is blank.
    def matching_nodes(selector)
      @documents.collect { |doc| found = doc.search(selector); found.blank? ? nil : found }.flatten.compact
    end
  end
end
@@ -0,0 +1,8 @@
1
module DocWrapper
  # Property definition for :raw properties: the search result is exposed
  # without any text extraction.
  class RawPropertyDefinition < BasePropertyDefinition
    # Returns the block's value for the search result when a block was
    # declared, otherwise the search result itself.
    def transform(result)
      block ? block.call(result) : result
    end
  end
end
@@ -0,0 +1,55 @@
1
module DocWrapper

  # Generates "singleton attribute" accessors: the value lives in an instance
  # variable of the class itself and is reachable both through the class and
  # through its instances.
  module SattrAccessor

    # Define a class-level reader and an instance-level reader for sym.
    #
    # options[:inheritable] (default false) makes the instance reader combine
    # the class's own value with the value of its direct superclass:
    #
    # * Array values: the superclass value is appended and the result is
    #   flattened.
    # * Hash values: the superclass Hash is merged underneath the class's own
    #   entries.
    #
    # The combination is built in a new object on every read; the value stored
    # on the class is never modified, so repeated reads return the same result.
    def sattr_reader(sym, options = {})
      options = { :inheritable => false }.merge(options)

      inherited_lookup = ""
      if options[:inheritable]
        inherited_lookup = <<-RUBY
          ancestor = self.class.superclass
          # Get the value from our ancestor if there is one.
          if ancestor.respond_to?(:#{sym})
            inherited = ancestor.#{sym}
            if result.is_a?(Array)
              result = (result + [inherited]).flatten unless inherited.nil?
            elsif result.is_a?(Hash) && inherited.is_a?(Hash)
              result = inherited.merge(result)
            end
          end
        RUBY
      end

      class_eval(<<-RUBY, __FILE__, __LINE__ + 1)
        def self.#{sym}
          @#{sym}
        end

        def #{sym}
          result = self.class.#{sym}
          #{inherited_lookup}
          result
        end
      RUBY
    end

    # Define a class-level writer for sym and an instance-level writer that
    # delegates to it.
    def sattr_writer(sym)
      class_eval(<<-RUBY, __FILE__, __LINE__ + 1)
        def self.#{sym}=(value)
          @#{sym} = value
        end

        def #{sym}=(value)
          self.class.#{sym} = value
        end
      RUBY
    end

    # Define both the readers and the writers for sym.
    def sattr_accessor(sym, options = {})
      sattr_reader(sym, options)
      sattr_writer(sym)
    end

  end

end
@@ -0,0 +1,9 @@
1
module DocWrapper
  # Property definition for :string properties.
  class StringPropertyDefinition < InnerHtmlPropertyDefinition
    # Post-process the extracted text.
    #
    # options[:parser] is applied first, but only to non-blank text; the
    # block, when present, is then applied to whatever value results.
    def transform(result)
      unless result.blank?
        parser = options[:parser]
        result = parser.call(result) if parser
      end
      block ? block.call(result) : result
    end
  end
end
@@ -0,0 +1,12 @@
1
module DocWrapper
  # Property definition for :time properties.
  class TimePropertyDefinition < InnerHtmlPropertyDefinition
    # Convert the extracted text into a time.
    #
    # A block, when present, takes precedence and receives the text as-is.
    # Without a block, blank text yields nil; otherwise options[:parser] is
    # used when given and Time.parse when not.
    def transform(result)
      return block.call(result) if block
      return nil if result.blank?

      if options[:parser]
        options[:parser].call(result)
      else
        Time.parse(result)
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module DocWrapper
2
- VERSION = "0.0.1"
2
+ VERSION = "0.9.0"
3
3
  end
@@ -0,0 +1,171 @@
1
+ require 'spec_helper'
2
+
3
+
4
+
5
# Exercises the property DSL end to end through TestDocWrapper, which wraps
# the two HTML fixture documents.
describe DocWrapper do

  describe "The wrapped test documents" do
    let(:doc1) { Nokogiri::HTML(read_fixture_file("doc_wrapper_test.html")) }
    let(:doc2) { Nokogiri::HTML(read_fixture_file("doc_wrapper_test_2.html")) }
    let(:document) { TestDocWrapper.new([ doc1, doc2 ]) }

    # Values as exposed through the #properties Hash.
    describe "#properties" do
      let(:document_properties) { document.properties }

      it { document_properties[:paragraph].should == 'A paragraph' }
      it "should strip HTML &nbsp; from string properties" do
        pending
        document_properties[:space_example].should == 'Space Example'
      end
      it { document_properties[:active].should be_true }
      it { document_properties[:date].should == Date.parse('12-Dec-2009') }
      it { document_properties[:date_with_block].should == 'result from block' }
      it { document_properties[:time].should == Time.parse('12-Dec-2009 12:31 PM') }
      it { document_properties[:parsed_date].should == Date.parse('12-Dec-2009') }
      it { document_properties[:parsed_time].should == Time.parse('12-Dec-2009 12:31 PM') }
      it { document_properties[:table_data].should == 'A table data' }
      it { document_properties[:paragraph2].should == 'A second paragraph' }
      it { document_properties[:combined_tds].should == 'A table data A table data' }
      it { document_properties[:combined_date_and_time].should == Time.parse('12-Dec-2009 12:31 PM') }
      it { document_properties[:arrayed_xpath].should == 'name1 position1 salary1' }
      it { document_properties.size.should == 14 }
    end

    # Generated accessors exist.
    it { document.should respond_to(:paragraph) }
    it { document.should respond_to(:table_data) }
    it { document.should respond_to(:date) }
    it { document.should respond_to(:parsed_date) }
    it { document.should respond_to(:combined_tds) }
    it { document.should respond_to(:combined_date_and_time) }
    it { document.should respond_to(:arrayed_xpath) }

    # Values as exposed through the generated accessors.
    it { document.paragraph.should == 'A paragraph' }
    it { document.paragraph2.should == 'A second paragraph' }
    it { document.table_data.should == 'A table data' }
    it { document.date.should == Date.parse('12-Dec-2009') }
    it { document.parsed_date.should == Date.parse('12-Dec-2009') }
    it { document.date_with_block.should == 'result from block' }
    it { document.active.should == true }
    it { document.time.should == Time.parse('12-Dec-2009 12:31 PM') }
    it "should strip HTML &nbsp; from string properties" do
      pending
      document.space_example.should == 'Space Example'
    end
    it { document.combined_tds.should == 'A table data A table data' }
    it { document.combined_date_and_time.should == Time.parse('12-Dec-2009 12:31 PM') }
    it { document.arrayed_xpath.should == 'name1 position1 salary1' }
    it { document.raw_property.should == 'raw' }
    it { document.line_items.size.should == 4 }

    describe "paragraph_wrapper" do
      let(:paragraph_wrapper) { document.paragraph_wrapper }

      it { paragraph_wrapper.paragraph.should == 'A paragraph'}
    end

    describe "line item 0" do
      let(:line_items) { document.line_items[0] }

      it { line_items.name.should == 'name1' }
      it { line_items.position.should == 'position1' }
      it { line_items.salary.should == 'salary1' }
    end

    describe "line item 1" do
      let(:line_items) { document.line_items[1] }

      it { line_items.name.should == 'name2' }
      it { line_items.position.should == 'position2' }
      it { line_items.salary.should == 'salary2' }
    end

    describe "has_one person" do
      let(:person) { document.person }

      it { person.name.should == 'Mark Menard' }
      it { person.home_town.should == 'Troy, NY' }
    end

  end
end
109
+
110
# Wrapper for a single table row of the has_many fixture table; each property
# reads one cell of the row by position.
class TestDocLineItem
  include DocWrapper::Base
  include DocWrapper::Properties

  property :name,     :string, "./td[1]"
  property :position, :string, "./td[2]"
  property :salary,   :string, "./td[3]"
end
119
+
120
# Wrapper used by the has_one :person example; reads the first and second
# paragraph of the wrapped node.
class PersonWrapper
  include DocWrapper::Properties
  include DocWrapper::Base

  property :name,      :string, "./p[1]"
  property :home_town, :string, "./p[2]"
end
127
+
128
# Wrapper used by the has_one :paragraph_wrapper example; its single property
# selects the wrapped node itself.
class ParagraphWrapper
  include DocWrapper::Properties
  include DocWrapper::Base

  property :paragraph, :string, '.'
end
134
+
135
# Wrapper over the two fixture documents that exercises every declaration
# form of the DSL. The order of the declarations is significant: it is the
# order in which property names are registered.
class TestDocWrapper
  include DocWrapper::Properties
  include DocWrapper::Base

  property :paragraph, :string, "/html/body/p[1]"
  has_one :paragraph_wrapper, "/html/body/p[1]", ParagraphWrapper

  # String property post-processed by a block.
  property :space_example, :string, "/html/body/p[5]" do |text|
    text.strip
  end

  # Boolean property converted by a :parser lambda.
  property :active, :boolean, "/html/body/p[2]", :parser => lambda { |x| x == 'Yes' }

  # Date and time properties: default parsing, explicit :parser, and a block
  # that overrides parsing altogether.
  property :date, :date, "/html/body/p[3]"
  property :parsed_date, :date, "/html/body/p[3]", :parser => lambda { |x| Date.parse(x) }
  property :date_with_block, :date, "/html/body/p[3]" do |_text|
    "result from block"
  end
  property :time, :time, "/html/body/p[4]"
  property :parsed_time, :time, "/html/body/p[4]", :parser => lambda { |x| Time.parse(x) }

  property :table_data, :string, "/html/body/table[1]/tr/td[1]"

  # Read from the second wrapped document instead of the first.
  property :paragraph2, :string, "/html/body/p[1]", :document => 2

  has_many :line_items, "/html/body/table[2]/tr", TestDocLineItem, :start_row => 1, :end_row => 4

  multi_property :combined_tds, ["/html/body/table[1]/tr/td[1]", "/html/body/table[1]/tr/td[1]"] do |parts|
    parts.join(" ")
  end
  multi_property :combined_date_and_time, ["/html/body/p[6]", "/html/body/p[7]"] do |parts|
    Time.parse(parts.join(" "))
  end

  # Example of a multi_property that uses an XPath that returns an array of elements.
  multi_property :arrayed_xpath, ["/html/body/table[2]/tr[2]/td"] do |parts|
    parts.join(" ")
  end

  # Raw property: the block receives the search result itself.
  property :raw_property, :raw, "/html/body/p[8]" do |node_set|
    node_set[0].attribute("class").inner_html
  end

  has_one :person, "/html/body/div", PersonWrapper
end
@@ -0,0 +1,29 @@
1
+ <html>
2
+ <body>
3
+ <p>A paragraph</p><!-- paragraph -->
4
+ <p>Yes</p><!-- active -->
5
+ <p>12-Dec-2009</p><!-- date -->
6
+ <p>12-Dec-2009 12:31 PM</p><!-- date and time -->
7
+ <p> &nbsp; Space Example&nbsp; &nbsp;</p>
8
+ <table>
9
+ <tr>
10
+ <td>A table data</td>
11
+ </tr>
12
+ </table>
13
+ <table class="has_many">
14
+ <tr><th colspan="3">header</th></tr>
15
+ <tr><td>name1</td><td>position1</td><td>salary1</td></tr>
16
+ <tr><td>name2</td><td>position2</td><td>salary2</td></tr>
17
+ <tr><td>name3</td><td>position3</td><td>salary3</td></tr>
18
+ <tr><td>name4</td><td>position4</td><td>salary4</td></tr>
19
+ <tr><th colspan="3">footer</th></tr>
20
+ </table>
21
+ <p>12-Dec-2009</p><!-- date -->
22
+ <p>12:31 PM</p><!-- time -->
23
+ <p class="raw">Raw paragraph.</p>
24
+ <div>
25
+ <p class="name">Mark Menard</p>
26
+ <p class="home_town">Troy, NY</p>
27
+ </div>
28
+ </body>
29
+ </html>
@@ -0,0 +1,5 @@
1
+ <html>
2
+ <body>
3
+ <p>A second paragraph</p>
4
+ </body>
5
+ </html>
@@ -0,0 +1,6 @@
1
+ require File.expand_path("../../lib/doc_wrapper", __FILE__)
2
+ require 'nokogiri'
3
+
4
# Returns the contents of the named file from the fixtures directory that
# sits next to this file.
#
# File.read opens and closes the file itself, so no handle is left open.
def read_fixture_file(file_name)
  File.read(File.join(File.dirname(__FILE__), "fixtures", file_name.to_s))
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_wrapper
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 59
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
+ - 9
8
9
  - 0
9
- - 1
10
- version: 0.0.1
10
+ version: 0.9.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Mark Menard
@@ -106,10 +106,27 @@ extra_rdoc_files: []
106
106
  files:
107
107
  - .gitignore
108
108
  - Gemfile
109
+ - Gemfile.lock
109
110
  - Rakefile
110
111
  - doc_wrapper.gemspec
111
112
  - lib/doc_wrapper.rb
113
+ - lib/doc_wrapper/base.rb
114
+ - lib/doc_wrapper/base_class_methods.rb
115
+ - lib/doc_wrapper/base_property_definition.rb
116
+ - lib/doc_wrapper/boolean_property_definition.rb
117
+ - lib/doc_wrapper/date_property_definition.rb
118
+ - lib/doc_wrapper/inner_html_property_definition.rb
119
+ - lib/doc_wrapper/multi_property_definition.rb
120
+ - lib/doc_wrapper/properties.rb
121
+ - lib/doc_wrapper/raw_property_definition.rb
122
+ - lib/doc_wrapper/sattr_accessor.rb
123
+ - lib/doc_wrapper/string_property_definition.rb
124
+ - lib/doc_wrapper/time_property_definition.rb
112
125
  - lib/doc_wrapper/version.rb
126
+ - spec/doc_wrapper_spec.rb
127
+ - spec/fixtures/doc_wrapper_test.html
128
+ - spec/fixtures/doc_wrapper_test_2.html
129
+ - spec/spec_helper.rb
113
130
  has_rdoc: true
114
131
  homepage: http://rubygems.org/gems/doc_wrapper
115
132
  licenses: []