maiha-css_parser 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. data/README +31 -0
  2. data/Rakefile +1 -1
  3. data/lib/css_parser.rb +66 -11
  4. data/spec/css_parser_spec.rb +27 -0
  5. metadata +4 -2
data/README CHANGED
@@ -16,4 +16,35 @@ Example
16
16
  User.new parser.attributes
17
17
 
18
18
 
19
+ Formatter
20
+ =========
21
+
22
+ "css" accepts :as option to specify output format.
23
+ Let's consider the following html.
24
+
25
+ <span class=name>
26
+ Maiha
27
+ <span class=nick>(maiha)</span>
28
+ </span>
29
+
30
+ With the :html formatter, which is the default:
31
+
32
+ class UserParser < CssParser
33
+ css :name, "span.name"
34
+ end
35
+
36
+ UserParser.new(html).name
37
+ # => "Maiha\n<span class=\"nick\">(maiha)</span>"
38
+
39
+ With the :text formatter:
40
+
41
+ class UserParser < CssParser
42
+ css :name, "span.name", :as=>:text
43
+ end
44
+
45
+ UserParser.new(html).name
46
+ # => "Maiha\n (maiha)"
47
+
48
+
49
+
19
50
  Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
6
6
  EMAIL = "maiha@wota.jp"
7
7
  HOMEPAGE = "http://github.com/maiha/css_parser"
8
8
  SUMMARY = "hpricot helper that scrapes html easily by parser class defined css selector"
9
- GEM_VERSION = "0.1.2"
9
+ GEM_VERSION = "0.2.0"
10
10
 
11
11
  spec = Gem::Specification.new do |s|
12
12
  # s.rubyforge_project = 'merb'
@@ -1,6 +1,8 @@
1
1
  require 'rubygems'
2
2
  require 'dsl_accessor'
3
3
  require 'hpricot'
4
+ require 'nkf'
5
+ require 'pathname'
4
6
 
5
7
  class CssParser
6
8
  dsl_accessor :stored_css, proc{{}}
@@ -8,7 +10,56 @@ class CssParser
8
10
  ######################################################################
9
11
  ### Exceptions
10
12
 
11
- class ReservedCss < StandardError; end
13
+ class ReservedCss < StandardError; end
14
+ class InvalidFormat < StandardError; end
15
+
16
+ ######################################################################
17
+ ### StoredCss
18
+
19
+ StoredCss = Struct.new(:key, :pattern, :options)
20
+ class StoredCss
21
+ def formatter
22
+ Formatter.guess(options[:as])
23
+ end
24
+ end
25
+
26
+ ######################################################################
27
+ ### Formatters
28
+
29
+ module Formatter
30
+ def self.guess(type)
31
+ name = type.to_s.capitalize
32
+ if name.empty?
33
+ Base
34
+ else
35
+ Formatter.const_get(name)
36
+ end
37
+ rescue
38
+ raise InvalidFormat, type.inspect
39
+ end
40
+
41
+ class Base
42
+ def initialize(element)
43
+ @element = element
44
+ end
45
+
46
+ def execute
47
+ raise NotImplementedError, "subclass responsibility"
48
+ end
49
+ end
50
+
51
+ class Html < Base
52
+ def execute
53
+ @element.inner_html
54
+ end
55
+ end
56
+
57
+ class Text < Base
58
+ def execute
59
+ @element.inner_text
60
+ end
61
+ end
62
+ end
12
63
 
13
64
  ######################################################################
14
65
  ### InstanceMethods
@@ -24,7 +75,7 @@ class CssParser
24
75
 
25
76
  def attributes(keys = nil)
26
77
  keys ||= self.class.my_stored_css.keys
27
- keys.inject({}){|h,key| h[key] = send(key); h}
78
+ keys.inject({}){|h,key| h[key] = __send__(key); h}
28
79
  end
29
80
 
30
81
  ######################################################################
@@ -35,10 +86,11 @@ class CssParser
35
86
  new(html, file)
36
87
  end
37
88
 
38
- def self.css(key, pattern)
89
+ def self.css(key, pattern, options = {})
39
90
  key = key.to_s.intern
91
+ options[:as] ||= :html
40
92
  guard_from_overridden(key)
41
- define_css(key, pattern)
93
+ define_css(key, pattern, options)
42
94
  end
43
95
 
44
96
  private
@@ -51,26 +103,29 @@ class CssParser
51
103
  @my_stored_css ||= (stored_css.dup rescue stored_css)
52
104
  end
53
105
 
54
- def self.define_css(key, pattern)
55
- # not defined yet
106
+ def self.define_css(key, pattern, options)
107
+ stored = StoredCss.new(key, pattern, options)
108
+
109
+ # when the instance method is not defined yet
56
110
  unless instance_methods.include?(key.to_s)
57
111
  css_module.module_eval do
58
112
  define_method(key) do
59
- pattern = self.class.my_stored_css[key]
60
- element = parser.search(pattern).first
61
- element ? element.inner_html : nil
113
+ stored = self.class.my_stored_css[key]
114
+ element = parser.search(stored.pattern).first
115
+ element ? stored.formatter.new(element).execute : nil
62
116
  end
63
117
  end
64
118
  end
65
119
 
66
- my_stored_css[key] = pattern
120
+ # update stored
121
+ my_stored_css[key] = stored
67
122
  end
68
123
 
69
124
  def self.guard_from_overridden(key)
70
125
  return if my_stored_css.has_key?(key)
71
126
 
72
127
  if instance_methods(true).include?(key.to_s)
73
- raise ReservedCss, "#{key} is reserved for #{self.to_s.classify}##{key}"
128
+ raise ReservedCss, "#{key} is reserved for #{self.to_s}##{key}"
74
129
  end
75
130
  if %w( attributes parser ).include?(key.to_s)
76
131
  raise ReservedCss, "#{key} is reserved for CssParser module"
@@ -60,6 +60,33 @@ describe CssParser do
60
60
  foo.name.should == "maiha"
61
61
  end
62
62
 
63
+ it "should return inner html in default" do
64
+ class CssParser
65
+ css :foo, "div"
66
+ end
67
+
68
+ foo = CssParser.new('<div><span>a</span></div>')
69
+ foo.foo.should == "<span>a</span>"
70
+ end
71
+
72
+ it "should return inner html with :html format" do
73
+ class CssParser
74
+ css :foo, "div", :as=>:html
75
+ end
76
+
77
+ foo = CssParser.new('<div><span>a</span></div>')
78
+ foo.foo.should == "<span>a</span>"
79
+ end
80
+
81
+ it "should return inner text with :text format" do
82
+ class CssParser
83
+ css :foo, "div", :as=>:text
84
+ end
85
+
86
+ foo = CssParser.new('<div><span>a</span></div>')
87
+ foo.foo.should == "a"
88
+ end
89
+
63
90
  it "should define instance method as module" do
64
91
  class CssParser
65
92
  css :foo, "div"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maiha-css_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -9,11 +9,12 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-24 00:00:00 -08:00
12
+ date: 2009-04-07 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: hpricot
17
+ type: :runtime
17
18
  version_requirement:
18
19
  version_requirements: !ruby/object:Gem::Requirement
19
20
  requirements:
@@ -23,6 +24,7 @@ dependencies:
23
24
  version:
24
25
  - !ruby/object:Gem::Dependency
25
26
  name: maiha-dsl_accessor
27
+ type: :runtime
26
28
  version_requirement:
27
29
  version_requirements: !ruby/object:Gem::Requirement
28
30
  requirements: