maiha-css_parser 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. data/README +31 -0
  2. data/Rakefile +1 -1
  3. data/lib/css_parser.rb +66 -11
  4. data/spec/css_parser_spec.rb +27 -0
  5. metadata +4 -2
data/README CHANGED
@@ -16,4 +16,35 @@ Example
16
16
  User.new parser.attributes
17
17
 
18
18
 
19
+ Formatter
20
+ =========
21
+
22
+ "css" accepts :as option to specify output format.
23
+ Let's consider following html.
24
+
25
+ <span class=name>
26
+ Maiha
27
+ <span class=nick>(maiha)</span>
28
+ </span>
29
+
30
+ When the case of :html formatter, this is in default.
31
+
32
+ class UserParser < CssParser
33
+ css :name, "span.name"
34
+ end
35
+
36
+ UserParser.new(html).name
37
+ # => "Maiha\n<span class=\"nick\">(maiha)</span>"
38
+
39
+ When the case of :text formatter.
40
+
41
+ class UserParser < CssParser
42
+ css :name, "span.name", :as=>:text
43
+ end
44
+
45
+ UserParser.new(html).name
46
+ # => "Maiha\n (maiha)"
47
+
48
+
49
+
19
50
  Copyright (c) 2008 maiha@wota.jp, released under the MIT license
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
6
6
  EMAIL = "maiha@wota.jp"
7
7
  HOMEPAGE = "http://github.com/maiha/css_parser"
8
8
  SUMMARY = "hpricot helper that scrapes html easily by parser class defined css selector"
9
- GEM_VERSION = "0.1.2"
9
+ GEM_VERSION = "0.2.0"
10
10
 
11
11
  spec = Gem::Specification.new do |s|
12
12
  # s.rubyforge_project = 'merb'
data/lib/css_parser.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  require 'rubygems'
2
2
  require 'dsl_accessor'
3
3
  require 'hpricot'
4
+ require 'nkf'
5
+ require 'pathname'
4
6
 
5
7
  class CssParser
6
8
  dsl_accessor :stored_css, proc{{}}
@@ -8,7 +10,56 @@ class CssParser
8
10
  ######################################################################
9
11
  ### Exceptions
10
12
 
11
- class ReservedCss < StandardError; end
13
+ class ReservedCss < StandardError; end
14
+ class InvalidFormat < StandardError; end
15
+
16
+ ######################################################################
17
+ ### StoredCss
18
+
19
+ StoredCss = Struct.new(:key, :pattern, :options)
20
+ class StoredCss
21
+ def formatter
22
+ Formatter.guess(options[:as])
23
+ end
24
+ end
25
+
26
+ ######################################################################
27
+ ### Formatters
28
+
29
+ module Formatter
30
+ def self.guess(type)
31
+ name = type.to_s.capitalize
32
+ if name.empty?
33
+ Base
34
+ else
35
+ Formatter.const_get(name)
36
+ end
37
+ rescue
38
+ raise InvalidFormat, type.inspect
39
+ end
40
+
41
+ class Base
42
+ def initialize(element)
43
+ @element = element
44
+ end
45
+
46
+ def execute
47
+ raise NotImplementedError, "subclass responsibility"
48
+ end
49
+ end
50
+
51
+ class Html < Base
52
+ def execute
53
+ @element.inner_html
54
+ end
55
+ end
56
+
57
+ class Text < Base
58
+ def execute
59
+ @element.inner_text
60
+ end
61
+ end
62
+ end
12
63
 
13
64
  ######################################################################
14
65
  ### InstanceMethods
@@ -24,7 +75,7 @@ class CssParser
24
75
 
25
76
  def attributes(keys = nil)
26
77
  keys ||= self.class.my_stored_css.keys
27
- keys.inject({}){|h,key| h[key] = send(key); h}
78
+ keys.inject({}){|h,key| h[key] = __send__(key); h}
28
79
  end
29
80
 
30
81
  ######################################################################
@@ -35,10 +86,11 @@ class CssParser
35
86
  new(html, file)
36
87
  end
37
88
 
38
- def self.css(key, pattern)
89
+ def self.css(key, pattern, options = {})
39
90
  key = key.to_s.intern
91
+ options[:as] ||= :html
40
92
  guard_from_overridden(key)
41
- define_css(key, pattern)
93
+ define_css(key, pattern, options)
42
94
  end
43
95
 
44
96
  private
@@ -51,26 +103,29 @@ class CssParser
51
103
  @my_stored_css ||= (stored_css.dup rescue stored_css)
52
104
  end
53
105
 
54
- def self.define_css(key, pattern)
55
- # not defined yet
106
+ def self.define_css(key, pattern, options)
107
+ stored = StoredCss.new(key, pattern, options)
108
+
109
+ # when the instance method is not defined yet
56
110
  unless instance_methods.include?(key.to_s)
57
111
  css_module.module_eval do
58
112
  define_method(key) do
59
- pattern = self.class.my_stored_css[key]
60
- element = parser.search(pattern).first
61
- element ? element.inner_html : nil
113
+ stored = self.class.my_stored_css[key]
114
+ element = parser.search(stored.pattern).first
115
+ element ? stored.formatter.new(element).execute : nil
62
116
  end
63
117
  end
64
118
  end
65
119
 
66
- my_stored_css[key] = pattern
120
+ # update stored
121
+ my_stored_css[key] = stored
67
122
  end
68
123
 
69
124
  def self.guard_from_overridden(key)
70
125
  return if my_stored_css.has_key?(key)
71
126
 
72
127
  if instance_methods(true).include?(key.to_s)
73
- raise ReservedCss, "#{key} is reserved for #{self.to_s.classify}##{key}"
128
+ raise ReservedCss, "#{key} is reserved for #{self.to_s}##{key}"
74
129
  end
75
130
  if %w( attributes parser ).include?(key.to_s)
76
131
  raise ReservedCss, "#{key} is reserved for CssParser module"
data/spec/css_parser_spec.rb CHANGED
@@ -60,6 +60,33 @@ describe CssParser do
60
60
  foo.name.should == "maiha"
61
61
  end
62
62
 
63
+ it "should return inner html in default" do
64
+ class CssParser
65
+ css :foo, "div"
66
+ end
67
+
68
+ foo = CssParser.new('<div><span>a</span></div>')
69
+ foo.foo.should == "<span>a</span>"
70
+ end
71
+
72
+ it "should return inner html with :html format" do
73
+ class CssParser
74
+ css :foo, "div", :as=>:html
75
+ end
76
+
77
+ foo = CssParser.new('<div><span>a</span></div>')
78
+ foo.foo.should == "<span>a</span>"
79
+ end
80
+
81
+ it "should return inner text with :text format" do
82
+ class CssParser
83
+ css :foo, "div", :as=>:text
84
+ end
85
+
86
+ foo = CssParser.new('<div><span>a</span></div>')
87
+ foo.foo.should == "a"
88
+ end
89
+
63
90
  it "should define instance method as module" do
64
91
  class CssParser
65
92
  css :foo, "div"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: maiha-css_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - maiha
@@ -9,11 +9,12 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-24 00:00:00 -08:00
12
+ date: 2009-04-07 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: hpricot
17
+ type: :runtime
17
18
  version_requirement:
18
19
  version_requirements: !ruby/object:Gem::Requirement
19
20
  requirements:
@@ -23,6 +24,7 @@ dependencies:
23
24
  version:
24
25
  - !ruby/object:Gem::Dependency
25
26
  name: maiha-dsl_accessor
27
+ type: :runtime
26
28
  version_requirement:
27
29
  version_requirements: !ruby/object:Gem::Requirement
28
30
  requirements: