maiha-css_parser 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +31 -0
- data/Rakefile +1 -1
- data/lib/css_parser.rb +66 -11
- data/spec/css_parser_spec.rb +27 -0
- metadata +4 -2
data/README
CHANGED
@@ -16,4 +16,35 @@ Example
|
|
16
16
|
User.new parser.attributes
|
17
17
|
|
18
18
|
|
19
|
+
Formatter
|
20
|
+
=========
|
21
|
+
|
22
|
+
"css" accepts an :as option to specify the output format.
|
23
|
+
Let's consider the following html.
|
24
|
+
|
25
|
+
<span class=name>
|
26
|
+
Maiha
|
27
|
+
<span class=nick>(maiha)</span>
|
28
|
+
</span>
|
29
|
+
|
30
|
+
In the case of the :html formatter (this is the default):
|
31
|
+
|
32
|
+
class UserParser < CssParser
|
33
|
+
css :name, "span.name"
|
34
|
+
end
|
35
|
+
|
36
|
+
UserParser.new(html).name
|
37
|
+
# => "Maiha\n<span class=\"nick\">(maiha)</span>"
|
38
|
+
|
39
|
+
In the case of the :text formatter:
|
40
|
+
|
41
|
+
class UserParser < CssParser
|
42
|
+
css :name, "span.name", :as=>:text
|
43
|
+
end
|
44
|
+
|
45
|
+
UserParser.new(html).name
|
46
|
+
# => "Maiha\n (maiha)"
|
47
|
+
|
48
|
+
|
49
|
+
|
19
50
|
Copyright (c) 2008 maiha@wota.jp, released under the MIT license
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/css_parser"
|
8
8
|
SUMMARY = "hpricot helper that scrapes html easily by parser class defined css selector"
|
9
|
-
GEM_VERSION = "0.1.2"
|
9
|
+
GEM_VERSION = "0.2.0"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
# s.rubyforge_project = 'merb'
|
data/lib/css_parser.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'dsl_accessor'
|
3
3
|
require 'hpricot'
|
4
|
+
require 'nkf'
|
5
|
+
require 'pathname'
|
4
6
|
|
5
7
|
class CssParser
|
6
8
|
dsl_accessor :stored_css, proc{{}}
|
@@ -8,7 +10,56 @@ class CssParser
|
|
8
10
|
######################################################################
|
9
11
|
### Exceptions
|
10
12
|
|
11
|
-
class ReservedCss
|
13
|
+
class ReservedCss < StandardError; end
|
14
|
+
class InvalidFormat < StandardError; end
|
15
|
+
|
16
|
+
######################################################################
|
17
|
+
### StoredCss
|
18
|
+
|
19
|
+
StoredCss = Struct.new(:key, :pattern, :options)
|
20
|
+
class StoredCss
|
21
|
+
def formatter
|
22
|
+
Formatter.guess(options[:as])
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
######################################################################
|
27
|
+
### Formatters
|
28
|
+
|
29
|
+
module Formatter
|
30
|
+
def self.guess(type)
|
31
|
+
name = type.to_s.capitalize
|
32
|
+
if name.empty?
|
33
|
+
Base
|
34
|
+
else
|
35
|
+
Formatter.const_get(name)
|
36
|
+
end
|
37
|
+
rescue
|
38
|
+
raise InvalidFormat, type.inspect
|
39
|
+
end
|
40
|
+
|
41
|
+
class Base
|
42
|
+
def initialize(element)
|
43
|
+
@element = element
|
44
|
+
end
|
45
|
+
|
46
|
+
def execute
|
47
|
+
raise NotImplementedError, "subclass responsibility"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Html < Base
|
52
|
+
def execute
|
53
|
+
@element.inner_html
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
class Text < Base
|
58
|
+
def execute
|
59
|
+
@element.inner_text
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
12
63
|
|
13
64
|
######################################################################
|
14
65
|
### InstanceMethods
|
@@ -24,7 +75,7 @@ class CssParser
|
|
24
75
|
|
25
76
|
def attributes(keys = nil)
|
26
77
|
keys ||= self.class.my_stored_css.keys
|
27
|
-
keys.inject({}){|h,key| h[key] =
|
78
|
+
keys.inject({}){|h,key| h[key] = __send__(key); h}
|
28
79
|
end
|
29
80
|
|
30
81
|
######################################################################
|
@@ -35,10 +86,11 @@ class CssParser
|
|
35
86
|
new(html, file)
|
36
87
|
end
|
37
88
|
|
38
|
-
def self.css(key, pattern)
|
89
|
+
def self.css(key, pattern, options = {})
|
39
90
|
key = key.to_s.intern
|
91
|
+
options[:as] ||= :html
|
40
92
|
guard_from_overridden(key)
|
41
|
-
define_css(key, pattern)
|
93
|
+
define_css(key, pattern, options)
|
42
94
|
end
|
43
95
|
|
44
96
|
private
|
@@ -51,26 +103,29 @@ class CssParser
|
|
51
103
|
@my_stored_css ||= (stored_css.dup rescue stored_css)
|
52
104
|
end
|
53
105
|
|
54
|
-
def self.define_css(key, pattern)
|
55
|
-
|
106
|
+
def self.define_css(key, pattern, options)
|
107
|
+
stored = StoredCss.new(key, pattern, options)
|
108
|
+
|
109
|
+
# when the instance method is not defined yet
|
56
110
|
unless instance_methods.include?(key.to_s)
|
57
111
|
css_module.module_eval do
|
58
112
|
define_method(key) do
|
59
|
-
|
60
|
-
element = parser.search(pattern).first
|
61
|
-
element ? element.
|
113
|
+
stored = self.class.my_stored_css[key]
|
114
|
+
element = parser.search(stored.pattern).first
|
115
|
+
element ? stored.formatter.new(element).execute : nil
|
62
116
|
end
|
63
117
|
end
|
64
118
|
end
|
65
119
|
|
66
|
-
|
120
|
+
# update stored
|
121
|
+
my_stored_css[key] = stored
|
67
122
|
end
|
68
123
|
|
69
124
|
def self.guard_from_overridden(key)
|
70
125
|
return if my_stored_css.has_key?(key)
|
71
126
|
|
72
127
|
if instance_methods(true).include?(key.to_s)
|
73
|
-
raise ReservedCss, "#{key} is reserved for #{self.to_s
|
128
|
+
raise ReservedCss, "#{key} is reserved for #{self.to_s}##{key}"
|
74
129
|
end
|
75
130
|
if %w( attributes parser ).include?(key.to_s)
|
76
131
|
raise ReservedCss, "#{key} is reserved for CssParser module"
|
data/spec/css_parser_spec.rb
CHANGED
@@ -60,6 +60,33 @@ describe CssParser do
|
|
60
60
|
foo.name.should == "maiha"
|
61
61
|
end
|
62
62
|
|
63
|
+
it "should return inner html in default" do
|
64
|
+
class CssParser
|
65
|
+
css :foo, "div"
|
66
|
+
end
|
67
|
+
|
68
|
+
foo = CssParser.new('<div><span>a</span></div>')
|
69
|
+
foo.foo.should == "<span>a</span>"
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should return inner html with :html format" do
|
73
|
+
class CssParser
|
74
|
+
css :foo, "div", :as=>:html
|
75
|
+
end
|
76
|
+
|
77
|
+
foo = CssParser.new('<div><span>a</span></div>')
|
78
|
+
foo.foo.should == "<span>a</span>"
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should return inner text with :text format" do
|
82
|
+
class CssParser
|
83
|
+
css :foo, "div", :as=>:text
|
84
|
+
end
|
85
|
+
|
86
|
+
foo = CssParser.new('<div><span>a</span></div>')
|
87
|
+
foo.foo.should == "a"
|
88
|
+
end
|
89
|
+
|
63
90
|
it "should define instance method as module" do
|
64
91
|
class CssParser
|
65
92
|
css :foo, "div"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maiha-css_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maiha
|
@@ -9,11 +9,12 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-04-07 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: hpricot
|
17
|
+
type: :runtime
|
17
18
|
version_requirement:
|
18
19
|
version_requirements: !ruby/object:Gem::Requirement
|
19
20
|
requirements:
|
@@ -23,6 +24,7 @@ dependencies:
|
|
23
24
|
version:
|
24
25
|
- !ruby/object:Gem::Dependency
|
25
26
|
name: maiha-dsl_accessor
|
27
|
+
type: :runtime
|
26
28
|
version_requirement:
|
27
29
|
version_requirements: !ruby/object:Gem::Requirement
|
28
30
|
requirements:
|