maiha-css_parser 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +31 -0
- data/Rakefile +1 -1
- data/lib/css_parser.rb +66 -11
- data/spec/css_parser_spec.rb +27 -0
- metadata +4 -2
data/README
CHANGED
@@ -16,4 +16,35 @@ Example
|
|
16
16
|
User.new parser.attributes
|
17
17
|
|
18
18
|
|
19
|
+
Formatter
|
20
|
+
=========
|
21
|
+
|
22
|
+
"css" accepts an :as option to specify the output format.
|
23
|
+
Let's consider the following HTML.
|
24
|
+
|
25
|
+
<span class=name>
|
26
|
+
Maiha
|
27
|
+
<span class=nick>(maiha)</span>
|
28
|
+
</span>
|
29
|
+
|
30
|
+
In the case of the :html formatter, which is the default:
|
31
|
+
|
32
|
+
class UserParser < CssParser
|
33
|
+
css :name, "span.name"
|
34
|
+
end
|
35
|
+
|
36
|
+
UserParser.new(html).name
|
37
|
+
# => "Maiha\n<span class=\"nick\">(maiha)</span>"
|
38
|
+
|
39
|
+
In the case of the :text formatter:
|
40
|
+
|
41
|
+
class UserParser < CssParser
|
42
|
+
css :name, "span.name", :as=>:text
|
43
|
+
end
|
44
|
+
|
45
|
+
UserParser.new(html).name
|
46
|
+
# => "Maiha\n (maiha)"
|
47
|
+
|
48
|
+
|
49
|
+
|
19
50
|
Copyright (c) 2008 maiha@wota.jp, released under the MIT license
|
data/Rakefile
CHANGED
@@ -6,7 +6,7 @@ AUTHOR = "maiha"
|
|
6
6
|
EMAIL = "maiha@wota.jp"
|
7
7
|
HOMEPAGE = "http://github.com/maiha/css_parser"
|
8
8
|
SUMMARY = "hpricot helper that scrapes html easily by parser class defined css selector"
|
9
|
-
GEM_VERSION = "0.1.2"
|
9
|
+
GEM_VERSION = "0.2.0"
|
10
10
|
|
11
11
|
spec = Gem::Specification.new do |s|
|
12
12
|
# s.rubyforge_project = 'merb'
|
data/lib/css_parser.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'dsl_accessor'
|
3
3
|
require 'hpricot'
|
4
|
+
require 'nkf'
|
5
|
+
require 'pathname'
|
4
6
|
|
5
7
|
class CssParser
|
6
8
|
dsl_accessor :stored_css, proc{{}}
|
@@ -8,7 +10,56 @@ class CssParser
|
|
8
10
|
######################################################################
|
9
11
|
### Exceptions
|
10
12
|
|
11
|
-
class ReservedCss
|
13
|
+
class ReservedCss < StandardError; end
|
14
|
+
class InvalidFormat < StandardError; end
|
15
|
+
|
16
|
+
######################################################################
|
17
|
+
### StoredCss
|
18
|
+
|
19
|
+
StoredCss = Struct.new(:key, :pattern, :options)
|
20
|
+
class StoredCss
|
21
|
+
def formatter
|
22
|
+
Formatter.guess(options[:as])
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
######################################################################
|
27
|
+
### Formatters
|
28
|
+
|
29
|
+
module Formatter
|
30
|
+
def self.guess(type)
|
31
|
+
name = type.to_s.capitalize
|
32
|
+
if name.empty?
|
33
|
+
Base
|
34
|
+
else
|
35
|
+
Formatter.const_get(name)
|
36
|
+
end
|
37
|
+
rescue
|
38
|
+
raise InvalidFormat, type.inspect
|
39
|
+
end
|
40
|
+
|
41
|
+
class Base
|
42
|
+
def initialize(element)
|
43
|
+
@element = element
|
44
|
+
end
|
45
|
+
|
46
|
+
def execute
|
47
|
+
raise NotImplementedError, "subclass responsibility"
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Html < Base
|
52
|
+
def execute
|
53
|
+
@element.inner_html
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
class Text < Base
|
58
|
+
def execute
|
59
|
+
@element.inner_text
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
12
63
|
|
13
64
|
######################################################################
|
14
65
|
### InstanceMethods
|
@@ -24,7 +75,7 @@ class CssParser
|
|
24
75
|
|
25
76
|
def attributes(keys = nil)
|
26
77
|
keys ||= self.class.my_stored_css.keys
|
27
|
-
keys.inject({}){|h,key| h[key] =
|
78
|
+
keys.inject({}){|h,key| h[key] = __send__(key); h}
|
28
79
|
end
|
29
80
|
|
30
81
|
######################################################################
|
@@ -35,10 +86,11 @@ class CssParser
|
|
35
86
|
new(html, file)
|
36
87
|
end
|
37
88
|
|
38
|
-
def self.css(key, pattern)
|
89
|
+
def self.css(key, pattern, options = {})
|
39
90
|
key = key.to_s.intern
|
91
|
+
options[:as] ||= :html
|
40
92
|
guard_from_overridden(key)
|
41
|
-
define_css(key, pattern)
|
93
|
+
define_css(key, pattern, options)
|
42
94
|
end
|
43
95
|
|
44
96
|
private
|
@@ -51,26 +103,29 @@ class CssParser
|
|
51
103
|
@my_stored_css ||= (stored_css.dup rescue stored_css)
|
52
104
|
end
|
53
105
|
|
54
|
-
def self.define_css(key, pattern)
|
55
|
-
|
106
|
+
def self.define_css(key, pattern, options)
|
107
|
+
stored = StoredCss.new(key, pattern, options)
|
108
|
+
|
109
|
+
# when the instance method is not defined yet
|
56
110
|
unless instance_methods.include?(key.to_s)
|
57
111
|
css_module.module_eval do
|
58
112
|
define_method(key) do
|
59
|
-
|
60
|
-
element = parser.search(pattern).first
|
61
|
-
element ? element.
|
113
|
+
stored = self.class.my_stored_css[key]
|
114
|
+
element = parser.search(stored.pattern).first
|
115
|
+
element ? stored.formatter.new(element).execute : nil
|
62
116
|
end
|
63
117
|
end
|
64
118
|
end
|
65
119
|
|
66
|
-
|
120
|
+
# update stored
|
121
|
+
my_stored_css[key] = stored
|
67
122
|
end
|
68
123
|
|
69
124
|
def self.guard_from_overridden(key)
|
70
125
|
return if my_stored_css.has_key?(key)
|
71
126
|
|
72
127
|
if instance_methods(true).include?(key.to_s)
|
73
|
-
raise ReservedCss, "#{key} is reserved for #{self.to_s
|
128
|
+
raise ReservedCss, "#{key} is reserved for #{self.to_s}##{key}"
|
74
129
|
end
|
75
130
|
if %w( attributes parser ).include?(key.to_s)
|
76
131
|
raise ReservedCss, "#{key} is reserved for CssParser module"
|
data/spec/css_parser_spec.rb
CHANGED
@@ -60,6 +60,33 @@ describe CssParser do
|
|
60
60
|
foo.name.should == "maiha"
|
61
61
|
end
|
62
62
|
|
63
|
+
it "should return inner html in default" do
|
64
|
+
class CssParser
|
65
|
+
css :foo, "div"
|
66
|
+
end
|
67
|
+
|
68
|
+
foo = CssParser.new('<div><span>a</span></div>')
|
69
|
+
foo.foo.should == "<span>a</span>"
|
70
|
+
end
|
71
|
+
|
72
|
+
it "should return inner html with :html format" do
|
73
|
+
class CssParser
|
74
|
+
css :foo, "div", :as=>:html
|
75
|
+
end
|
76
|
+
|
77
|
+
foo = CssParser.new('<div><span>a</span></div>')
|
78
|
+
foo.foo.should == "<span>a</span>"
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should return inner text with :text format" do
|
82
|
+
class CssParser
|
83
|
+
css :foo, "div", :as=>:text
|
84
|
+
end
|
85
|
+
|
86
|
+
foo = CssParser.new('<div><span>a</span></div>')
|
87
|
+
foo.foo.should == "a"
|
88
|
+
end
|
89
|
+
|
63
90
|
it "should define instance method as module" do
|
64
91
|
class CssParser
|
65
92
|
css :foo, "div"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: maiha-css_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- maiha
|
@@ -9,11 +9,12 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-04-07 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: hpricot
|
17
|
+
type: :runtime
|
17
18
|
version_requirement:
|
18
19
|
version_requirements: !ruby/object:Gem::Requirement
|
19
20
|
requirements:
|
@@ -23,6 +24,7 @@ dependencies:
|
|
23
24
|
version:
|
24
25
|
- !ruby/object:Gem::Dependency
|
25
26
|
name: maiha-dsl_accessor
|
27
|
+
type: :runtime
|
26
28
|
version_requirement:
|
27
29
|
version_requirements: !ruby/object:Gem::Requirement
|
28
30
|
requirements:
|