maiha-css_parser 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +19 -0
- data/Rakefile +52 -0
- data/lib/css_parser.rb +79 -0
- data/spec/css_parser_spec.rb +96 -0
- data/spec/spec_helper.rb +6 -0
- metadata +78 -0
data/README
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
css_parser
|
2
|
+
==========
|
3
|
+
|
4
|
+
An Hpricot helper that makes HTML scraping easy: define a parser class and declare a CSS selector for each attribute you want to extract.
|
5
|
+
|
6
|
+
|
7
|
+
Example
|
8
|
+
=======
|
9
|
+
|
10
|
+
class UserParser < CssParser
|
11
|
+
css :name, "div#contents span.name"
|
12
|
+
css :age , "div#contents span.age"
|
13
|
+
end
|
14
|
+
|
15
|
+
parser = UserParser.file('user.html')
|
16
|
+
User.new parser.attributes
|
17
|
+
|
18
|
+
|
19
|
+
Copyright (c) 2008 maiha@wota.jp, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
# Rake tasks for packaging, installing and testing the css_parser gem.
require 'rubygems'
require 'rake/gempackagetask'
require 'spec/rake/spectask'

GEM_NAME = "css_parser"
AUTHOR = "maiha"
EMAIL = "maiha@wota.jp"
HOMEPAGE = "http://github.com/maiha/css_parser"
SUMMARY = "hpricot helper that scrapes html easily by parser class defined css selector"
GEM_VERSION = "0.1"

# Gem specification shared by the packaging and gemspec tasks below.
spec = Gem::Specification.new do |s|
  # s.rubyforge_project = 'merb'
  s.name = GEM_NAME
  s.version = GEM_VERSION
  s.platform = Gem::Platform::RUBY
  s.has_rdoc = true
  s.extra_rdoc_files = ["README", "LICENSE", 'TODO']
  s.summary = SUMMARY
  s.description = s.summary
  s.author = AUTHOR
  s.email = EMAIL
  s.homepage = HOMEPAGE
  s.add_dependency('hpricot', '>= 0.1')
  s.add_dependency('dsl_accessor', '>= 0.1')
  s.require_path = 'lib'
  s.files = %w(LICENSE README Rakefile TODO) + Dir.glob("{lib,spec,app,public,stubs}/**/*")
end

# Defines the :package / :gem tasks that build pkg/<name>-<version>.gem.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
end

# The install/uninstall tasks previously called Merb::RakeHelper, which this
# Rakefile never loads (NameError at run time). Plain `gem` commands do the
# same job without depending on merb-core.
desc "Install the gem"
task :install => :package do
  sh "gem install --local pkg/#{GEM_NAME}-#{GEM_VERSION}.gem"
end

desc "Uninstall the gem"
task :uninstall do
  sh "gem uninstall #{GEM_NAME} --version #{GEM_VERSION}"
end

desc "Create a gemspec file"
task :gemspec do
  File.open("#{GEM_NAME}.gemspec", "w") do |file|
    file.puts spec.to_ruby
  end
end

# The default task depends on 'spec', so that task has to exist: without this
# definition `rake` aborts with "Don't know how to build task 'spec'".
desc "Run spec examples"
Spec::Rake::SpecTask.new(:spec) do |t|
  t.spec_files = FileList['spec/**/*_spec.rb']
end

desc 'Default: run spec examples'
task :default => 'spec'
|
data/lib/css_parser.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'rubygems'
require 'nkf'
require 'pathname'
require 'dsl_accessor'
require 'hpricot'
|
4
|
+
|
5
|
+
# Base class for declarative HTML scrapers built on Hpricot.
#
# A parser class declares named CSS selectors with +css+. Each declaration
# adds an instance method of that name which returns the inner HTML of the
# first element matching the selector, or nil when nothing matches.
#
#   class UserParser < CssParser
#     css :name, "div#contents span.name"
#   end
#
#   UserParser.new(html).name
class CssParser
  # Class-level accessor provided by the dsl_accessor gem.
  # NOTE(review): the proc presumably supplies the default value (an empty
  # Hash) -- confirm against dsl_accessor's documentation.
  dsl_accessor :stored_css, proc{{}}

  ######################################################################
  ### Exceptions

  # Raised by +css+ when the requested name would shadow an existing method.
  class ReservedCss < StandardError; end

  # Instance methods of CssParser itself that +css+ must never redefine.
  RESERVED_NAMES = %w( attributes parser ).freeze

  ######################################################################
  ### InstanceMethods

  # html::     markup to scrape; converted with +to_s+, so nil becomes "".
  # filename:: optional origin of the markup; only stored on the instance.
  def initialize(html = nil, filename = nil)
    @html = html.to_s
    @filename = filename
  end

  # Returns the Hpricot document for the stored markup, parsed on first use
  # and memoized afterwards.
  def parser
    @parser ||= Hpricot(@html)
  end

  # Builds a Hash mapping each key to the value returned by the method of the
  # same name.
  #
  # keys:: names to collect; defaults to every name declared with +css+ on
  #        this object's class.
  def attributes(keys = nil)
    keys ||= self.class.my_stored_css.keys
    keys.inject({}) { |h, key| h[key] = send(key); h }
  end

  ######################################################################
  ### Class Methods

  # Builds a parser from the file at +file+. The content is passed through
  # NKF with the '-w' flag (UTF-8 output) before being handed to +new+.
  def self.file(file)
    html = NKF.nkf('-w', Pathname(file).read)
    new(html, file)
  end

  # Declares an accessor named +key+ that extracts the first element matching
  # the CSS selector +pattern+.
  #
  # Raises ReservedCss when +key+ is not already a declared selector on this
  # class and names an existing public/protected instance method.
  def self.css(key, pattern)
    key = key.to_s.intern
    guard_from_overridden(key)
    define_css(key, pattern)
  end

  ######################################################################
  ### Internal helpers
  #
  # These are class methods, so a bare `private` keyword would not hide them
  # (it only affects instance methods defined with plain `def`). They stay
  # public; generated accessors call my_stored_css through self.class.

  # Anonymous module, included into this class on first use, that holds the
  # generated accessors. Keeping them in a module lets the class define a
  # method of the same name and reach the generated one through +super+.
  def self.css_module
    return @css_module if @css_module

    mod = Module.new
    include mod
    @css_module = mod
  end

  # stored_css object for this class: a per-class copy of +stored_css+,
  # created lazily. Falls back to the original object if it cannot be dup'ed.
  def self.my_stored_css
    @my_stored_css ||= begin
      stored_css.dup
    rescue StandardError
      stored_css
    end
  end

  # Records +pattern+ under +key+ and, unless instances already respond to
  # +key+, generates the accessor inside css_module.
  def self.define_css(key, pattern)
    # Module#instance_methods returns Strings on Ruby 1.8 but Symbols on 1.9+,
    # so comparing against key.to_s silently stopped matching on newer Rubies.
    # method_defined? accepts either form on every version.
    unless method_defined?(key)
      css_module.module_eval do
        define_method(key) do
          # Look the selector up at call time so a later `css` call for the
          # same key takes effect. A fresh local is used so the `pattern`
          # variable captured by this closure is never reassigned.
          selector = self.class.my_stored_css[key]
          element = parser.search(selector).first
          element ? element.inner_html : nil
        end
      end
    end

    my_stored_css[key] = pattern
  end

  # Raises ReservedCss if defining +key+ would shadow an existing method.
  # Keys already declared through +css+ on this class may be redeclared.
  def self.guard_from_overridden(key)
    return if my_stored_css.key?(key)

    if RESERVED_NAMES.include?(key.to_s)
      raise ReservedCss, "#{key} is reserved for CssParser module"
    end

    # The message used to call String#classify, which only exists when
    # ActiveSupport is loaded; interpolating the class directly avoids a
    # NoMethodError masking the intended ReservedCss.
    if method_defined?(key)
      raise ReservedCss, "#{key} is reserved for #{self}##{key}"
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Examples for CssParser (RSpec 1.x `should` syntax).
require File.join(File.dirname(__FILE__), "spec_helper")

describe CssParser do
  it "should provide .file" do
    CssParser.should respond_to(:file)
  end

  describe ".file" do
    it "should return a CssParser" do
      # Any readable file will do; this spec file itself is used as input.
      parsed = CssParser.file(__FILE__)
      parsed.class.should == CssParser
    end
  end

  it "should provide .css" do
    CssParser.should respond_to(:css)
  end

  describe ".css" do
    it "should create an instance-level accessor to the argument" do
      class_level    = lambda { CssParser.foo2 }
      instance_level = lambda { CssParser.new.foo2 }

      # Before the declaration neither the class nor its instances respond.
      class_level.should raise_error(NoMethodError)
      instance_level.should raise_error(NoMethodError)

      CssParser.css :foo2, "pattern"

      # Afterwards only instances gain the accessor.
      class_level.should raise_error(NoMethodError)
      instance_level.should_not raise_error(NoMethodError)
    end

    describe " should raise" do
      it "when an existing instance method is specified" do
        declaring_send = lambda { CssParser.css :send, "pattern" }
        declaring_send.should raise_error(CssParser::ReservedCss)
      end

      it "when reserved methods are specified" do
        declaring_attributes = lambda { CssParser.css :attributes, "pattern" }
        declaring_attributes.should raise_error(CssParser::ReservedCss)

        declaring_parser = lambda { CssParser.css :parser, "pattern" }
        declaring_parser.should raise_error(CssParser::ReservedCss)
      end
    end

    it "should parse" do
      class CssParser
        css :foo, "div"
      end

      document = CssParser.new('<div>maiha</div>')
      document.foo.should == "maiha"
    end

    it "should respect css selector" do
      class Foo < CssParser
        css :name, "div.name"
      end

      document = Foo.new('<div>xxx</div><div class=name>maiha</div>')
      document.name.should == "maiha"
    end

    it "should define instance method as module" do
      # The generated accessor must be reachable through `super` from a
      # method of the same name defined directly on the class.
      class CssParser
        css :foo, "div"

        def foo
          "[#{super}]"
        end
      end

      document = CssParser.new('<div>a</div>')
      document.foo.should == "[a]"
    end
  end

  it "should provide #parser" do
    CssParser.new.should respond_to(:parser)
  end

  it "should provide #attributes" do
    CssParser.new.should respond_to(:attributes)
  end

  describe "#attributes" do
    it "should return composed hash" do
      class Foo < CssParser
        css :name, "#name"
        css :age , "#age"
      end

      # An empty document matches nothing, so every declared key maps to nil.
      expected = {:age=>nil, :name=>nil}
      Foo.new('').attributes.should == expected
    end
  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: maiha-css_parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.1"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- maiha
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-01-23 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: "0.1"
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: dsl_accessor
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "0.1"
|
32
|
+
version:
|
33
|
+
description: hpricot helper that scrapes html easily by parser class defined css selector
|
34
|
+
email: maiha@wota.jp
|
35
|
+
executables: []
|
36
|
+
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files:
|
40
|
+
- README
|
41
|
+
- LICENSE
|
42
|
+
- TODO
|
43
|
+
files:
|
44
|
+
- LICENSE
|
45
|
+
- README
|
46
|
+
- Rakefile
|
47
|
+
- TODO
|
48
|
+
- lib/css_parser.rb
|
49
|
+
- spec/spec_helper.rb
|
50
|
+
- spec/css_parser_spec.rb
|
51
|
+
has_rdoc: true
|
52
|
+
homepage: http://github.com/maiha/css_parser
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.2.0
|
74
|
+
signing_key:
|
75
|
+
specification_version: 2
|
76
|
+
summary: hpricot helper that scrapes html easily by parser class defined css selector
|
77
|
+
test_files: []
|
78
|
+
|